Merge branch 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/rusty/linux...
author David S. Miller <davem@davemloft.net>
Fri, 25 Sep 2009 18:09:08 +0000 (11:09 -0700)
committer David S. Miller <davem@davemloft.net>
Fri, 25 Sep 2009 18:09:08 +0000 (11:09 -0700)
770 files changed:
Documentation/auxdisplay/cfag12864b-example.c
Documentation/cgroups/cgroups.txt
Documentation/cgroups/memory.txt
Documentation/crypto/async-tx-api.txt
Documentation/feature-removal-schedule.txt
Documentation/filesystems/9p.txt
Documentation/filesystems/sharedsubtree.txt
Documentation/filesystems/vfs.txt
Documentation/hwmon/coretemp
Documentation/hwmon/fscher [deleted file]
Documentation/ioctl/ioctl-number.txt
Documentation/kbuild/kbuild.txt
Documentation/kbuild/makefiles.txt
Documentation/sysctl/fs.txt
Documentation/sysctl/kernel.txt
Documentation/sysctl/vm.txt
Documentation/vm/.gitignore
Documentation/vm/locking
Documentation/vm/page-types.c
MAINTAINERS
Makefile
arch/alpha/include/asm/fcntl.h
arch/alpha/include/asm/smp.h
arch/alpha/include/asm/topology.h
arch/alpha/kernel/core_marvel.c
arch/alpha/kernel/core_titan.c
arch/alpha/kernel/pci_impl.h
arch/alpha/kernel/pci_iommu.c
arch/alpha/kernel/process.c
arch/alpha/kernel/smp.c
arch/arm/Makefile
arch/arm/boot/install.sh
arch/arm/include/asm/cacheflush.h
arch/arm/include/asm/hardware/iop3xx-adma.h
arch/arm/include/asm/hardware/iop_adma.h
arch/arm/include/asm/mmu_context.h
arch/arm/include/asm/smp.h
arch/arm/include/asm/tlbflush.h
arch/arm/kernel/Makefile
arch/arm/kernel/init_task.c
arch/arm/kernel/smp.c
arch/arm/kernel/sys_arm.c
arch/arm/mach-at91/at91sam9263_devices.c
arch/arm/mach-at91/board-sam9263ek.c
arch/arm/mach-at91/include/mach/board.h
arch/arm/mach-iop13xx/include/mach/adma.h
arch/arm/mach-iop13xx/setup.c
arch/arm/mm/context.c
arch/arm/mm/flush.c
arch/arm/plat-iop/adma.c
arch/avr32/kernel/init_task.c
arch/avr32/mm/init.c
arch/blackfin/Makefile
arch/blackfin/boot/install.sh
arch/cris/Makefile
arch/cris/kernel/Makefile
arch/cris/kernel/process.c
arch/frv/kernel/init_task.c
arch/frv/kernel/pm.c
arch/frv/kernel/sys_frv.c
arch/h8300/kernel/init_task.c
arch/h8300/kernel/sys_h8300.c
arch/ia64/include/asm/smp.h
arch/ia64/include/asm/topology.h
arch/ia64/install.sh
arch/ia64/kernel/Makefile.gate
arch/ia64/kernel/init_task.c
arch/ia64/kernel/smp.c
arch/m32r/boot/compressed/install.sh
arch/m32r/include/asm/mmu_context.h
arch/m32r/include/asm/smp.h
arch/m32r/kernel/init_task.c
arch/m32r/kernel/smp.c
arch/m32r/kernel/smpboot.c
arch/m68k/install.sh
arch/m68k/kernel/process.c
arch/m68k/kernel/sys_m68k.c
arch/m68knommu/kernel/init_task.c
arch/m68knommu/kernel/sys_m68k.c
arch/microblaze/Kconfig
arch/microblaze/Makefile
arch/microblaze/boot/Makefile
arch/microblaze/boot/dts/system.dts [new symlink]
arch/microblaze/boot/linked_dtb.S [new file with mode: 0644]
arch/microblaze/configs/mmu_defconfig
arch/microblaze/configs/nommu_defconfig
arch/microblaze/include/asm/asm-compat.h [new file with mode: 0644]
arch/microblaze/include/asm/io.h
arch/microblaze/include/asm/ipc.h [deleted file]
arch/microblaze/include/asm/page.h
arch/microblaze/include/asm/setup.h
arch/microblaze/include/asm/syscall.h [new file with mode: 0644]
arch/microblaze/kernel/cpu/cpuinfo.c
arch/microblaze/kernel/entry.S
arch/microblaze/kernel/exceptions.c
arch/microblaze/kernel/head.S
arch/microblaze/kernel/hw_exception_handler.S
arch/microblaze/kernel/init_task.c
arch/microblaze/kernel/process.c
arch/microblaze/kernel/ptrace.c
arch/microblaze/kernel/setup.c
arch/microblaze/kernel/sys_microblaze.c
arch/microblaze/kernel/vmlinux.lds.S
arch/microblaze/mm/init.c
arch/mips/Makefile
arch/mips/alchemy/common/time.c
arch/mips/include/asm/mach-ip27/topology.h
arch/mips/include/asm/mmu_context.h
arch/mips/include/asm/smp-ops.h
arch/mips/include/asm/smp.h
arch/mips/kernel/init_task.c
arch/mips/kernel/smp-cmp.c
arch/mips/kernel/smp-mt.c
arch/mips/kernel/smp-up.c
arch/mips/kernel/smp.c
arch/mips/kernel/smtc.c
arch/mips/kernel/vmlinux.lds.S
arch/mips/lasat/sysctl.c
arch/mips/mipssim/sim_smtc.c
arch/mips/mm/c-octeon.c
arch/mips/mti-malta/malta-smtc.c
arch/mips/pmc-sierra/yosemite/smp.c
arch/mips/sgi-ip27/ip27-memory.c
arch/mips/sgi-ip27/ip27-smp.c
arch/mips/sibyte/bcm1480/smp.c
arch/mips/sibyte/sb1250/smp.c
arch/mn10300/include/asm/mmu_context.h
arch/mn10300/kernel/init_task.c
arch/mn10300/kernel/sys_mn10300.c
arch/parisc/Makefile
arch/parisc/include/asm/fcntl.h
arch/parisc/include/asm/smp.h
arch/parisc/install.sh
arch/parisc/kernel/init_task.c
arch/parisc/kernel/sys_parisc32.c
arch/powerpc/Makefile
arch/powerpc/boot/install.sh
arch/powerpc/include/asm/fsldma.h [new file with mode: 0644]
arch/powerpc/include/asm/smp.h
arch/powerpc/include/asm/topology.h
arch/powerpc/kernel/init_task.c
arch/powerpc/kernel/machine_kexec_64.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/sys_ppc32.c
arch/powerpc/kernel/vdso.c
arch/powerpc/kernel/vdso32/Makefile
arch/powerpc/kernel/vdso32/vdso32_wrapper.S
arch/powerpc/kernel/vdso64/Makefile
arch/powerpc/kernel/vdso64/vdso64_wrapper.S
arch/powerpc/platforms/powermac/smp.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/s390/appldata/appldata_base.c
arch/s390/boot/install.sh
arch/s390/include/asm/smp.h
arch/s390/include/asm/topology.h
arch/s390/kernel/compat_linux.c
arch/s390/kernel/debug.c
arch/s390/kernel/init_task.c
arch/s390/kernel/process.c
arch/s390/kernel/smp.c
arch/s390/kernel/vdso.c
arch/s390/kernel/vdso32/Makefile
arch/s390/kernel/vdso32/vdso32_wrapper.S
arch/s390/kernel/vdso64/Makefile
arch/s390/kernel/vdso64/vdso64_wrapper.S
arch/s390/mm/cmm.c
arch/score/kernel/init_task.c
arch/sh/boot/compressed/install.sh
arch/sh/drivers/dma/Kconfig
arch/sh/drivers/dma/Makefile
arch/sh/include/asm/dma-sh.h
arch/sh/include/asm/smp.h
arch/sh/include/asm/topology.h
arch/sh/kernel/init_task.c
arch/sh/kernel/irq.c
arch/sh/kernel/sys_sh32.c
arch/sh/kernel/sys_sh64.c
arch/sh/kernel/vsyscall/Makefile
arch/sparc/Makefile
arch/sparc/include/asm/smp_64.h
arch/sparc/include/asm/topology_64.h
arch/sparc/kernel/Makefile
arch/sparc/kernel/init_task.c
arch/sparc/kernel/sys_sparc32.c
arch/sparc/kernel/systbls.h
arch/um/Makefile
arch/um/include/asm/mmu_context.h
arch/um/kernel/Makefile
arch/um/kernel/init_task.c
arch/um/kernel/smp.c
arch/um/kernel/vmlinux.lds.S
arch/x86/Makefile
arch/x86/boot/install.sh
arch/x86/include/asm/cache.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/nmi.h
arch/x86/include/asm/pci.h
arch/x86/include/asm/smp.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/nmi.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/head_32.S
arch/x86/kernel/head_64.S
arch/x86/kernel/init_task.c
arch/x86/kernel/ldt.c
arch/x86/kernel/process.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/time.c
arch/x86/kernel/traps.c
arch/x86/kernel/vsyscall_64.c
arch/x86/mm/fault.c
arch/x86/mm/pageattr.c
arch/x86/mm/tlb.c
arch/x86/pci/common.c
arch/x86/vdso/Makefile
arch/x86/xen/mmu.c
arch/xtensa/kernel/Makefile
arch/xtensa/kernel/head.S
arch/xtensa/kernel/init_task.c
crypto/async_tx/Kconfig
crypto/async_tx/Makefile
crypto/async_tx/async_memcpy.c
crypto/async_tx/async_memset.c
crypto/async_tx/async_pq.c [new file with mode: 0644]
crypto/async_tx/async_raid6_recov.c [new file with mode: 0644]
crypto/async_tx/async_tx.c
crypto/async_tx/async_xor.c
crypto/async_tx/raid6test.c [new file with mode: 0644]
drivers/acpi/button.c
drivers/acpi/osl.c
drivers/acpi/processor_perflib.c
drivers/acpi/processor_throttling.c
drivers/atm/he.c
drivers/atm/solos-attrlist.c
drivers/atm/solos-pci.c
drivers/cdrom/cdrom.c
drivers/char/Kconfig
drivers/char/Makefile
drivers/char/agp/intel-agp.c
drivers/char/bfin-otp.c
drivers/char/hpet.c
drivers/char/mem.c
drivers/char/mwave/mwavedd.c
drivers/char/random.c
drivers/char/rio/rioctrl.c
drivers/char/tpm/tpm.c
drivers/char/uv_mmtimer.c [new file with mode: 0644]
drivers/dca/dca-core.c
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/at_hdmac.c
drivers/dma/at_hdmac_regs.h
drivers/dma/dmaengine.c
drivers/dma/dmatest.c
drivers/dma/dw_dmac.c
drivers/dma/dw_dmac_regs.h
drivers/dma/fsldma.c
drivers/dma/fsldma.h
drivers/dma/ioat.c [deleted file]
drivers/dma/ioat/Makefile [new file with mode: 0644]
drivers/dma/ioat/dca.c [new file with mode: 0644]
drivers/dma/ioat/dma.c [new file with mode: 0644]
drivers/dma/ioat/dma.h [new file with mode: 0644]
drivers/dma/ioat/dma_v2.c [new file with mode: 0644]
drivers/dma/ioat/dma_v2.h [new file with mode: 0644]
drivers/dma/ioat/dma_v3.c [new file with mode: 0644]
drivers/dma/ioat/hw.h [new file with mode: 0644]
drivers/dma/ioat/pci.c [new file with mode: 0644]
drivers/dma/ioat/registers.h [new file with mode: 0644]
drivers/dma/ioat_dca.c [deleted file]
drivers/dma/ioat_dma.c [deleted file]
drivers/dma/ioatdma.h [deleted file]
drivers/dma/ioatdma_hw.h [deleted file]
drivers/dma/ioatdma_registers.h [deleted file]
drivers/dma/iop-adma.c
drivers/dma/iovlock.c
drivers/dma/mv_xor.c
drivers/dma/mv_xor.h
drivers/dma/shdma.c [new file with mode: 0644]
drivers/dma/shdma.h [new file with mode: 0644]
drivers/dma/txx9dmac.c
drivers/dma/txx9dmac.h
drivers/edac/Kconfig
drivers/edac/Makefile
drivers/edac/cpc925_edac.c
drivers/edac/edac_device.c
drivers/edac/edac_mc.c
drivers/edac/edac_pci.c
drivers/edac/i3200_edac.c [new file with mode: 0644]
drivers/edac/mpc85xx_edac.c
drivers/edac/mv64x60_edac.c
drivers/gpu/drm/Kconfig
drivers/gpu/drm/drm_gem.c
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_opregion.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_suspend.c
drivers/gpu/drm/i915/i915_trace.h [new file with mode: 0644]
drivers/gpu/drm/i915/i915_trace_points.c [new file with mode: 0644]
drivers/gpu/drm/i915/intel_bios.c
drivers/gpu/drm/i915/intel_crt.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/i915/intel_sdvo.c
drivers/hwmon/Kconfig
drivers/hwmon/Makefile
drivers/hwmon/adm1031.c
drivers/hwmon/coretemp.c
drivers/hwmon/fscher.c [deleted file]
drivers/hwmon/fscpos.c [deleted file]
drivers/hwmon/ltc4215.c
drivers/hwmon/ltc4245.c
drivers/idle/i7300_idle.c
drivers/input/input.c
drivers/input/keyboard/Kconfig
drivers/input/keyboard/Makefile
drivers/input/keyboard/adp5588-keys.c [new file with mode: 0644]
drivers/input/keyboard/atkbd.c
drivers/input/keyboard/max7359_keypad.c [new file with mode: 0644]
drivers/input/keyboard/opencores-kbd.c [new file with mode: 0644]
drivers/input/keyboard/qt2160.c [new file with mode: 0644]
drivers/input/misc/Kconfig
drivers/input/misc/dm355evm_keys.c
drivers/input/mouse/sentelic.c
drivers/input/mouse/synaptics_i2c.c
drivers/input/serio/i8042.c
drivers/input/serio/libps2.c
drivers/input/touchscreen/Kconfig
drivers/input/touchscreen/Makefile
drivers/input/touchscreen/ad7879.c
drivers/input/touchscreen/mcs5000_ts.c [new file with mode: 0644]
drivers/leds/leds-clevo-mail.c
drivers/md/Kconfig
drivers/md/bitmap.c
drivers/md/linear.c
drivers/md/md.c
drivers/md/md.h
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/dvb/dvb-core/dvbdev.h
drivers/media/dvb/dvb-usb/Kconfig
drivers/media/video/saa7164/saa7164-api.c
drivers/media/video/saa7164/saa7164-cmd.c
drivers/media/video/saa7164/saa7164-core.c
drivers/media/video/saa7164/saa7164.h
drivers/media/video/usbvision/usbvision-core.c
drivers/media/video/usbvision/usbvision-i2c.c
drivers/media/video/usbvision/usbvision-video.c
drivers/memstick/core/memstick.c
drivers/misc/sgi-gru/grukservices.c
drivers/misc/sgi-gru/gruprocfs.c
drivers/mmc/host/atmel-mci.c
drivers/mtd/Kconfig
drivers/mtd/maps/Kconfig
drivers/net/3c59x.c
drivers/net/8139cp.c
drivers/net/Kconfig
drivers/net/atl1c/atl1c_main.c
drivers/net/can/Kconfig
drivers/net/can/Makefile
drivers/net/can/sja1000/ems_pci.c
drivers/net/can/usb/Makefile [new file with mode: 0644]
drivers/net/can/usb/ems_usb.c [new file with mode: 0644]
drivers/net/cnic.c
drivers/net/cpmac.c
drivers/net/ehea/ehea_main.c
drivers/net/igb/e1000_mac.c
drivers/net/igb/e1000_mac.h
drivers/net/ixgbe/ixgbe.h
drivers/net/ixgbe/ixgbe_ethtool.c
drivers/net/ixgbe/ixgbe_main.c
drivers/net/netxen/netxen_nic_main.c
drivers/net/pcmcia/pcnet_cs.c
drivers/net/sfc/efx.c
drivers/net/sky2.c
drivers/net/sunvnet.c
drivers/net/tun.c
drivers/net/usb/kaweth.c
drivers/net/usb/smsc95xx.c
drivers/net/usb/usbnet.c
drivers/net/wireless/arlan-proc.c
drivers/net/wireless/ath/ar9170/usb.c
drivers/net/wireless/ath/ath9k/calib.c
drivers/net/wireless/ath/ath9k/calib.h
drivers/net/wireless/ath/ath9k/eeprom_def.c
drivers/net/wireless/ath/ath9k/hw.c
drivers/net/wireless/ath/ath9k/hw.h
drivers/net/wireless/ath/ath9k/main.c
drivers/net/wireless/ath/ath9k/reg.h
drivers/net/wireless/b43/Kconfig
drivers/net/wireless/b43/Makefile
drivers/net/wireless/b43/b43.h
drivers/net/wireless/b43/debugfs.c
drivers/net/wireless/b43/debugfs.h
drivers/net/wireless/b43/dma.c
drivers/net/wireless/b43/leds.c
drivers/net/wireless/b43/leds.h
drivers/net/wireless/b43/main.c
drivers/net/wireless/b43/phy_lp.c
drivers/net/wireless/b43/pio.c
drivers/net/wireless/b43/rfkill.c
drivers/net/wireless/b43/sdio.c [new file with mode: 0644]
drivers/net/wireless/b43/sdio.h [new file with mode: 0644]
drivers/net/wireless/b43/xmit.c
drivers/net/wireless/iwlwifi/iwl-4965.c
drivers/net/wireless/iwlwifi/iwl-5000.c
drivers/net/wireless/iwlwifi/iwl-rx.c
drivers/net/wireless/iwlwifi/iwl-sta.c
drivers/net/wireless/iwlwifi/iwl3945-base.c
drivers/net/wireless/rt2x00/rt2x00lib.h
drivers/net/wireless/wl12xx/Kconfig
drivers/net/wireless/zd1211rw/zd_usb.c
drivers/net/xilinx_emaclite.c
drivers/oprofile/buffer_sync.c
drivers/parport/procfs.c
drivers/pci/hotplug/pciehp.h
drivers/pci/hotplug/pciehp_acpi.c
drivers/pci/hotplug/pciehp_core.c
drivers/pci/hotplug/pciehp_ctrl.c
drivers/pci/hotplug/pciehp_hpc.c
drivers/pci/hotplug/pciehp_pci.c
drivers/pci/pcie/aer/aerdrv.c
drivers/pci/pcie/aspm.c
drivers/platform/x86/acer-wmi.c
drivers/s390/char/zcore.c
drivers/staging/Kconfig
drivers/staging/Makefile
drivers/staging/cpc-usb/Kconfig [deleted file]
drivers/staging/cpc-usb/Makefile [deleted file]
drivers/staging/cpc-usb/TODO [deleted file]
drivers/staging/cpc-usb/cpc-usb_drv.c [deleted file]
drivers/staging/cpc-usb/cpc.h [deleted file]
drivers/staging/cpc-usb/cpc_int.h [deleted file]
drivers/staging/cpc-usb/cpcusb.h [deleted file]
drivers/staging/cpc-usb/sja2m16c.h [deleted file]
drivers/staging/cpc-usb/sja2m16c_2.c [deleted file]
drivers/staging/go7007/Makefile
drivers/usb/Kconfig
drivers/usb/gadget/f_loopback.c
drivers/usb/gadget/f_obex.c
drivers/usb/gadget/f_sourcesink.c
drivers/usb/gadget/u_audio.c
drivers/usb/gadget/u_ether.c
drivers/usb/serial/sierra.c
drivers/vlynq/vlynq.c
fs/9p/Kconfig
fs/9p/Makefile
fs/9p/cache.c [new file with mode: 0644]
fs/9p/cache.h [new file with mode: 0644]
fs/9p/v9fs.c
fs/9p/v9fs.h
fs/9p/v9fs_vfs.h
fs/9p/vfs_addr.c
fs/9p/vfs_file.c
fs/9p/vfs_inode.c
fs/9p/vfs_super.c
fs/adfs/inode.c
fs/attr.c
fs/befs/linuxvfs.c
fs/binfmt_elf.c
fs/binfmt_elf_fdpic.c
fs/binfmt_flat.c
fs/block_dev.c
fs/btrfs/async-thread.c
fs/btrfs/async-thread.h
fs/btrfs/btrfs_inode.h
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/export.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode-item.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ioctl.h
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/orphan.c
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/buffer.c
fs/char_dev.c
fs/cifs/cifsfs.c
fs/cifs/inode.c
fs/coda/coda_int.h
fs/compat.c
fs/drop_caches.c
fs/exec.c
fs/exofs/super.c
fs/ext2/inode.c
fs/ext3/inode.c
fs/ext4/inode.c
fs/fat/inode.c
fs/fcntl.c
fs/file_table.c
fs/fuse/dir.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/gfs2/aops.c
fs/gfs2/ops_inode.c
fs/hfs/mdb.c
fs/hfsplus/super.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/internal.h
fs/ioctl.c
fs/isofs/inode.c
fs/jfs/super.c
fs/libfs.c
fs/lockd/xdr.c
fs/lockd/xdr4.c
fs/namespace.c
fs/ncpfs/inode.c
fs/ncpfs/ioctl.c
fs/nfs/client.c
fs/nfs/file.c
fs/nfs/fscache.c
fs/nfs/fscache.h
fs/nfs/inode.c
fs/nfs/nfs2xdr.c
fs/nfs/nfs3proc.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4xdr.c
fs/nfs/proc.c
fs/nfs/super.c
fs/nfsd/nfs4idmap.c
fs/nls/nls_base.c
fs/ntfs/aops.c
fs/ntfs/super.c
fs/ocfs2/aops.c
fs/ocfs2/dlm/dlmast.c
fs/ocfs2/dlm/dlmconvert.c
fs/ocfs2/dlm/dlmdebug.c
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmlock.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/dlm/dlmthread.c
fs/ocfs2/dlm/dlmunlock.c
fs/ocfs2/super.c
fs/ocfs2/symlink.c
fs/proc/meminfo.c
fs/proc/proc_sysctl.c
fs/proc/uptime.c
fs/ramfs/file-nommu.c
fs/read_write.c
fs/romfs/super.c
fs/seq_file.c
fs/smbfs/inode.c
fs/super.c
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_sysctl.c
include/acpi/button.h [new file with mode: 0644]
include/asm-generic/fcntl.h
include/asm-generic/mman-common.h
include/asm-generic/siginfo.h
include/asm-generic/topology.h
include/drm/drm_pciids.h
include/drm/i915_drm.h
include/linux/async_tx.h
include/linux/binfmts.h
include/linux/cgroup.h
include/linux/configfs.h
include/linux/cpumask.h
include/linux/cred.h
include/linux/dca.h
include/linux/debugfs.h
include/linux/dmaengine.h
include/linux/fs.h
include/linux/ftrace.h
include/linux/futex.h
include/linux/hugetlb.h
include/linux/i2c/adp5588.h [new file with mode: 0644]
include/linux/i2c/mcs5000_ts.h [new file with mode: 0644]
include/linux/i8042.h
include/linux/input.h
include/linux/interrupt.h
include/linux/libps2.h
include/linux/linkage.h
include/linux/memcontrol.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mmzone.h
include/linux/module.h
include/linux/netlink.h
include/linux/page-flags.h
include/linux/page_cgroup.h
include/linux/pci_ids.h
include/linux/phonet.h
include/linux/prctl.h
include/linux/relay.h
include/linux/res_counter.h
include/linux/rmap.h
include/linux/sched.h
include/linux/security.h
include/linux/seq_file.h
include/linux/signal.h
include/linux/smp.h
include/linux/sunrpc/xdr.h
include/linux/swap.h
include/linux/swapops.h
include/linux/sysctl.h
include/linux/time.h
include/linux/topology.h
include/linux/tracehook.h
include/linux/tracepoint.h
include/linux/unaligned/be_byteshift.h
include/linux/unaligned/le_byteshift.h
include/linux/usb/usbnet.h
include/linux/utsname.h
include/linux/vgaarb.h
include/linux/writeback.h
include/net/9p/9p.h
include/net/ip.h
include/net/ipip.h
include/net/ndisc.h
init/Kconfig
init/main.c
ipc/ipc_sysctl.c
ipc/mq_sysctl.c
kernel/Makefile
kernel/audit.c
kernel/audit_watch.c
kernel/auditsc.c
kernel/cgroup.c
kernel/cgroup_debug.c [deleted file]
kernel/cgroup_freezer.c
kernel/cpuset.c
kernel/cred.c
kernel/exit.c
kernel/fork.c
kernel/gcov/Kconfig
kernel/hung_task.c
kernel/kmod.c
kernel/module.c
kernel/ns_cgroup.c
kernel/params.c
kernel/pid_namespace.c
kernel/power/swap.c
kernel/ptrace.c
kernel/res_counter.c
kernel/sched.c
kernel/sched_fair.c
kernel/signal.c
kernel/slow-work.c
kernel/smp.c
kernel/softlockup.c
kernel/sys.c
kernel/sys_ni.c
kernel/sysctl.c
kernel/time/Makefile
kernel/time/timeconv.c [new file with mode: 0644]
kernel/trace/ftrace.c
kernel/trace/trace.c
kernel/trace/trace_stack.c
kernel/uid16.c
kernel/utsname_sysctl.c
lib/Kconfig.debug
lib/decompress_inflate.c
lib/decompress_unlzma.c
lib/vsprintf.c
mm/Kconfig
mm/Makefile
mm/filemap.c
mm/hugetlb.c
mm/hwpoison-inject.c [new file with mode: 0644]
mm/ksm.c
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c [new file with mode: 0644]
mm/memory.c
mm/migrate.c
mm/mremap.c
mm/nommu.c
mm/page-writeback.c
mm/page_alloc.c
mm/quicklist.c
mm/rmap.c
mm/shmem.c
mm/swapfile.c
mm/truncate.c
mm/vmscan.c
net/ax25/af_ax25.c
net/bridge/br_netfilter.c
net/core/pktgen.c
net/decnet/dn_dev.c
net/decnet/sysctl_net_decnet.c
net/ipv4/devinet.c
net/ipv4/ip_gre.c
net/ipv4/ip_sockglue.c
net/ipv4/ipip.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv6/addrconf.c
net/ipv6/ip6_tunnel.c
net/ipv6/ndisc.c
net/ipv6/route.c
net/ipv6/sit.c
net/irda/irsysctl.c
net/mac80211/scan.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_log.c
net/netlink/af_netlink.c
net/netlink/genetlink.c
net/phonet/af_phonet.c
net/phonet/socket.c
net/phonet/sysctl.c
net/sunrpc/auth_null.c
net/sunrpc/clnt.c
net/sunrpc/rpc_pipe.c
net/sunrpc/sysctl.c
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtsock.c
net/wireless/wext-sme.c
scripts/Kbuild.include
scripts/Makefile.build
scripts/basic/docproc.c
scripts/basic/fixdep.c
scripts/basic/hash.c
scripts/checkincludes.pl
scripts/kconfig/conf.c
scripts/kconfig/confdata.c
scripts/kconfig/expr.c
scripts/kconfig/gconf.c
scripts/kconfig/gconf.glade
scripts/kconfig/kxgettext.c
scripts/kconfig/lkc_proto.h
scripts/kconfig/mconf.c
scripts/kconfig/menu.c
scripts/kconfig/qconf.cc
scripts/kconfig/symbol.c
scripts/markup_oops.pl
scripts/tags.sh
security/device_cgroup.c
security/keys/gc.c
security/lsm_audit.c
security/min_addr.c
security/selinux/avc.c
security/selinux/hooks.c
usr/.gitignore
usr/Makefile
virt/kvm/kvm_main.c

index 1d2c010bae120faacda9f7a40324a3bc3b57a308..e7823ffb1ca0f4f06d8ebbcec85b14d4db9fc10c 100644 (file)
@@ -194,7 +194,6 @@ static void cfag12864b_blit(void)
  */
 
 #include <stdio.h>
-#include <string.h>
 
 #define EXAMPLES       6
 
index 6eb1a97e88ce887c9628843aa664d55aca59071d..455d4e6d346d839eb0bd8b811efed40afd3642fa 100644 (file)
@@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0:
 
 # echo 0 > tasks
 
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy.  This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems.  Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give the hierarchy a name.
+
+The hierarchy's name appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
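
As a sketch (the mount point, subsystem choice, name, and output below
are illustrative), creating a named hierarchy and then finding it by
name might look like:

	# mount -t cgroup -o cpuset,name=mytree none /dev/cgroup
	# grep mytree /proc/mounts
	none /dev/cgroup cgroup rw,cpuset,name=mytree 0 0
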
 3. Kernel API
 =============
 
@@ -501,7 +521,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-              struct task_struct *task)
+              struct task_struct *task, bool threadgroup)
 (cgroup_mutex held by caller)
 
 Called prior to moving a task into a cgroup; if the subsystem
@@ -509,14 +529,20 @@ returns an error, this will abort the attach operation.  If a NULL
 task is passed, then a successful result indicates that *any*
 unspecified task can be moved into the cgroup. Note that this isn't
 called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex.
+remain valid while the caller holds cgroup_mutex. If threadgroup is
+true, then a successful result indicates that all threads in the given
+thread's threadgroup can be moved together.
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-           struct cgroup *old_cgrp, struct task_struct *task)
+           struct cgroup *old_cgrp, struct task_struct *task,
+           bool threadgroup)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
+If threadgroup is true, the subsystem should take care of all threads
+in the specified thread's threadgroup. This currently does not support
+any subsystem that might need the old_cgrp for every thread in the group.
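
A minimal sketch of a can_attach() honoring the threadgroup flag, using
the signature shown above (the subsystem and its policy are hypothetical):

	static int mysub_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
				    struct task_struct *task, bool threadgroup)
	{
		/* hypothetical: this subsystem migrates one task at a
		 * time, so it refuses whole-threadgroup moves up front */
		if (threadgroup)
			return -EINVAL;
		return 0;
	}
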
 
 void fork(struct cgroup_subsys *ss, struct task_struct *task)
 
index 23d1262c0775755efa8363d049d008e875776f89..b871f2552b45760e87de534ef119ab5e6dcdd102 100644 (file)
@@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that
 pages that are selected for reclaiming come from the per cgroup LRU
 list.
 
+NOTE: Reclaim does not work for the root cgroup, since we cannot set any
+limits on the root cgroup.
+
 2. Locking
 
 The memory controller uses the following hierarchy
@@ -210,6 +213,7 @@ We can alter the memory limit:
 NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
 mega or gigabytes.
+NOTE: We can write "-1" to reset the *.limit_in_bytes (unlimited).
+NOTE: We cannot set limits on the root cgroup any more.
 
 # cat /cgroups/0/memory.limit_in_bytes
 4194304
@@ -375,7 +379,42 @@ cgroups created below it.
 
 NOTE2: This feature can be enabled/disabled per subtree.
 
-7. TODO
+7. Soft limits
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory, control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits are a best-effort feature; they come with
+no guarantees, but the kernel does its best to make sure that when
+memory is heavily contended for, it is allocated based on the soft
+limit hints/setup. Currently, soft limit based reclaim is set up such
+that it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+
+Soft limits can be set up using the following commands (in this example we
+assume a soft limit of 256 megabytes)
+
+# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use
+
+# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1: Soft limits take effect over a long period of time, since they involve
+       reclaiming memory for balancing between memory cgroups.
+NOTE2: It is recommended to always set the soft limit below the hard limit,
+       otherwise the hard limit will take precedence.
+
+8. TODO
 
 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
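
Returning to the soft limit interface in section 7.1 above: the value
reads back in bytes, so a quick sanity check might look like this (the
cgroup path is illustrative):

	# echo 256M > /cgroups/0/memory.soft_limit_in_bytes
	# cat /cgroups/0/memory.soft_limit_in_bytes
	268435456
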
index 9f59fcbf5d82b9ce3236dd044f9e1d75b4600e79..ba046b8fa92fb4a34a360fe54e6b997c019feb43 100644 (file)
@@ -54,20 +54,23 @@ features surfaced as a result:
 
 3.1 General format of the API:
 struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
-                 enum async_tx_flags flags,
-                 struct dma_async_tx_descriptor *dependency,
-                 dma_async_tx_callback callback_routine,
-                 void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
 
 3.2 Supported operations:
-memcpy       - memory copy between a source and a destination buffer
-memset       - fill a destination buffer with a byte value
-xor          - xor a series of source buffers and write the result to a
-              destination buffer
-xor_zero_sum - xor a series of source buffers and set a flag if the
-              result is zero.  The implementation attempts to prevent
-              writes to memory
+memcpy  - memory copy between a source and a destination buffer
+memset  - fill a destination buffer with a byte value
+xor     - xor a series of source buffers and write the result to a
+         destination buffer
+xor_val - xor a series of source buffers and set a flag if the
+         result is zero.  The implementation attempts to prevent
+         writes to memory
+pq      - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val  - validate that p and/or q buffers are in sync with a given series
+         of sources
+datap   - (raid6_datap_recov) recover a raid6 data block and the p block
+         from the given sources
+2data   - (raid6_2data_recov) recover 2 raid6 data blocks from the given
+         sources
 
 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
 recycle (or free) the descriptor.  A descriptor can be acked by one of the
 following methods:
 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
-   descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+   async_tx call, which implicitly sets the acknowledged state.
 3/ calling async_tx_ack() on the descriptor.
 
 3.4 When does the operation execute?
@@ -119,30 +122,42 @@ of an operation.
 Perform a xor->copy->xor operation where each operation depends on the
 result from the previous operation:
 
-void complete_xor_copy_xor(void *param)
+void callback(void *param)
 {
-       printk("complete\n");
+       struct completion *cmp = param;
+
+       complete(cmp);
 }
 
-int run_xor_copy_xor(struct page **xor_srcs,
-                    int xor_src_cnt,
-                    struct page *xor_dest,
-                    size_t xor_len,
-                    struct page *copy_src,
-                    struct page *copy_dest,
-                    size_t copy_len)
+void run_xor_copy_xor(struct page **xor_srcs,
+                     int xor_src_cnt,
+                     struct page *xor_dest,
+                     size_t xor_len,
+                     struct page *copy_src,
+                     struct page *copy_dest,
+                     size_t copy_len)
 {
        struct dma_async_tx_descriptor *tx;
+       addr_conv_t addr_conv[xor_src_cnt];
+       struct async_submit_ctl submit;
+       struct completion cmp;
+
+       init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+                         addr_conv);
+       tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
 
-       tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-                      ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
-       tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
-                         ASYNC_TX_DEP_ACK, tx, NULL, NULL);
-       tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-                      ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
-                      tx, complete_xor_copy_xor, NULL);
+       submit.depend_tx = tx;
+       tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+       init_completion(&cmp);
+       init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+                         callback, &cmp, addr_conv);
+       tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
 
        async_tx_issue_pending_all();
+
+       wait_for_completion(&cmp);
 }
 
 See include/linux/async_tx.h for more information on the flags.  See the
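
For a single operation with no dependency chain, the submit pattern
above collapses to a few lines. This is a sketch that reuses the
callback() from the example and assumes dest_page, src_page and len
are set up by the caller:

	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;
	struct completion cmp;

	init_completion(&cmp);
	/* no dependency and no scribble buffer needed for a plain copy */
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, NULL);
	tx = async_memcpy(dest_page, src_page, 0, 0, len, &submit);
	async_tx_issue_pending_all();
	wait_for_completion(&cmp);
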
index fa75220f8d34e602d166cecfffe0a7036fc2cb64..89a47b5aff07f11f04d504d2e91aab9aa4bfadfe 100644 (file)
@@ -354,14 +354,6 @@ Who:  Krzysztof Piotr Oledzki <ole@ans.pl>
 
 ---------------------------
 
-What:  fscher and fscpos drivers
-When:  June 2009
-Why:   Deprecated by the new fschmd driver.
-Who:   Hans de Goede <hdegoede@redhat.com>
-       Jean Delvare <khali@linux-fr.org>
-
----------------------------
-
 What:  sysfs ui for changing p4-clockmod parameters
 When:  September 2009
 Why:   See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
index 6208f55c44c352b3bcef39514fdd147684ac9064..57e0b80a52747c8ef9af5ddad113802e1c972c37 100644 (file)
@@ -18,11 +18,11 @@ the 9p client is available in the form of a USENIX paper:
 
 Other applications are described in the following papers:
        * XCPU & Clustering
-               http://www.xcpu.org/xcpu-talk.pdf
+               http://xcpu.org/papers/xcpu-talk.pdf
        * KVMFS: control file system for KVM
-               http://www.xcpu.org/kvmfs.pdf
-       * CellFS: A New ProgrammingModel for the Cell BE
-               http://www.xcpu.org/cellfs-talk.pdf
+               http://xcpu.org/papers/kvmfs.pdf
+       * CellFS: A New Programming Model for the Cell BE
+               http://xcpu.org/papers/cellfs-talk.pdf
        * PROSE I/O: Using 9p to enable Application Partitions
                http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
 
@@ -48,6 +48,7 @@ OPTIONS
                                 (see rfdno and wfdno)
                        virtio  - connect to the next virtio channel available
                                (from lguest or KVM with trans_virtio module)
+                       rdma    - connect to a specified RDMA channel
 
   uname=name   user name to attempt mount as on the remote server.  The
                server may override or ignore this value.  Certain user
@@ -59,16 +60,22 @@ OPTIONS
   cache=mode   specifies a caching policy.  By default, no caches are used.
                        loose = no attempts are made at consistency,
                                 intended for exclusive, read-only mounts
+                       fscache = use FS-Cache for a persistent, read-only
+                               cache backend.
 
   debug=n      specifies debug level.  The debug level is a bitmask.
-                       0x01 = display verbose error messages
-                       0x02 = developer debug (DEBUG_CURRENT)
-                       0x04 = display 9p trace
-                       0x08 = display VFS trace
-                       0x10 = display Marshalling debug
-                       0x20 = display RPC debug
-                       0x40 = display transport debug
-                       0x80 = display allocation debug
+                       0x01  = display verbose error messages
+                       0x02  = developer debug (DEBUG_CURRENT)
+                       0x04  = display 9p trace
+                       0x08  = display VFS trace
+                       0x10  = display Marshalling debug
+                       0x20  = display RPC debug
+                       0x40  = display transport debug
+                       0x80  = display allocation debug
+                       0x100 = display protocol message debug
+                       0x200 = display Fid debug
+                       0x400 = display packet debug
+                       0x800 = display fscache tracing debug
 
   rfdno=n      the file descriptor for reading with trans=fd
 
@@ -100,6 +107,10 @@ OPTIONS
                        any   = v9fs does single attach and performs all
                                operations as one user
 
+  cachetag     cache tag to use for the specified persistent cache.
+               Cache tags for existing cache sessions can be listed at
+               /sys/fs/9p/caches. (applies only to cache=fscache)
+
 RESOURCES
 =========
 
@@ -118,7 +129,7 @@ and export.
 A Linux version of the 9p server is now maintained under the npfs project
 on sourceforge (http://sourceforge.net/projects/npfs).  The currently
 maintained version is the single-threaded version of the server (named spfs)
-available from the same CVS repository.
+available from the same SVN repository.
 
 There are user and developer mailing lists available through the v9fs project
 on sourceforge (http://sourceforge.net/projects/v9fs).
@@ -126,7 +137,8 @@ on sourceforge (http://sourceforge.net/projects/v9fs).
 A stand-alone version of the module (which should build for any 2.6 kernel)
 is available via (http://github.com/ericvh/9p-sac/tree/master)
 
-News and other information is maintained on SWiK (http://swik.net/v9fs).
+News and other information is maintained on SWiK (http://swik.net/v9fs)
+and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php).
 
 Bug reports may be issued through the kernel.org bugzilla 
 (http://bugzilla.kernel.org)
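
Putting several of the options above together, a mount using the
fscache backend with verbose error messages enabled might look like
this (server address and cache tag are illustrative):

	# mount -t 9p -o trans=tcp,cache=fscache,cachetag=mycache,debug=0x1 \
		192.168.0.2 /mnt/9p
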
index 736540045dc7b7c9e448fb90bd72fc63346e082d..23a181074f94b5f65ac25c603d65fc749fa5ed78 100644 (file)
@@ -4,7 +4,7 @@ Shared Subtrees
 Contents:
        1) Overview
        2) Features
-       3) smount command
+       3) Setting mount states
        4) Use-case
        5) Detailed semantics
        6) Quiz
@@ -41,14 +41,14 @@ replicas continue to be exactly same.
 
        Here is an example:
 
-       Lets say /mnt has a mount that is shared.
+       Let's say /mnt has a mount that is shared.
        mount --make-shared /mnt
 
-       note: mount command does not yet support the --make-shared flag.
-       I have included a small C program which does the same by executing
-       'smount /mnt shared'
+       Note: mount(8) command now supports the --make-shared flag,
+       so the sample 'smount' program is no longer needed and has been
+       removed.
 
-       #mount --bind /mnt /tmp
+       # mount --bind /mnt /tmp
        The above command replicates the mount at /mnt to the mountpoint /tmp
        and the contents of both the mounts remain identical.
 
@@ -58,8 +58,8 @@ replicas continue to be exactly same.
        #ls /tmp
        a b c
 
-       Now lets say we mount a device at /tmp/a
-       #mount /dev/sd0  /tmp/a
+       Now let's say we mount a device at /tmp/a
+       # mount /dev/sd0  /tmp/a
 
        #ls /tmp/a
        t1 t2 t3
@@ -80,21 +80,20 @@ replicas continue to be exactly same.
 
        Here is an example:
 
-       Lets say /mnt has a mount which is shared.
-       #mount --make-shared /mnt
+       Let's say /mnt has a mount which is shared.
+       # mount --make-shared /mnt
 
-       Lets bind mount /mnt to /tmp
-       #mount --bind /mnt /tmp
+       Let's bind mount /mnt to /tmp
+       # mount --bind /mnt /tmp
 
        the new mount at /tmp becomes a shared mount and it is a replica of
        the mount at /mnt.
 
-       Now lets make the mount at /tmp; a slave of /mnt
-       #mount --make-slave /tmp
-       [or smount /tmp slave]
+       Now let's make the mount at /tmp; a slave of /mnt
+       # mount --make-slave /tmp
 
-       lets mount /dev/sd0 on /mnt/a
-       #mount /dev/sd0 /mnt/a
+       let's mount /dev/sd0 on /mnt/a
+       # mount /dev/sd0 /mnt/a
 
        #ls /mnt/a
        t1 t2 t3
@@ -104,9 +103,9 @@ replicas continue to be exactly same.
 
        Note the mount event has propagated to the mount at /tmp
 
-       However lets see what happens if we mount something on the mount at /tmp
+       However let's see what happens if we mount something on the mount at /tmp
 
-       #mount /dev/sd1 /tmp/b
+       # mount /dev/sd1 /tmp/b
 
        #ls /tmp/b
        s1 s2 s3
@@ -124,12 +123,11 @@ replicas continue to be exactly same.
 
 2d) An unbindable mount is an unbindable private mount
 
-       lets say we have a mount at /mnt and we make is unbindable
+       let's say we have a mount at /mnt and we make it unbindable
 
-       #mount --make-unbindable /mnt
-        [ smount /mnt  unbindable ]
+       # mount --make-unbindable /mnt
 
-        Lets try to bind mount this mount somewhere else.
+        Let's try to bind mount this mount somewhere else.
         # mount --bind /mnt /tmp
         mount: wrong fs type, bad option, bad superblock on /mnt,
                or too many mounted file systems
@@ -137,149 +135,15 @@ replicas continue to be exactly same.
       Binding an unbindable mount is an invalid operation.
 
 
-3) smount command
+3) Setting mount states
 
-       Currently the mount command is not aware of shared subtree features.
-       Work is in progress to add the support in mount ( util-linux package ).
-       Till then use the following program.
+       The mount command (util-linux package) can be used to set mount
+       states:
 
-       ------------------------------------------------------------------------
-       //
-       //this code was developed my Miklos Szeredi <miklos@szeredi.hu>
-       //and modified by Ram Pai <linuxram@us.ibm.com>
-       // sample usage:
-       //              smount /tmp shared
-       //
-       #include <stdio.h>
-       #include <stdlib.h>
-       #include <unistd.h>
-       #include <string.h>
-       #include <sys/mount.h>
-       #include <sys/fsuid.h>
-
-       #ifndef MS_REC
-       #define MS_REC          0x4000  /* 16384: Recursive loopback */
-       #endif
-
-       #ifndef MS_SHARED
-       #define MS_SHARED               1<<20   /* Shared */
-       #endif
-
-       #ifndef MS_PRIVATE
-       #define MS_PRIVATE              1<<18   /* Private */
-       #endif
-
-       #ifndef MS_SLAVE
-       #define MS_SLAVE                1<<19   /* Slave */
-       #endif
-
-       #ifndef MS_UNBINDABLE
-       #define MS_UNBINDABLE           1<<17   /* Unbindable */
-       #endif
-
-       int main(int argc, char *argv[])
-       {
-               int type;
-               if(argc != 3) {
-                       fprintf(stderr, "usage: %s dir "
-                       "<rshared|rslave|rprivate|runbindable|shared|slave"
-                       "|private|unbindable>\n" , argv[0]);
-                       return 1;
-               }
-
-               fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
-
-               if (strcmp(argv[2],"rshared")==0)
-                       type=(MS_SHARED|MS_REC);
-               else if (strcmp(argv[2],"rslave")==0)
-                       type=(MS_SLAVE|MS_REC);
-               else if (strcmp(argv[2],"rprivate")==0)
-                       type=(MS_PRIVATE|MS_REC);
-               else if (strcmp(argv[2],"runbindable")==0)
-                       type=(MS_UNBINDABLE|MS_REC);
-               else if (strcmp(argv[2],"shared")==0)
-                       type=MS_SHARED;
-               else if (strcmp(argv[2],"slave")==0)
-                       type=MS_SLAVE;
-               else if (strcmp(argv[2],"private")==0)
-                       type=MS_PRIVATE;
-               else if (strcmp(argv[2],"unbindable")==0)
-                       type=MS_UNBINDABLE;
-               else {
-                       fprintf(stderr, "invalid operation: %s\n", argv[2]);
-                       return 1;
-               }
-               setfsuid(getuid());
-
-               if(mount("", argv[1], "dontcare", type, "") == -1) {
-                       perror("mount");
-                       return 1;
-               }
-               return 0;
-       }
-       -----------------------------------------------------------------------
-
-       Copy the above code snippet into smount.c
-       gcc -o smount smount.c
-
-
-       (i) To mark all the mounts under /mnt as shared execute the following
-       command:
-
-               smount /mnt rshared
-               the corresponding syntax planned for mount command is
-               mount --make-rshared /mnt
-
-           just to mark a mount /mnt as shared, execute the following
-           command:
-               smount /mnt shared
-               the corresponding syntax planned for mount command is
-               mount --make-shared /mnt
-
-       (ii) To mark all the shared mounts under /mnt as slave execute the
-       following
-
-            command:
-               smount /mnt rslave
-               the corresponding syntax planned for mount command is
-               mount --make-rslave /mnt
-
-           just to mark a mount /mnt as slave, execute the following
-           command:
-               smount /mnt slave
-               the corresponding syntax planned for mount command is
-               mount --make-slave /mnt
-
-       (iii) To mark all the mounts under /mnt as private execute the
-       following command:
-
-               smount /mnt rprivate
-               the corresponding syntax planned for mount command is
-               mount --make-rprivate /mnt
-
-           just to mark a mount /mnt as private, execute the following
-           command:
-               smount /mnt private
-               the corresponding syntax planned for mount command is
-               mount --make-private /mnt
-
-             NOTE: by default all the mounts are created as private. But if
-             you want to change some shared/slave/unbindable  mount as
-             private at a later point in time, this command can help.
-
-       (iv) To mark all the mounts under /mnt as unbindable execute the
-       following
-
-            command:
-               smount /mnt runbindable
-               the corresponding syntax planned for mount command is
-               mount --make-runbindable /mnt
-
-           just to mark a mount /mnt as unbindable, execute the following
-           command:
-               smount /mnt unbindable
-               the corresponding syntax planned for mount command is
-               mount --make-unbindable /mnt
+       mount --make-shared mountpoint
+       mount --make-slave mountpoint
+       mount --make-private mountpoint
+       mount --make-unbindable mountpoint
 
 
 4) Use cases
@@ -350,7 +214,7 @@ replicas continue to be exactly same.
                mount --rbind / /view/v3
                mount --rbind / /view/v4
 
-               and if /usr has a versioning filesystem mounted, than that
+               and if /usr has a versioning filesystem mounted, then that
                mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
                /view/v4/usr too
 
@@ -390,7 +254,7 @@ replicas continue to be exactly same.
 
                For example:
                        mount --make-shared /mnt
-                       mount --bin /mnt /tmp
+                       mount --bind /mnt /tmp
 
                The mount at /mnt and that at /tmp are both shared and belong
                to the same peer group. Anything mounted or unmounted under
@@ -558,7 +422,7 @@ replicas continue to be exactly same.
        then the subtree under the unbindable mount is pruned in the new
        location.
 
-       eg: lets say we have the following mount tree.
+       eg: let's say we have the following mount tree.
 
                A
              /   \
@@ -566,7 +430,7 @@ replicas continue to be exactly same.
             / \ / \
             D E F G
 
-            Lets say all the mount except the mount C in the tree are
+            Let's say all the mount except the mount C in the tree are
             of a type other than unbindable.
 
             If this tree is rbound to say Z
@@ -683,13 +547,13 @@ replicas continue to be exactly same.
        'b' on mounts that receive propagation from mount 'B' and does not have
        sub-mounts within them are unmounted.
 
-       Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to
+       Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
        each other.
 
-       lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
+       let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
        'B1', 'B2' and 'B3' respectively.
 
-       lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
+       let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
        mount 'B1', 'B2' and 'B3' respectively.
 
        if 'C1' is unmounted, all the mounts that are most-recently-mounted on
@@ -710,7 +574,7 @@ replicas continue to be exactly same.
        A cloned namespace contains all the mounts as that of the parent
        namespace.
 
-       Lets say 'A' and 'B' are the corresponding mounts in the parent and the
+       Let's say 'A' and 'B' are the corresponding mounts in the parent and the
        child namespace.
 
        If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
@@ -759,11 +623,11 @@ replicas continue to be exactly same.
                mount --make-slave /mnt
 
                At this point we have the first mount at /tmp and
-               its root dentry is 1. Lets call this mount 'A'
+               its root dentry is 1. Let's call this mount 'A'
                And then we have a second mount at /tmp1 with root
-               dentry 2. Lets call this mount 'B'
+               dentry 2. Let's call this mount 'B'
                Next we have a third mount at /mnt with root dentry
-               mnt. Lets call this mount 'C'
+               mnt. Let's call this mount 'C'
 
                'B' is the slave of 'A' and 'C' is a slave of 'B'
                A -> B -> C
@@ -794,7 +658,7 @@ replicas continue to be exactly same.
 
        Q3 Why is unbindable mount needed?
 
-               Lets say we want to replicate the mount tree at multiple
+               Let's say we want to replicate the mount tree at multiple
                locations within the same subtree.
 
                if one rbind mounts a tree within the same subtree 'n' times
@@ -803,7 +667,7 @@ replicas continue to be exactly same.
                mounts. Here is a example.
 
                step 1:
-                  lets say the root tree has just two directories with
+                  let's say the root tree has just two directories with
                   one vfsmount.
                                    root
                                   /    \
@@ -875,7 +739,7 @@ replicas continue to be exactly same.
                Unclonable mounts come in handy here.
 
                step 1:
-                  lets say the root tree has just two directories with
+                  let's say the root tree has just two directories with
                   one vfsmount.
                                    root
                                   /    \
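
Once the states above are set, one way to verify them on kernels that
provide /proc/self/mountinfo is to look at the optional propagation
fields (shared:N, master:N); the output below is illustrative:

	# mount --make-shared /mnt
	# grep ' /mnt ' /proc/self/mountinfo
	36 20 8:1 / /mnt rw,relatime shared:1 - ext3 /dev/sda1 rw
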
index f49eecf2e57354a7dd94ad73a911ba2f69135c19..623f094c9d8d95a535d3274f60d6a30dc8d71b5f 100644 (file)
@@ -536,6 +536,7 @@ struct address_space_operations {
        /* migrate the contents of a page to the specified target */
        int (*migratepage) (struct page *, struct page *);
        int (*launder_page) (struct page *);
+       int (*error_remove_page) (struct address_space *mapping, struct page *page);
 };
 
   writepage: called by the VM to write a dirty page to backing store.
@@ -694,6 +695,12 @@ struct address_space_operations {
        prevent redirtying the page, it is kept locked during the whole
        operation.
 
+  error_remove_page: normally set to generic_error_remove_page if truncation
+       is ok for this address space. Used for memory failure handling.
+       Setting this implies you must be prepared for pages to go away
+       underneath you, unless you have them locked or their reference
+       counts raised.
+
+
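
As a sketch of the wiring described above (the ops structure is
hypothetical and abbreviated; real filesystems set many more methods):

	static const struct address_space_operations myfs_aops = {
		/* ... readpage, writepage and friends ... */
		.error_remove_page = generic_error_remove_page,
	};
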
 The File Object
 ===============
 
index dbbe6c7025b055a1bda77b2db3d1e796d2237e12..92267b62db5937f05ff7da2b6a0a760d3e4321a4 100644 (file)
@@ -4,7 +4,9 @@ Kernel driver coretemp
 Supported chips:
   * All Intel Core family
     Prefix: 'coretemp'
-    CPUID: family 0x6, models 0xe, 0xf, 0x16, 0x17
+    CPUID: family 0x6, models 0xe (Pentium M DC), 0xf (Core 2 DC 65nm),
+                              0x16 (Core 2 SC 65nm), 0x17 (Penryn 45nm),
+                              0x1a (Nehalem), 0x1c (Atom), 0x1e (Lynnfield)
     Datasheet: Intel 64 and IA-32 Architectures Software Developer's Manual
                Volume 3A: System Programming Guide
                http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
diff --git a/Documentation/hwmon/fscher b/Documentation/hwmon/fscher
deleted file mode 100644 (file)
index 6403165..0000000
+++ /dev/null
@@ -1,169 +0,0 @@
-Kernel driver fscher
-====================
-
-Supported chips:
-  * Fujitsu-Siemens Hermes chip
-    Prefix: 'fscher'
-    Addresses scanned: I2C 0x73
-
-Authors:
-        Reinhard Nissl <rnissl@gmx.de> based on work
-        from Hermann Jung <hej@odn.de>,
-        Frodo Looijaard <frodol@dds.nl>,
-        Philip Edelbrock <phil@netroedge.com>
-
-Description
------------
-
-This driver implements support for the Fujitsu-Siemens Hermes chip. It is
-described in the 'Register Set Specification BMC Hermes based Systemboard'
-from Fujitsu-Siemens.
-
-The Hermes chip implements a hardware-based system management, e.g. for
-controlling fan speed and core voltage. There is also a watchdog counter on
-the chip which can trigger an alarm and even shut the system down.
-
-The chip provides three temperature values (CPU, motherboard and
-auxiliary), three voltage values (+12V, +5V and battery) and three fans
-(power supply, CPU and auxiliary).
-
-Temperatures are measured in degrees Celsius. The resolution is 1 degree.
-
-Fan rotation speeds are reported in RPM (rotations per minute). The value
-can be divided by a programmable divider (1, 2 or 4) which is stored on
-the chip.
-
-Voltage sensors (also known as "in" sensors) report their values in volts.
-
-All values are reported as final values from the driver. There is no need
-for further calculations.
-
-
-Detailed description
---------------------
-
-Below you'll find a single line description of all the bit values. With
-this information, you're able to decode e. g. alarms, wdog, etc. To make
-use of the watchdog, you'll need to set the watchdog time and enable the
-watchdog. After that it is necessary to restart the watchdog time within
-the specified period of time, or a system reset will occur.
-
-* revision
-  READING & 0xff = 0x??: HERMES revision identification
-
-* alarms
-  READING & 0x80 = 0x80: CPU throttling active
-  READING & 0x80 = 0x00: CPU running at full speed
-
-  READING & 0x10 = 0x10: software event (see control:1)
-  READING & 0x10 = 0x00: no software event
-
-  READING & 0x08 = 0x08: watchdog event (see wdog:2)
-  READING & 0x08 = 0x00: no watchdog event
-
-  READING & 0x02 = 0x02: thermal event (see temp*:1)
-  READING & 0x02 = 0x00: no thermal event
-
-  READING & 0x01 = 0x01: fan event (see fan*:1)
-  READING & 0x01 = 0x00: no fan event
-
-  READING & 0x13 ! 0x00: ALERT LED is flashing
-
-* control
-  READING & 0x01 = 0x01: software event
-  READING & 0x01 = 0x00: no software event
-
-  WRITING & 0x01 = 0x01: set software event
-  WRITING & 0x01 = 0x00: clear software event
-
-* watchdog_control
-  READING & 0x80 = 0x80: power off on watchdog event while thermal event
-  READING & 0x80 = 0x00: watchdog power off disabled (just system reset enabled)
-
-  READING & 0x40 = 0x40: watchdog timebase 60 seconds (see also wdog:1)
-  READING & 0x40 = 0x00: watchdog timebase  2 seconds
-
-  READING & 0x10 = 0x10: watchdog enabled
-  READING & 0x10 = 0x00: watchdog disabled
-
-  WRITING & 0x80 = 0x80: enable "power off on watchdog event while thermal event"
-  WRITING & 0x80 = 0x00: disable "power off on watchdog event while thermal event"
-
-  WRITING & 0x40 = 0x40: set watchdog timebase to 60 seconds
-  WRITING & 0x40 = 0x00: set watchdog timebase to  2 seconds
-
-  WRITING & 0x20 = 0x20: disable watchdog
-
-  WRITING & 0x10 = 0x10: enable watchdog / restart watchdog time
-
-* watchdog_state
-  READING & 0x02 = 0x02: watchdog system reset occurred
-  READING & 0x02 = 0x00: no watchdog system reset occurred
-
-  WRITING & 0x02 = 0x02: clear watchdog event
-
-* watchdog_preset
-  READING & 0xff = 0x??: configured watch dog time in units (see wdog:3 0x40)
-
-  WRITING & 0xff = 0x??: configure watch dog time in units
-
-* in*     (0: +5V, 1: +12V, 2: onboard 3V battery)
-  READING: actual voltage value
-
-* temp*_status   (1: CPU sensor, 2: onboard sensor, 3: auxiliary sensor)
-  READING & 0x02 = 0x02: thermal event (overtemperature)
-  READING & 0x02 = 0x00: no thermal event
-
-  READING & 0x01 = 0x01: sensor is working
-  READING & 0x01 = 0x00: sensor is faulty
-
-  WRITING & 0x02 = 0x02: clear thermal event
-
-* temp*_input   (1: CPU sensor, 2: onboard sensor, 3: auxiliary sensor)
-  READING: actual temperature value
-
-* fan*_status   (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
-  READING & 0x04 = 0x04: fan event (fan fault)
-  READING & 0x04 = 0x00: no fan event
-
-  WRITING & 0x04 = 0x04: clear fan event
-
-* fan*_div (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
-       Divisors 2,4 and 8 are supported, both for reading and writing
-
-* fan*_pwm   (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
-  READING & 0xff = 0x00: fan may be switched off
-  READING & 0xff = 0x01: fan must run at least at minimum speed (supply: 6V)
-  READING & 0xff = 0xff: fan must run at maximum speed (supply: 12V)
-  READING & 0xff = 0x??: fan must run at least at given speed (supply: 6V..12V)
-
-  WRITING & 0xff = 0x00: fan may be switched off
-  WRITING & 0xff = 0x01: fan must run at least at minimum speed (supply: 6V)
-  WRITING & 0xff = 0xff: fan must run at maximum speed (supply: 12V)
-  WRITING & 0xff = 0x??: fan must run at least at given speed (supply: 6V..12V)
-
-* fan*_input   (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
-  READING: actual RPM value
-
-
-Limitations
------------
-
-* Measuring fan speed
-It seems that the chip counts "ripples" (typical fans produce 2 ripples per
-rotation while VERAX fans produce 18) in a 9-bit register. This register is
-read out every second, then the ripple prescaler (2, 4 or 8) is applied and
-the result is stored in the 8 bit output register. Due to the limitation of
-the counting register to 9 bits, it is impossible to measure a VERAX fan
-properly (even with a prescaler of 8). At its maximum speed of 3500 RPM the
-fan produces 1080 ripples per second which causes the counting register to
-overflow twice, leading to only 186 RPM.
-
-* Measuring input voltages
-in2 ("battery") reports the voltage of the onboard lithium battery and not
-+3.3V from the power supply.
-
-* Undocumented features
-Fujitsu-Siemens Computers has not documented all features of the chip so
-far. Their software, System Guard, shows that there are a still some
-features which cannot be controlled by this implementation.
index aafca0a8f66ab9d32b3fc5e3ba1a3c81403e3415..947374977ca5a2ef72e59cdb730d9d50c7f6b0b2 100644 (file)
@@ -135,6 +135,7 @@ Code        Seq#    Include File            Comments
                                        <http://mikonos.dia.unisa.it/tcfs>
 'l'    40-7F   linux/udf_fs_i.h        in development:
                                        <http://sourceforge.net/projects/linux-udf/>
+'m'    00-09   linux/mmtimer.h
 'm'    all     linux/mtio.h            conflict!
 'm'    all     linux/soundcard.h       conflict!
 'm'    all     linux/synclink.h        conflict!
index f3355b6812df1a439e5d53ad24df6375cc3ad940..bb3bf38f03dac1e055dc973ce65d49d14defbe42 100644 (file)
@@ -65,6 +65,22 @@ INSTALL_PATH
 INSTALL_PATH specifies where to place the updated kernel and system map
 images. Default is /boot, but you can set it to other values.
 
+INSTALLKERNEL
+--------------------------------------------------
+Install script called when using "make install".
+The default name is "installkernel".
+
+The script will be called with the following arguments:
+    $1 - kernel version
+    $2 - kernel image file
+    $3 - kernel map file
+    $4 - default install path (use root directory if blank)
+
+The implementation of "make install" is architecture specific
+and may differ from the above.
+
+INSTALLKERNEL makes it possible to specify a custom installer
+when cross-compiling a kernel.
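+
+For example, a cross build can point at a wrapper script (the script
+name below is only illustrative):
+
+        make ARCH=arm CROSS_COMPILE=arm-linux- \
+             INSTALLKERNEL=arm-installkernel install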
 
 MODLIB
 --------------------------------------------------
index d76cfd8712e124905807332fba8b3feee89306f2..71c602d61680d422694a81c08b0cd4f802e18e7c 100644 (file)
@@ -18,6 +18,7 @@ This document describes the Linux kernel Makefiles.
           --- 3.9 Dependency tracking
           --- 3.10 Special Rules
           --- 3.11 $(CC) support functions
+          --- 3.12 $(LD) support functions
 
        === 4 Host Program support
           --- 4.1 Simple Host Program
@@ -435,14 +436,14 @@ more details, with real examples.
        The second argument is optional, and if supplied will be used
        if first argument is not supported.
 
-    ld-option
-       ld-option is used to check if $(CC) when used to link object files
+    cc-ldoption
+       cc-ldoption is used to check if $(CC) when used to link object files
        supports the given option.  An optional second option may be
        specified if the first option is not supported.
 
        Example:
                #arch/i386/kernel/Makefile
-               vsyscall-flags += $(call ld-option, -Wl$(comma)--hash-style=sysv)
+               vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 
        In the above example, vsyscall-flags will be assigned the option
        -Wl$(comma)--hash-style=sysv if it is supported by $(CC).
@@ -570,6 +571,19 @@ more details, with real examples.
                        endif
                endif
 
+--- 3.12 $(LD) support functions
+
+    ld-option
+       ld-option is used to check if $(LD) supports the supplied option.
+       ld-option takes two options as arguments.
+       The second argument is optional, and is used if the first
+       option is not supported by $(LD).
+
+       Example:
+               #Makefile
+               LDFLAGS_vmlinux += $(call ld-option, -X)
+
+
 === 4 Host Program support
 
 Kbuild supports building executables on the host for use during the
index 1458448436cc4589bc329167ba945eb92719792b..62682500878a97e69a107a99655ca280e72df915 100644 (file)
@@ -96,13 +96,16 @@ handles that the Linux kernel will allocate. When you get lots
 of error messages about running out of file handles, you might
 want to increase this limit.
 
-The three values in file-nr denote the number of allocated
-file handles, the number of unused file handles and the maximum
-number of file handles. When the allocated file handles come
-close to the maximum, but the number of unused file handles is
-significantly greater than 0, you've encountered a peak in your 
-usage of file handles and you don't need to increase the maximum.
-
+Historically, the three values in file-nr denoted the number of
+allocated file handles, the number of allocated but unused file
+handles, and the maximum number of file handles. Linux 2.6 always
+reports 0 as the number of free file handles -- this is not an
+error, it just means that the number of allocated file handles
+exactly matches the number of used file handles.
+
+Attempts to allocate more file descriptors than file-max are
+reported with printk; look for "VFS: file-max limit <number>
+reached" in the kernel log.
 ==============================================================
 
 nr_open:
index b3d8b492274052c8fbb538b1fba96c16a4d64cff..a028b92001eddca50be93544c8667bd585417401 100644 (file)
@@ -22,6 +22,7 @@ show up in /proc/sys/kernel:
 - callhome                  [ S390 only ]
 - auto_msgmni
 - core_pattern
+- core_pipe_limit
 - core_uses_pid
 - ctrl-alt-del
 - dentry-state
@@ -135,6 +136,27 @@ core_pattern is used to specify a core dumpfile pattern name.
 
 ==============================================================
 
+core_pipe_limit:
+
+This sysctl is only applicable when core_pattern is configured to pipe
+core files to a user space helper (when the first character of
+core_pattern is a '|', see above).  When collecting cores via a pipe
+to an application, it is occasionally useful for the collecting
+application to gather data about the crashing process from its
+/proc/pid directory.  In order to do this safely, the kernel must wait
+for the collecting process to exit, so as not to remove the crashing
+process's proc files prematurely.  This in turn creates the
+possibility that a misbehaving userspace collecting process can block
+the reaping of a crashed process simply by never exiting.  This sysctl
+defends against that.  It defines how many crashing processes may be
+piped to user space applications in parallel.  If this value is
+exceeded, those crashing processes above that value are noted in the
+kernel log and their cores are skipped.  0 is a special value,
+indicating that unlimited processes may be captured in parallel, but
+that no waiting will take place (i.e. the collecting process is not
+guaranteed access to /proc/<crashing pid>/).  This value defaults to 0.
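+
+A minimal sketch of such a collecting helper, assuming core_pattern is
+set to something like "|/usr/local/sbin/core-collector %p" (the helper
+path and the use of the %p specifier are only illustrative):
+
+        #include <stdio.h>
+
+        int main(int argc, char **argv)
+        {
+                char path[64], line[256];
+                FILE *f;
+
+                if (argc < 2)
+                        return 1;
+                /* %p passes the crashing PID as argv[1]; its /proc
+                   directory stays around until this helper exits */
+                snprintf(path, sizeof(path), "/proc/%s/status", argv[1]);
+                f = fopen(path, "r");
+                if (f) {
+                        while (fgets(line, sizeof(line), f))
+                                fputs(line, stderr);
+                        fclose(f);
+                }
+                /* ... consume the core image itself from stdin ... */
+                return 0;
+        }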
+
+==============================================================
+
 core_uses_pid:
 
 The default coredump filename is "core".  By setting
index e6fb1ec2744b180d25bd41e6a672e5627dd8e971..a6e360d2055c561ae347a75e20c015b3f61a8928 100644 (file)
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm:
 - legacy_va_layout
 - lowmem_reserve_ratio
 - max_map_count
+- memory_failure_early_kill
+- memory_failure_recovery
 - min_free_kbytes
 - min_slab_ratio
 - min_unmapped_ratio
@@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm:
 - vfs_cache_pressure
 - zone_reclaim_mode
 
-
 ==============================================================
 
 block_dump
@@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation.
 
 The default value is 65536.
 
+=============================================================
+
+memory_failure_early_kill:
+
+Control how processes are killed when an uncorrected memory error
+(typically a 2-bit error in a memory module) that cannot be handled
+by the kernel is detected in the background by hardware. In some
+cases (like the page still having a valid copy on disk) the kernel
+will handle the failure transparently without affecting any
+applications. But if there is no other up-to-date copy of the data,
+it will kill processes to prevent any data corruption from propagating.
+
+1: Kill all processes that have the corrupted, non-reloadable page
+mapped as soon as the corruption is detected.  Note this is not
+supported for a few types of pages, like kernel internally allocated
+data or the swap cache, but it works for the majority of user pages.
+
+0: Only unmap the corrupted page from all processes and kill a
+process only when it tries to access the page.
+
+The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
+handle this if they want to.
+
+This is only active on architectures/platforms with advanced machine
+check handling and depends on the hardware capabilities.
+
+Applications can override this setting individually with the PR_MCE_KILL prctl.
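+
+A minimal sketch of a process opting in to early kill and handling the
+resulting SIGBUS (the fallback #defines carry the values from
+linux/prctl.h and the siginfo headers, for the benefit of older
+userspace headers):
+
+        #include <signal.h>
+        #include <string.h>
+        #include <unistd.h>
+        #include <sys/prctl.h>
+
+        #ifndef PR_MCE_KILL
+        #define PR_MCE_KILL        33
+        #define PR_MCE_KILL_SET    1
+        #define PR_MCE_KILL_EARLY  1
+        #endif
+        #ifndef BUS_MCEERR_AO
+        #define BUS_MCEERR_AO      5
+        #endif
+
+        static void mce_handler(int sig, siginfo_t *si, void *ctx)
+        {
+                if (si->si_code == BUS_MCEERR_AO) {
+                        /* si->si_addr points into the poisoned page; a
+                           recoverable application could rebuild that
+                           data instead of exiting */
+                }
+                _exit(1);
+        }
+
+        int main(void)
+        {
+                struct sigaction sa;
+
+                memset(&sa, 0, sizeof(sa));
+                sa.sa_sigaction = mce_handler;
+                sa.sa_flags = SA_SIGINFO;
+                sigaction(SIGBUS, &sa, NULL);
+                /* opt in to early kill for this process only */
+                prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
+                /* ... */
+                return 0;
+        }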
+
+==============================================================
+
+memory_failure_recovery
+
+Enable memory failure recovery (when supported by the platform).
+
+1: Attempt recovery.
+
+0: Always panic on a memory failure.
+
 ==============================================================
 
 min_free_kbytes:
index 33e8a023df02287cbfe69854f41c8d316668f082..09b164a5700ff371615d6c701fc7578c6bc2f233 100644 (file)
@@ -1 +1,2 @@
+page-types
 slabinfo
index f366fa956179505cc602ff79f90adc83f95f7a77..25fadb448760008dc6408ed0cca2a72c66b2efd0 100644 (file)
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
 mm start up ... this is a loose form of stability on mm_users. For
 example, it is used in copy_mm to protect against a racing tlb_gather_mmu
 single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might 
+truncate) does not lose sending ipi's to cloned threads that might
 be spawned underneath it and go to user mode to drag in pte's into tlbs.
 
 swap_lock
index 3eda8ea00852ee7cbeb44ad39a17a74f0d73bcc4..fa1a30d9e9d540a27324affbfe8aae579661c877 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
  */
 
+#define _LARGEFILE64_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
 #include <getopt.h>
 #include <limits.h>
+#include <assert.h>
 #include <sys/types.h>
 #include <sys/errno.h>
 #include <sys/fcntl.h>
 
 
+/*
+ * pagemap kernel ABI bits
+ */
+
+#define PM_ENTRY_BYTES      sizeof(uint64_t)
+#define PM_STATUS_BITS      3
+#define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
+#define PM_STATUS_MASK      (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
+#define PM_STATUS(nr)       (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
+#define PM_PSHIFT_BITS      6
+#define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
+#define PM_PSHIFT_MASK      (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
+#define PM_PSHIFT(x)        (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define PM_PFRAME_MASK      ((1LL << PM_PSHIFT_OFFSET) - 1)
+#define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)
+
+#define PM_PRESENT          PM_STATUS(4LL)
+#define PM_SWAP             PM_STATUS(2LL)
+
+
 /*
  * kernel page flags
  */
@@ -126,6 +148,14 @@ static int         nr_addr_ranges;
 static unsigned long   opt_offset[MAX_ADDR_RANGES];
 static unsigned long   opt_size[MAX_ADDR_RANGES];
 
+#define MAX_VMAS       10240
+static int             nr_vmas;
+static unsigned long   pg_start[MAX_VMAS];
+static unsigned long   pg_end[MAX_VMAS];
+static unsigned long   voffset;
+
+static int             pagemap_fd;
+
 #define MAX_BIT_FILTERS        64
 static int             nr_bit_filters;
 static uint64_t                opt_mask[MAX_BIT_FILTERS];
@@ -135,7 +165,6 @@ static int          page_size;
 
 #define PAGES_BATCH    (64 << 10)      /* 64k pages */
 static int             kpageflags_fd;
-static uint64_t                kpageflags_buf[KPF_BYTES * PAGES_BATCH];
 
 #define HASH_SHIFT     13
 #define HASH_SIZE      (1 << HASH_SHIFT)
@@ -158,6 +187,11 @@ static uint64_t    page_flags[HASH_SIZE];
        type __min2 = (y);                      \
        __min1 < __min2 ? __min1 : __min2; })
 
+#define max_t(type, x, y) ({                   \
+       type __max1 = (x);                      \
+       type __max2 = (y);                      \
+       __max1 > __max2 ? __max1 : __max2; })
+
 static unsigned long pages2mb(unsigned long pages)
 {
        return (pages * page_size) >> 20;
@@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags)
 static void show_page_range(unsigned long offset, uint64_t flags)
 {
        static uint64_t      flags0;
+       static unsigned long voff;
        static unsigned long index;
        static unsigned long count;
 
-       if (flags == flags0 && offset == index + count) {
+       if (flags == flags0 && offset == index + count &&
+           (!opt_pid || voffset == voff + count)) {
                count++;
                return;
        }
 
-       if (count)
-               printf("%lu\t%lu\t%s\n",
+       if (count) {
+               if (opt_pid)
+                       printf("%lx\t", voff);
+               printf("%lx\t%lx\t%s\n",
                                index, count, page_flag_name(flags0));
+       }
 
        flags0 = flags;
        index  = offset;
+       voff   = voffset;
        count  = 1;
 }
 
 static void show_page(unsigned long offset, uint64_t flags)
 {
-       printf("%lu\t%s\n", offset, page_flag_name(flags));
+       if (opt_pid)
+               printf("%lx\t", voffset);
+       printf("%lx\t%s\n", offset, page_flag_name(flags));
 }
 
 static void show_summary(void)
@@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count)
        lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
 
        while (count) {
+               uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
+
                batch = min_t(unsigned long, count, PAGES_BATCH);
                n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
                if (n == 0)
@@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count)
        }
 }
 
+
+#define PAGEMAP_BATCH  4096
+static unsigned long task_pfn(unsigned long pgoff)
+{
+       static uint64_t buf[PAGEMAP_BATCH];
+       static unsigned long start;
+       static long count;
+       uint64_t pfn;
+
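+       /* refill the cached batch of pagemap entries when pgoff falls outside it */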
+       if (pgoff < start || pgoff >= start + count) {
+               if (lseek64(pagemap_fd,
+                           (uint64_t)pgoff * PM_ENTRY_BYTES,
+                           SEEK_SET) < 0) {
+                       perror("pagemap seek");
+                       exit(EXIT_FAILURE);
+               }
+               count = read(pagemap_fd, buf, sizeof(buf));
+               if (count == 0)
+                       return 0;
+               if (count < 0) {
+                       perror("pagemap read");
+                       exit(EXIT_FAILURE);
+               }
+               if (count % PM_ENTRY_BYTES) {
+                       fatal("pagemap read not aligned.\n");
+                       exit(EXIT_FAILURE);
+               }
+               count /= PM_ENTRY_BYTES;
+               start = pgoff;
+       }
+
+       pfn = buf[pgoff - start];
+       if (pfn & PM_PRESENT)
+               pfn = PM_PFRAME(pfn);
+       else
+               pfn = 0;
+
+       return pfn;
+}
+
+static void walk_task(unsigned long index, unsigned long count)
+{
+       int i = 0;
+       const unsigned long end = index + count;
+
+       while (index < end) {
+
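+               /* skip VMAs that end at or before the current index */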
+               while (pg_end[i] <= index)
+                       if (++i >= nr_vmas)
+                               return;
+               if (pg_start[i] >= end)
+                       return;
+
+               voffset = max_t(unsigned long, pg_start[i], index);
+               index   = min_t(unsigned long, pg_end[i], end);
+
+               assert(voffset < index);
+               for (; voffset < index; voffset++) {
+                       unsigned long pfn = task_pfn(voffset);
+                       if (pfn)
+                               walk_pfn(pfn, 1);
+               }
+       }
+}
+
+static void add_addr_range(unsigned long offset, unsigned long size)
+{
+       if (nr_addr_ranges >= MAX_ADDR_RANGES)
+               fatal("too many addr ranges\n");
+
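+       /* clamp size so that offset + size cannot wrap past ULONG_MAX */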
+       opt_offset[nr_addr_ranges] = offset;
+       opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
+       nr_addr_ranges++;
+}
+
 static void walk_addr_ranges(void)
 {
        int i;
@@ -415,10 +534,13 @@ static void walk_addr_ranges(void)
        }
 
        if (!nr_addr_ranges)
-               walk_pfn(0, ULONG_MAX);
+               add_addr_range(0, ULONG_MAX);
 
        for (i = 0; i < nr_addr_ranges; i++)
-               walk_pfn(opt_offset[i], opt_size[i]);
+               if (!opt_pid)
+                       walk_pfn(opt_offset[i], opt_size[i]);
+               else
+                       walk_task(opt_offset[i], opt_size[i]);
 
        close(kpageflags_fd);
 }
@@ -446,8 +568,8 @@ static void usage(void)
 "            -r|--raw                  Raw mode, for kernel developers\n"
 "            -a|--addr    addr-spec    Walk a range of pages\n"
 "            -b|--bits    bits-spec    Walk pages with specified bits\n"
-#if 0 /* planned features */
 "            -p|--pid     pid          Walk process address space\n"
+#if 0 /* planned features */
 "            -f|--file    filename     Walk file address space\n"
 #endif
 "            -l|--list                 Show page details in ranges\n"
@@ -459,7 +581,7 @@ static void usage(void)
 "            N+M                       pages range from N to N+M-1\n"
 "            N,M                       pages range from N to M-1\n"
 "            N,                        pages range from N to end\n"
-"            ,M                        pages range from 0 to M\n"
+"            ,M                        pages range from 0 to M-1\n"
 "bits-spec:\n"
 "            bit1,bit2                 (flags & (bit1|bit2)) != 0\n"
 "            bit1,bit2=bit1            (flags & (bit1|bit2)) == bit1\n"
@@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str)
 
 static void parse_pid(const char *str)
 {
+       FILE *file;
+       char buf[5000];
+
        opt_pid = parse_number(str);
-}
 
-static void parse_file(const char *name)
-{
+       sprintf(buf, "/proc/%d/pagemap", opt_pid);
+       pagemap_fd = open(buf, O_RDONLY);
+       if (pagemap_fd < 0) {
+               perror(buf);
+               exit(EXIT_FAILURE);
+       }
+
+       sprintf(buf, "/proc/%d/maps", opt_pid);
+       file = fopen(buf, "r");
+       if (!file) {
+               perror(buf);
+               exit(EXIT_FAILURE);
+       }
+
+       while (fgets(buf, sizeof(buf), file) != NULL) {
+               unsigned long vm_start;
+               unsigned long vm_end;
+               unsigned long long pgoff;
+               int major, minor;
+               char r, w, x, s;
+               unsigned long ino;
+               int n;
+
+               n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
+                          &vm_start,
+                          &vm_end,
+                          &r, &w, &x, &s,
+                          &pgoff,
+                          &major, &minor,
+                          &ino);
+               if (n < 10) {
+                       fprintf(stderr, "unexpected line: %s\n", buf);
+                       continue;
+               }
+               pg_start[nr_vmas] = vm_start / page_size;
+               pg_end[nr_vmas] = vm_end / page_size;
+               if (++nr_vmas >= MAX_VMAS) {
+                       fprintf(stderr, "too many VMAs\n");
+                       break;
+               }
+       }
+       fclose(file);
 }
 
-static void add_addr_range(unsigned long offset, unsigned long size)
+static void parse_file(const char *name)
 {
-       if (nr_addr_ranges >= MAX_ADDR_RANGES)
-               fatal("too much addr ranges\n");
-
-       opt_offset[nr_addr_ranges] = offset;
-       opt_size[nr_addr_ranges] = size;
-       nr_addr_ranges++;
 }
 
 static void parse_addr_range(const char *optarg)
@@ -676,8 +834,10 @@ int main(int argc, char *argv[])
                }
        }
 
+       if (opt_list && opt_pid)
+               printf("voffset\t");
        if (opt_list == 1)
-               printf("offset\tcount\tflags\n");
+               printf("offset\tlen\tflags\n");
        if (opt_list == 2)
                printf("offset\tflags\n");
 
index 7c1c0b05b298f069756c1dc119083e75f0e74a24..e797c4d48cf10775a680abf5577eb28fae66e842 100644 (file)
@@ -257,12 +257,6 @@ W: http://www.lesswatts.org/projects/acpi/
 S:     Supported
 F:     drivers/acpi/fan.c
 
-ACPI PCI HOTPLUG DRIVER
-M:     Kristen Carlson Accardi <kristen.c.accardi@intel.com>
-L:     linux-pci@vger.kernel.org
-S:     Supported
-F:     drivers/pci/hotplug/acpi*
-
 ACPI THERMAL DRIVER
 M:     Zhang Rui <rui.zhang@intel.com>
 L:     linux-acpi@vger.kernel.org
@@ -2331,7 +2325,9 @@ S:        Orphan
 F:     drivers/hwmon/
 
 HARDWARE RANDOM NUMBER GENERATOR CORE
-S:     Orphan
+M:     Matt Mackall <mpm@selenic.com>
+M:     Herbert Xu <herbert@gondor.apana.org.au>
+S:     Odd fixes
 F:     Documentation/hw_random.txt
 F:     drivers/char/hw_random/
 F:     include/linux/hw_random.h
@@ -4003,11 +3999,11 @@ F:      Documentation/PCI/
 F:     drivers/pci/
 F:     include/linux/pci*
 
-PCIE HOTPLUG DRIVER
-M:     Kristen Carlson Accardi <kristen.c.accardi@intel.com>
+PCI HOTPLUG
+M:     Jesse Barnes <jbarnes@virtuousgeek.org>
 L:     linux-pci@vger.kernel.org
 S:     Supported
-F:     drivers/pci/pcie/
+F:     drivers/pci/hotplug
 
 PCMCIA SUBSYSTEM
 P:     Linux PCMCIA Team
@@ -4670,12 +4666,6 @@ F:       drivers/serial/serial_lh7a40x.c
 F:     drivers/usb/gadget/lh7a40*
 F:     drivers/usb/host/ohci-lh7a40*
 
-SHPC HOTPLUG DRIVER
-M:     Kristen Carlson Accardi <kristen.c.accardi@intel.com>
-L:     linux-pci@vger.kernel.org
-S:     Supported
-F:     drivers/pci/hotplug/shpchp*
-
 SIMPLE FIRMWARE INTERFACE (SFI)
 P:     Len Brown
 M:     lenb@kernel.org
@@ -4687,7 +4677,6 @@ F:        arch/x86/kernel/*sfi*
 F:     drivers/sfi/
 F:     include/linux/sfi*.h
 
-
 SIMTEC EB110ATX (Chalice CATS)
 P:     Ben Dooks
 M:     Vincent Sanders <support@simtec.co.uk>
index 433493a2b77baacc36152f30f88fd2b1e7f83db4..f908accd332b877338fdf92380bf52e3734f8cec 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -179,9 +179,46 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
 # Alternatively CROSS_COMPILE can be set in the environment.
 # Default value for CROSS_COMPILE is not to prefix executables
 # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
+#
+# To force ARCH and CROSS_COMPILE settings, include kernel.* files
+# in the kernel tree - do not patch this file.
 export KBUILD_BUILDHOST := $(SUBARCH)
-ARCH           ?= $(SUBARCH)
-CROSS_COMPILE  ?=
+
+# Kbuild saves the ARCH and CROSS_COMPILE settings in kernel.* files.
+# Restore these settings and check that the user did not specify
+# conflicting values.
+
+saved_arch  := $(shell cat include/generated/kernel.arch  2> /dev/null)
+saved_cross := $(shell cat include/generated/kernel.cross 2> /dev/null)
+
+ifneq ($(CROSS_COMPILE),)
+        ifneq ($(saved_cross),)
+                ifneq ($(CROSS_COMPILE),$(saved_cross))
+                        $(error CROSS_COMPILE changed from \
+                                "$(saved_cross)" to "$(CROSS_COMPILE)". \
+                                Use "make mrproper" to fix it up)
+                endif
+        endif
+else
+        CROSS_COMPILE := $(saved_cross)
+endif
+
+ifneq ($(ARCH),)
+        ifneq ($(saved_arch),)
+                ifneq ($(saved_arch),$(ARCH))
+                        $(error ARCH changed from \
+                                "$(saved_arch)" to "$(ARCH)". \
+                                 Use "make mrproper" to fix it up)
+                endif
+        endif
+else
+        ifneq ($(saved_arch),)
+                ARCH := $(saved_arch)
+        else
+                ARCH := $(SUBARCH)
+        endif
+endif
 
 # Architecture as present in compile.h
 UTS_MACHINE    := $(ARCH)
@@ -315,6 +352,7 @@ OBJCOPY             = $(CROSS_COMPILE)objcopy
 OBJDUMP                = $(CROSS_COMPILE)objdump
 AWK            = awk
 GENKSYMS       = scripts/genksyms/genksyms
+INSTALLKERNEL  := installkernel
 DEPMOD         = /sbin/depmod
 KALLSYMS       = scripts/kallsyms
 PERL           = perl
@@ -353,7 +391,8 @@ KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
 
 export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
 export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE
+export CPP AR NM STRIP OBJCOPY OBJDUMP
+export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
@@ -444,6 +483,11 @@ ifeq ($(config-targets),1)
 include $(srctree)/arch/$(SRCARCH)/Makefile
 export KBUILD_DEFCONFIG KBUILD_KCONFIG
 
+# save ARCH & CROSS_COMPILE settings
+$(shell mkdir -p include/generated &&                            \
+        echo $(ARCH)          > include/generated/kernel.arch && \
+        echo $(CROSS_COMPILE) > include/generated/kernel.cross)
+
 config: scripts_basic outputmakefile FORCE
        $(Q)mkdir -p include/linux include/config
        $(Q)$(MAKE) $(build)=scripts/kconfig $@
@@ -571,6 +615,9 @@ KBUILD_CFLAGS       += $(call cc-option,-fno-strict-overflow)
 # revert to pre-gcc-4.4 behaviour of .eh_frame
 KBUILD_CFLAGS  += $(call cc-option,-fno-dwarf2-cfi-asm)
 
+# conserve stack if available
+KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 # But warn user when we do so
 warn-assign = \
@@ -591,12 +638,12 @@ endif
 
 # Use --build-id when available.
 LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
-                             $(call ld-option, -Wl$(comma)--build-id,))
+                             $(call cc-ldoption, -Wl$(comma)--build-id,))
 LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
 LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
 
 ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
-LDFLAGS_vmlinux        += -X
+LDFLAGS_vmlinux        += $(call ld-option, -X,)
 endif
 
 # Default kernel image to build when no specific target is given.
@@ -980,11 +1027,6 @@ prepare0: archprepare FORCE
 # All the preparing..
 prepare: prepare0
 
-# Leave this as default for preprocessing vmlinux.lds.S, which is now
-# done in arch/$(ARCH)/kernel/Makefile
-
-export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
-
 # The asm symlink changes when $(ARCH) changes.
 # Detect this and ask user to run make mrproper
 # If asm is a stale symlink (point to dir that does not exist) remove it
index 25da0017ec87fb2dd675257d7d16a68cc5f89aff..e42823e954aa30a6693e3799749927b111b0a133 100644 (file)
@@ -26,6 +26,8 @@
 #define F_GETOWN       6       /*  for sockets. */
 #define F_SETSIG       10      /*  for sockets. */
 #define F_GETSIG       11      /*  for sockets. */
+#define F_SETOWN_EX    12
+#define F_GETOWN_EX    13
 
 /* for posix fcntl() and lockf() */
 #define F_RDLCK                1
index 547e90951cec07a92bd31d6b9cf37f293612383e..3f390e8cc0b33fdee5070297d99d30e102f90557 100644 (file)
@@ -47,7 +47,7 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
 extern int smp_num_cpus;
 
 extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
 #else /* CONFIG_SMP */
 
index b4f284c72ff3ed303425ac437df0b74f75defc90..36b3a30ba0e519d078d3c4ab9f349ff42aa84b9a 100644 (file)
@@ -22,23 +22,6 @@ static inline int cpu_to_node(int cpu)
        return node;
 }
 
-static inline cpumask_t node_to_cpumask(int node)
-{
-       cpumask_t node_cpu_mask = CPU_MASK_NONE;
-       int cpu;
-
-       for_each_online_cpu(cpu) {
-               if (cpu_to_node(cpu) == node)
-                       cpu_set(cpu, node_cpu_mask);
-       }
-
-#ifdef DEBUG_NUMA
-       printk("node %d: cpu_mask: %016lx\n", node, node_cpu_mask);
-#endif
-
-       return node_cpu_mask;
-}
-
 extern struct cpumask node_to_cpumask_map[];
 /* FIXME: This is dumb, recalculating every time.  But simple. */
 static const struct cpumask *cpumask_of_node(int node)
@@ -55,7 +38,6 @@ static const struct cpumask *cpumask_of_node(int node)
        return &node_to_cpumask_map[node];
 }
 
-#define pcibus_to_cpumask(bus) (cpu_online_map)
 #define cpumask_of_pcibus(bus) (cpu_online_mask)
 
 #endif /* !CONFIG_NUMA */
index e302daecbe56cbe5ac5a9cca91046da88fad51da..8e059e58b0acd6eba4aace468da92dd0090e5008 100644 (file)
@@ -1016,7 +1016,7 @@ marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *m
 {
        struct marvel_agp_aperture *aper = agp->aperture.sysdata;
        return iommu_bind(aper->arena, aper->pg_start + pg_start, 
-                         mem->page_count, mem->memory);
+                         mem->page_count, mem->pages);
 }
 
 static int 
index 319fcb74611e57c9990aaba2e691079e8f1aa038..76686497b1e210992fcf0ee51fd86ed800e5dd14 100644 (file)
@@ -680,7 +680,7 @@ titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *me
 {
        struct titan_agp_aperture *aper = agp->aperture.sysdata;
        return iommu_bind(aper->arena, aper->pg_start + pg_start, 
-                         mem->page_count, mem->memory);
+                         mem->page_count, mem->pages);
 }
 
 static int 
index 00edd04b585ec00724ea42a9aac24fb3b990a449..85457b2d4516dc009ad0407c6b01731add69081b 100644 (file)
@@ -198,7 +198,7 @@ extern unsigned long size_for_memory(unsigned long max);
 
 extern int iommu_reserve(struct pci_iommu_arena *, long, long);
 extern int iommu_release(struct pci_iommu_arena *, long, long);
-extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *);
+extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **);
 extern int iommu_unbind(struct pci_iommu_arena *, long, long);
 
 
index d15aedfe60661a5dc83b8292e959d23f2d4b8ac0..8449504f5e0b1d826841181d476067028b540a92 100644 (file)
@@ -876,7 +876,7 @@ iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count)
 
 int
 iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, 
-          unsigned long *physaddrs)
+          struct page **pages)
 {
        unsigned long flags;
        unsigned long *ptes;
@@ -896,7 +896,7 @@ iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count,
        }
                
        for(i = 0, j = pg_start; i < pg_count; i++, j++)
-               ptes[j] = mk_iommu_pte(physaddrs[i]);
+               ptes[j] = mk_iommu_pte(page_to_phys(pages[i]));
 
        spin_unlock_irqrestore(&arena->lock, flags);
 
index 3a2fb7a02db402a75b8eaaea3dfe8a51d02eddd9..289039bb6bb2a07f91b81b2848d035cc39083d88 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/ptrace.h>
 #include <linux/slab.h>
 #include <linux/user.h>
-#include <linux/utsname.h>
 #include <linux/time.h>
 #include <linux/major.h>
 #include <linux/stat.h>
index b1fe5674c3a1fb286c3af38559a52009ce054478..42aa078a5e4d3531994144998b6a5c6bfe87ddaf 100644 (file)
@@ -548,16 +548,16 @@ setup_profiling_timer(unsigned int multiplier)
 
 \f
 static void
-send_ipi_message(cpumask_t to_whom, enum ipi_message_type operation)
+send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
 {
        int i;
 
        mb();
-       for_each_cpu_mask(i, to_whom)
+       for_each_cpu(i, to_whom)
                set_bit(operation, &ipi_data[i].bits);
 
        mb();
-       for_each_cpu_mask(i, to_whom)
+       for_each_cpu(i, to_whom)
                wripir(i);
 }
 
@@ -624,7 +624,7 @@ smp_send_reschedule(int cpu)
                printk(KERN_WARNING
                       "smp_send_reschedule: Sending IPI to self.\n");
 #endif
-       send_ipi_message(cpumask_of_cpu(cpu), IPI_RESCHEDULE);
+       send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 }
 
 void
@@ -636,17 +636,17 @@ smp_send_stop(void)
        if (hard_smp_processor_id() != boot_cpu_id)
                printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n");
 #endif
-       send_ipi_message(to_whom, IPI_CPU_STOP);
+       send_ipi_message(&to_whom, IPI_CPU_STOP);
 }
 
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
        send_ipi_message(mask, IPI_CALL_FUNC);
 }
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-       send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE);
+       send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
 }
 
 static void
index 54661125a8bfff2b69a1daee5d6de3ae5ebd1395..a73caaf667633c286e8812da535aa43203170134 100644 (file)
@@ -14,7 +14,7 @@ LDFLAGS_vmlinux       :=-p --no-undefined -X
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux        += --be8
 endif
-CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
+
 OBJCOPYFLAGS   :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 GZFLAGS                :=-9
 #KBUILD_CFLAGS +=-pipe
@@ -279,7 +279,7 @@ define archhelp
   echo  '                  (supply initrd image via make variable INITRD=<path>)'
   echo  '  install       - Install uncompressed kernel'
   echo  '  zinstall      - Install compressed kernel'
-  echo  '                  Install using (your) ~/bin/installkernel or'
-  echo  '                  (distribution) /sbin/installkernel or'
+  echo  '                  Install using (your) ~/bin/$(INSTALLKERNEL) or'
+  echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
   echo  '                  install to $$(INSTALL_PATH) and run lilo'
 endef
index 9f9bed207345603d2d99471dcbfeef90ca2d1b70..06ea7d42ce8e6bb9f72633a14abd2ace113fa712 100644 (file)
@@ -21,8 +21,8 @@
 #
 
 # User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 if [ "$(basename $2)" = "zImage" ]; then
 # Compressed install
index 1a711ea8418b6045c581a576caa3f85496ee2673..fd03fb63a33222ca6ff67a25469414371d68431d 100644 (file)
@@ -334,14 +334,14 @@ static inline void outer_flush_range(unsigned long start, unsigned long end)
 #ifndef CONFIG_CPU_CACHE_VIPT
 static inline void flush_cache_mm(struct mm_struct *mm)
 {
-       if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+       if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
                __cpuc_flush_user_all();
 }
 
 static inline void
 flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
+       if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
                __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
                                        vma->vm_flags);
 }
@@ -349,7 +349,7 @@ flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long
 static inline void
 flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
 {
-       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+       if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
                unsigned long addr = user_addr & PAGE_MASK;
                __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
        }
@@ -360,7 +360,7 @@ flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
                         unsigned long uaddr, void *kaddr,
                         unsigned long len, int write)
 {
-       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+       if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
                unsigned long addr = (unsigned long)kaddr;
                __cpuc_coherent_kern_range(addr, addr + len);
        }
index 83e6ba338e2c4c9c13c6e05cd0aadeea5ce461fc..1a8c7279a28b39eb8473d5e5ffb383cfbc5040ec 100644 (file)
@@ -187,11 +187,74 @@ union iop3xx_desc {
        void *ptr;
 };
 
+/* No support for p+q operations */
+static inline int
+iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+       BUG();
+       return 0;
+}
+
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+                 unsigned long flags)
+{
+       BUG();
+}
+
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+       BUG();
+}
+
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+                        dma_addr_t addr, unsigned char coef)
+{
+       BUG();
+}
+
+static inline int
+iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+       BUG();
+       return 0;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+                         unsigned long flags)
+{
+       BUG();
+}
+
+static inline void
+iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+       BUG();
+}
+
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+                             dma_addr_t *src)
+{
+       BUG();
+}
+
 static inline int iop_adma_get_max_xor(void)
 {
        return 32;
 }
 
+static inline int iop_adma_get_max_pq(void)
+{
+       BUG();
+       return 0;
+}
+
 static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
 {
        int id = chan->device->id;
@@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
        return slot_cnt;
 }
 
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+       return 0;
+}
+
 static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
 {
@@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
        return 0;
 }
 
+
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+                                         struct iop_adma_chan *chan)
+{
+       BUG();
+       return 0;
+}
+
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
 {
@@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
        hw_desc->src[0] = val;
 }
 
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
        struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
        struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
 
        iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
-       return desc_ctrl.zero_result_err;
+       return desc_ctrl.zero_result_err << SUM_CHECK_P;
 }
 
 static inline void iop_chan_append(struct iop_adma_chan *chan)
index 385c6e8cbbd214357e798270ccac8845cb672cec..59b8c3892f76731608b346d0d100910e9047c481 100644 (file)
@@ -86,6 +86,7 @@ struct iop_adma_chan {
  * @idx: pool index
  * @unmap_src_cnt: number of xor sources
  * @unmap_len: transaction bytecount
+ * @tx_list: list of descriptors that are associated with one operation
  * @async_tx: support for the async_tx api
  * @group_list: list of slots that make up a multi-descriptor transaction
  *     for example transfer lengths larger than the supported hw max
@@ -102,10 +103,12 @@ struct iop_adma_desc_slot {
        u16 idx;
        u16 unmap_src_cnt;
        size_t unmap_len;
+       struct list_head tx_list;
        struct dma_async_tx_descriptor async_tx;
        union {
                u32 *xor_check_result;
                u32 *crc32_result;
+               u32 *pq_check_result;
        };
 };
 
index bcdb9291ef0c3636d18442d9e8ea6fd36cebc425..de6cefb329dd4aa24cd42731730915938a33aaf4 100644 (file)
@@ -103,14 +103,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 
 #ifdef CONFIG_SMP
        /* check for possible thread migration */
-       if (!cpus_empty(next->cpu_vm_mask) && !cpu_isset(cpu, next->cpu_vm_mask))
+       if (!cpumask_empty(mm_cpumask(next)) &&
+           !cpumask_test_cpu(cpu, mm_cpumask(next)))
                __flush_icache_all();
 #endif
-       if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) {
+       if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
                check_context(next);
                cpu_switch_mm(next->pgd, next);
                if (cache_is_vivt())
-                       cpu_clear(cpu, prev->cpu_vm_mask);
+                       cpumask_clear_cpu(cpu, mm_cpumask(prev));
        }
 #endif
 }
index a06e735b262ad26efe2a79d164670a763cdf2090..e0d763be18465d98fcffcba416e23577fa21f0b4 100644 (file)
@@ -93,7 +93,6 @@ extern void platform_cpu_enable(unsigned int cpu);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 /*
  * show local interrupt info
index c964f3fc3bc57e906c452cbbe059fed506c19ee8..a45ab5dd82559cc7d21e887f24135193bc2fea6b 100644 (file)
@@ -350,7 +350,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
        if (tlb_flag(TLB_WB))
                dsb();
 
-       if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) {
+       if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
                if (tlb_flag(TLB_V3_FULL))
                        asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc");
                if (tlb_flag(TLB_V4_U_FULL))
@@ -388,7 +388,7 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
        if (tlb_flag(TLB_WB))
                dsb();
 
-       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+       if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
                if (tlb_flag(TLB_V3_PAGE))
                        asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc");
                if (tlb_flag(TLB_V4_U_PAGE))
index 3213c9382b171aa14e7f00d1c7ab86418a5ebef0..c446aeff7b893fb5764cc3716a6c81760cdee218 100644 (file)
@@ -2,7 +2,8 @@
 # Makefile for the linux kernel.
 #
 
-AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
+AFLAGS_head.o        := -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 ifdef CONFIG_DYNAMIC_FTRACE
 CFLAGS_REMOVE_ftrace.o = -pg
index 3f470866bb89ebe4f903db8dfb389313505df502..e7cbb50dc35678ec191bcc026a02e8e8036af612 100644 (file)
@@ -24,9 +24,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  *
  * The things we do for performance..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index de885fd256c519b220e6d48d86996e1f7027cd5d..e0d32770bb3d51c061554ba348c9c69beffd25ea 100644 (file)
@@ -189,7 +189,7 @@ int __cpuexit __cpu_disable(void)
        read_lock(&tasklist_lock);
        for_each_process(p) {
                if (p->mm)
-                       cpu_clear(cpu, p->mm->cpu_vm_mask);
+                       cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
        }
        read_unlock(&tasklist_lock);
 
@@ -257,7 +257,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
        atomic_inc(&mm->mm_users);
        atomic_inc(&mm->mm_count);
        current->active_mm = mm;
-       cpu_set(cpu, mm->cpu_vm_mask);
+       cpumask_set_cpu(cpu, mm_cpumask(mm));
        cpu_switch_mm(mm->pgd, mm);
        enter_lazy_tlb(mm, current);
        local_flush_tlb_all();
@@ -643,7 +643,7 @@ void flush_tlb_all(void)
 void flush_tlb_mm(struct mm_struct *mm)
 {
        if (tlb_ops_need_broadcast())
-               on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask);
+               on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
        else
                local_flush_tlb_mm(mm);
 }
@@ -654,7 +654,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
                struct tlb_args ta;
                ta.ta_vma = vma;
                ta.ta_start = uaddr;
-               on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+               on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
        } else
                local_flush_tlb_page(vma, uaddr);
 }
@@ -677,7 +677,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
                ta.ta_vma = vma;
                ta.ta_start = start;
                ta.ta_end = end;
-               on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+               on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
        } else
                local_flush_tlb_range(vma, start, end);
 }
index b3ec641b5cf8a1b8cf7cce2d932315e42235e03d..78ecaac652069217b965eca0cd336b40858698b2 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/mman.h>
 #include <linux/fs.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/ipc.h>
 #include <linux/uaccess.h>
 
index 55719a974276e315d42173e26118fee4790076a6..fb5c23af1017ed13c480ae324bf90b6b7d566bf1 100644 (file)
@@ -757,6 +757,42 @@ void __init at91_add_device_ac97(struct ac97c_platform_data *data)
 void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
 #endif
 
+/* --------------------------------------------------------------------
+ *  CAN Controller
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_CAN_AT91) || defined(CONFIG_CAN_AT91_MODULE)
+static struct resource can_resources[] = {
+       [0] = {
+               .start  = AT91SAM9263_BASE_CAN,
+               .end    = AT91SAM9263_BASE_CAN + SZ_16K - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AT91SAM9263_ID_CAN,
+               .end    = AT91SAM9263_ID_CAN,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device at91sam9263_can_device = {
+       .name           = "at91_can",
+       .id             = -1,
+       .resource       = can_resources,
+       .num_resources  = ARRAY_SIZE(can_resources),
+};
+
+void __init at91_add_device_can(struct at91_can_data *data)
+{
+       at91_set_A_periph(AT91_PIN_PA13, 0);    /* CANTX */
+       at91_set_A_periph(AT91_PIN_PA14, 0);    /* CANRX */
+       at91sam9263_can_device.dev.platform_data = data;
+
+       platform_device_register(&at91sam9263_can_device);
+}
+#else
+void __init at91_add_device_can(struct at91_can_data *data) {}
+#endif
 
 /* --------------------------------------------------------------------
  *  LCD Controller
index 26f1aa6049afac276630a7ae4c17c6d39a36fa17..2d867fb0630f0719723b9aa0fde5ab416725ae5f 100644 (file)
@@ -400,6 +400,23 @@ static struct gpio_led ek_pwm_led[] = {
        }
 };
 
+/*
+ * CAN
+ */
+static void sam9263ek_transceiver_switch(int on)
+{
+       if (on) {
+               at91_set_gpio_output(AT91_PIN_PA18, 1); /* CANRXEN */
+               at91_set_gpio_output(AT91_PIN_PA19, 0); /* CANRS */
+       } else {
+               at91_set_gpio_output(AT91_PIN_PA18, 0); /* CANRXEN */
+               at91_set_gpio_output(AT91_PIN_PA19, 1); /* CANRS */
+       }
+}
+
+static struct at91_can_data ek_can_data = {
+       .transceiver_switch = sam9263ek_transceiver_switch,
+};
 
 static void __init ek_board_init(void)
 {
@@ -431,6 +448,8 @@ static void __init ek_board_init(void)
        /* LEDs */
        at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds));
        at91_pwm_leds(ek_pwm_led, ARRAY_SIZE(ek_pwm_led));
+       /* CAN */
+       at91_add_device_can(&ek_can_data);
 }
 
 MACHINE_START(AT91SAM9263EK, "Atmel AT91SAM9263-EK")
index 583f38a38df708862c8cada64d743ed2c6dd4959..2f4fcedc02ba6a55638f5b44c0e5234f8acd887b 100644 (file)
@@ -188,6 +188,12 @@ extern void __init at91_add_device_isi(void);
  /* Touchscreen Controller */
 extern void __init at91_add_device_tsadcc(void);
 
+/* CAN */
+struct at91_can_data {
+       void (*transceiver_switch)(int on);
+};
+extern void __init at91_add_device_can(struct at91_can_data *data);
+
  /* LEDs */
 extern void __init at91_init_leds(u8 cpu_led, u8 timer_led);
 extern void __init at91_gpio_leds(struct gpio_led *leds, int nr);
index 5722e86f2174a93aa4b91975f2c2b608a5b4a1af..6d3782d85a9ff6d2db65a71de2632cc0b1151b33 100644 (file)
@@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void)
        return 16;
 }
 
+#define iop_adma_get_max_pq iop_adma_get_max_xor
+
 static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
 {
        return __raw_readl(ADMA_ADAR(chan));
@@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
 #define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
+#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
+#define iop_chan_pq_slot_count iop_chan_xor_slot_count
+#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
 
 static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
@@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
        return hw_desc->dest_addr;
 }
 
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+                                         struct iop_adma_chan *chan)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+       return hw_desc->q_dest_addr;
+}
+
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
 {
@@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
        return 1;
 }
 
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+                 unsigned long flags)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+       union {
+               u32 value;
+               struct iop13xx_adma_desc_ctrl field;
+       } u_desc_ctrl;
+
+       u_desc_ctrl.value = 0;
+       u_desc_ctrl.field.src_select = src_cnt - 1;
+       u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+       u_desc_ctrl.field.pq_xfer_en = 1;
+       u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+       u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+       hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+       union {
+               u32 value;
+               struct iop13xx_adma_desc_ctrl field;
+       } u_desc_ctrl;
+
+       u_desc_ctrl.value = hw_desc->desc_ctrl;
+       return u_desc_ctrl.field.pq_xfer_en;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+                         unsigned long flags)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+       union {
+               u32 value;
+               struct iop13xx_adma_desc_ctrl field;
+       } u_desc_ctrl;
+
+       u_desc_ctrl.value = 0;
+       u_desc_ctrl.field.src_select = src_cnt - 1;
+       u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+       u_desc_ctrl.field.zero_result = 1;
+       u_desc_ctrl.field.status_write_back_en = 1;
+       u_desc_ctrl.field.pq_xfer_en = 1;
+       u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+       u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+       hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
 static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan,
                                        u32 byte_count)
@@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
        }
 }
 
+#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count
 
 static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan,
@@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
        hw_desc->upper_dest_addr = 0;
 }
 
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+
+       hw_desc->dest_addr = addr[0];
+       hw_desc->q_dest_addr = addr[1];
+       hw_desc->upper_dest_addr = 0;
+}
+
 static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
                                        dma_addr_t addr)
 {
@@ -388,6 +463,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
        } while (slot_cnt);
 }
 
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+                        dma_addr_t addr, unsigned char coef)
+{
+       int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
+       struct iop13xx_adma_src *src;
+       int i = 0;
+
+       do {
+               iter = iop_hw_desc_slot_idx(hw_desc, i);
+               src = &iter->src[src_idx];
+               src->src_addr = addr;
+               src->pq_upper_src_addr = 0;
+               src->pq_dmlt = coef;
+               slot_cnt -= slots_per_op;
+               if (slot_cnt) {
+                       i += slots_per_op;
+                       addr += IOP_ADMA_PQ_MAX_BYTE_COUNT;
+               }
+       } while (slot_cnt);
+}
+
 static inline void
 iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
        struct iop_adma_chan *chan)
@@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
 }
 
 #define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+                             dma_addr_t *src)
+{
+       iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]);
+       iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]);
+}
 
 static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
                                        u32 next_desc_addr)
@@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
        hw_desc->block_fill_data = val;
 }
 
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
        struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
        struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
        struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
+       enum sum_check_flags flags;
 
        BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
 
-       if (desc_ctrl.pq_xfer_en)
-               return byte_count.zero_result_err_q;
-       else
-               return byte_count.zero_result_err;
+       flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
+       flags |= byte_count.zero_result_err << SUM_CHECK_P;
+
+       return flags;
 }
 
 static inline void iop_chan_append(struct iop_adma_chan *chan)
index bee42c609df6c6db3c491e9826fc5c4d5973225d..5c147fb66a013f809dbc92a4180fb2e89f6d4f22 100644 (file)
@@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void)
                        plat_data = &iop13xx_adma_0_data;
                        dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
                        dma_cap_set(DMA_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+                       dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
                        dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-                       dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
                        dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
                        break;
                case IOP13XX_INIT_ADMA_1:
@@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void)
                        plat_data = &iop13xx_adma_1_data;
                        dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
                        dma_cap_set(DMA_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+                       dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
                        dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-                       dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
                        dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
                        break;
                case IOP13XX_INIT_ADMA_2:
@@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void)
                        plat_data = &iop13xx_adma_2_data;
                        dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
                        dma_cap_set(DMA_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+                       dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
                        dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-                       dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
                        dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
-                       dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
-                       dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
+                       dma_cap_set(DMA_PQ, plat_data->cap_mask);
+                       dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
                        break;
                }
        }
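These hunks swap the removed fine-grained flags for the consolidated
DMA_XOR_VAL, DMA_PQ and DMA_PQ_VAL capabilities. A compilable toy model of
the capability-mask idiom; the real dma_cap_set()/dma_has_cap() in
linux/dmaengine.h operate on a multi-word bitmap rather than a single long:

    #include <stdio.h>

    enum dma_transaction_type {
            DMA_MEMCPY, DMA_XOR, DMA_XOR_VAL, DMA_PQ, DMA_PQ_VAL,
            DMA_MEMSET, DMA_INTERRUPT,
    };

    typedef unsigned long dma_cap_mask_t;           /* simplified */
    #define dma_cap_set(tx, mask)  ((mask) |= 1UL << (tx))
    #define dma_has_cap(tx, mask)  (!!((mask) & (1UL << (tx))))

    int main(void)
    {
            dma_cap_mask_t mask = 0;

            /* Mirrors the IOP13XX_INIT_ADMA_2 case above. */
            dma_cap_set(DMA_PQ, mask);
            dma_cap_set(DMA_PQ_VAL, mask);

            printf("PQ=%d PQ_VAL=%d XOR_VAL=%d\n",
                   dma_has_cap(DMA_PQ, mask),
                   dma_has_cap(DMA_PQ_VAL, mask),
                   dma_has_cap(DMA_XOR_VAL, mask));
            return 0;
    }
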
index fc84fcc743804d16241a99ee5cb1c86f650dd048..6bda76a431991287cc86e6995b3769bfde2ebf43 100644 (file)
@@ -59,6 +59,6 @@ void __new_context(struct mm_struct *mm)
        }
        spin_unlock(&cpu_asid_lock);
 
-       mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id());
+       cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id()));
        mm->context.id = asid;
 }
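The open-coded cpu_vm_mask accesses here and in the following hunks give
way to the mm_cpumask() accessor plus the struct-cpumask API, which
decouples callers from the mask's storage. A toy single-word model of the
pattern (the kernel types are larger and bitmap-based):

    #include <stdio.h>

    /* Simplified single-word stand-ins for the kernel types. */
    struct cpumask { unsigned long bits; };
    struct mm_struct { struct cpumask cpu_vm_mask; };

    static struct cpumask *mm_cpumask(struct mm_struct *mm)
    {
            return &mm->cpu_vm_mask;        /* hides the layout from callers */
    }

    static void cpumask_copy(struct cpumask *dst, const struct cpumask *src)
    {
            dst->bits = src->bits;
    }

    static int cpumask_test_cpu(int cpu, const struct cpumask *mask)
    {
            return !!(mask->bits & (1UL << cpu));
    }

    int main(void)
    {
            struct mm_struct mm = { { 0 } };
            struct cpumask self = { 1UL << 2 };     /* pretend cpu 2 */

            cpumask_copy(mm_cpumask(&mm), &self);
            printf("cpu 2 set: %d, cpu 3 set: %d\n",
                   cpumask_test_cpu(2, mm_cpumask(&mm)),
                   cpumask_test_cpu(3, mm_cpumask(&mm)));
            return 0;
    }
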
index 575f3ad722e773fabffaac361fa9e72ecfaea62a..b27942909b239e1c7d8dac35ad10cf5fe89d3353 100644 (file)
@@ -50,7 +50,7 @@ static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
 void flush_cache_mm(struct mm_struct *mm)
 {
        if (cache_is_vivt()) {
-               if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+               if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
                        __cpuc_flush_user_all();
                return;
        }
@@ -73,7 +73,7 @@ void flush_cache_mm(struct mm_struct *mm)
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
        if (cache_is_vivt()) {
-               if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
+               if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
                        __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
                                                vma->vm_flags);
                return;
@@ -97,7 +97,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
 void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
 {
        if (cache_is_vivt()) {
-               if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+               if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
                        unsigned long addr = user_addr & PAGE_MASK;
                        __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
                }
@@ -113,7 +113,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
                         unsigned long len, int write)
 {
        if (cache_is_vivt()) {
-               if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+               if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
                        unsigned long addr = (unsigned long)kaddr;
                        __cpuc_coherent_kern_range(addr, addr + len);
                }
@@ -126,7 +126,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
        }
 
        /* VIPT non-aliasing cache */
-       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask) &&
+       if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)) &&
            vma->vm_flags & VM_EXEC) {
                unsigned long addr = (unsigned long)kaddr;
                /* only flushing the kernel mapping on non-aliasing VIPT */
index 3c127aabe214e4bb8d7a2ff00c37061836a88cff..1ff6a37e893c423da523a945e0cf7c5d8f837152 100644 (file)
@@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void)
        dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
        #else
        dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
-       dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
        dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
        #endif
 
@@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void)
        dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
        #else
        dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
-       dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
        dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
        #endif
 
@@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void)
        dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
        #else
        dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
-       dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
+       dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
        dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
        dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
        #endif
index 57ec9f2dcd953b2ebe9270f1c9dbfa6cf99d2401..6b2343e6fe332cd5454bfe88935091ed96d1a482 100644 (file)
@@ -18,9 +18,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 /*
  * Initial thread structure. Must be aligned on an 8192-byte boundary.
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
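The repeated __attribute__ boilerplate is replaced by __init_task_data
throughout this series. Assuming the definition added to linux/init_task.h
at the time, the shorthand expands to the same section placement:

    /* Presumed definition from linux/init_task.h of this era: */
    #define __init_task_data __attribute__((__section__(".data.init_task")))
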
index 376f18c4a6cb90370ded166c29eaa614d3fcf108..94925641e53e64fbb87b2754b5d927be00a8b3ac 100644 (file)
 #include <asm/setup.h>
 #include <asm/sections.h>
 
-#define __page_aligned __attribute__((section(".data.page_aligned")))
-
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned;
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data;
 
 struct page *empty_zero_page;
 EXPORT_SYMBOL(empty_zero_page);
index 6f9533c3d752b6d718ad4712a8de66624ffb49ba..f063b772934bbcf75461276d680feba35c5e1574 100644 (file)
@@ -155,7 +155,7 @@ define archhelp
   echo  '* vmImage.gz      - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.gz)'
   echo  '  vmImage.lzma    - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.lzma)'
   echo  '  install         - Install kernel using'
-  echo  '                     (your) ~/bin/$(CROSS_COMPILE)installkernel or'
-  echo  '                     (distribution) PATH: $(CROSS_COMPILE)installkernel or'
+  echo  '                     (your) ~/bin/$(INSTALLKERNEL) or'
+  echo  '                     (distribution) PATH: $(INSTALLKERNEL) or'
   echo  '                     install to $$(INSTALL_PATH)'
 endef
index 9560a6b291009db0b04b9256d5bd241a35700b38..e2c6e40902b7e45961c5878b782f7c941cc8d3b8 100644 (file)
@@ -36,9 +36,9 @@ verify "$3"
 
 # User may have a custom install script
 
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if which ${CROSS_COMPILE}installkernel >/dev/null 2>&1; then
-       exec ${CROSS_COMPILE}installkernel "$@"
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if which ${INSTALLKERNEL} >/dev/null 2>&1; then
+       exec ${INSTALLKERNEL} "$@"
 fi
 
 # Default install - same as make zlilo
index 71e17d3eeddba6d8119378e414a4bc742a26a5d9..29c2ceb38a76dd05fb98e9728e53b2d0753df0df 100644 (file)
@@ -42,8 +42,6 @@ LD = $(CROSS_COMPILE)ld -mcrislinux
 
 OBJCOPYFLAGS := -O binary -R .note -R .comment -S
 
-CPPFLAGS_vmlinux.lds = -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
-
 KBUILD_AFLAGS += -mlinux -march=$(arch-y) $(inc)
 KBUILD_CFLAGS += -mlinux -march=$(arch-y) -pipe $(inc)
 KBUILD_CPPFLAGS += $(inc)
index ee7bcd4d20b270d0427b62f40c0895a9aa3eec77..b45640b3e6006cf3b5da0c5b5330534d7520fe3a 100644 (file)
@@ -3,6 +3,7 @@
 # Makefile for the linux kernel.
 #
 
+CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
 extra-y        := vmlinux.lds
 
 obj-y   := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
index 51dcd04d2777f01b87a86108d500efb800be1d2d..c99aeab7cef727b0cf6ebf18f93216f715d49216 100644 (file)
@@ -45,9 +45,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union 
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index 1d3df1d9495c3379180ad6b9cf683cc9b5f24b25..3c3e0b336a9d8e1e6e1e04fe32bd4e1f683761f6 100644 (file)
@@ -19,9 +19,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index be722fc1acff523986182dcef23da7b15a3c2534..0d4d3e3a4cfcb72c2ad2538c410635f9a799c524 100644 (file)
@@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len)
 /*
  * Send us to sleep.
  */
-static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp,
+static int sysctl_pm_do_suspend(ctl_table *ctl, int write,
                                void __user *buffer, size_t *lenp, loff_t *fpos)
 {
        int retval, mode;
@@ -198,13 +198,13 @@ static int try_set_cmode(int new_cmode)
 }
 
 
-static int cmode_procctl(ctl_table *ctl, int write, struct file *filp,
+static int cmode_procctl(ctl_table *ctl, int write,
                         void __user *buffer, size_t *lenp, loff_t *fpos)
 {
        int new_cmode;
 
        if (!write)
-               return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+               return proc_dointvec(ctl, write, buffer, lenp, fpos);
 
        new_cmode = user_atoi(buffer, *lenp);
 
@@ -301,13 +301,13 @@ static int try_set_cm(int new_cm)
        return 0;
 }
 
-static int p0_procctl(ctl_table *ctl, int write, struct file *filp,
+static int p0_procctl(ctl_table *ctl, int write,
                      void __user *buffer, size_t *lenp, loff_t *fpos)
 {
        int new_p0;
 
        if (!write)
-               return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+               return proc_dointvec(ctl, write, buffer, lenp, fpos);
 
        new_p0 = user_atoi(buffer, *lenp);
 
@@ -345,13 +345,13 @@ static int p0_sysctl(ctl_table *table,
        return 1;
 }
 
-static int cm_procctl(ctl_table *ctl, int write, struct file *filp,
+static int cm_procctl(ctl_table *ctl, int write,
                      void __user *buffer, size_t *lenp, loff_t *fpos)
 {
        int new_cm;
 
        if (!write)
-               return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+               return proc_dointvec(ctl, write, buffer, lenp, fpos);
 
        new_cm = user_atoi(buffer, *lenp);
 
index baadc97f86278ff2060f64932d60977c80d04abc..2b6b5289cdccefbd2d5029d5da4e62ebdc989803 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/stat.h>
 #include <linux/mman.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/syscalls.h>
 #include <linux/ipc.h>
 
index 089c65ed6eb327c5e8ffa39e5a69a5c6e5d86921..54c1062ee80ed431f911d598f40894b470c9b47f 100644 (file)
@@ -31,7 +31,6 @@ EXPORT_SYMBOL(init_task);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
index 2745656dcc52c8ac6ad7f3c804e81c4fb3dd91ce..8cb5d73a0e357cbc530b6d2a408c7a5b8065009b 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/syscalls.h>
 #include <linux/mman.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/fs.h>
 #include <linux/ipc.h>
 
index d217d1d4e05197351c6bc51c53bb946b3eae85ab..0b3b3997decd1adf397901a4ec4b4ad9d3328cfe 100644 (file)
@@ -127,7 +127,6 @@ extern int is_multithreading_enabled(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #else /* CONFIG_SMP */
 
index d0141fbf51d0a893626f110814f1f646610750fc..3ddb4e709dbadb1c36523e696a6ce32927c1c459 100644 (file)
@@ -33,7 +33,6 @@
 /*
  * Returns a bitmask of CPUs on Node 'node'.
  */
-#define node_to_cpumask(node) (node_to_cpu_mask[node])
 #define cpumask_of_node(node) (&node_to_cpu_mask[node])
 
 /*
@@ -104,8 +103,6 @@ void build_cpu_to_node_map(void);
 #ifdef CONFIG_SMP
 #define topology_physical_package_id(cpu)      (cpu_data(cpu)->socket_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu)->core_id)
-#define topology_core_siblings(cpu)            (cpu_core_map[cpu])
-#define topology_thread_siblings(cpu)          (per_cpu(cpu_sibling_map, cpu))
 #define topology_core_cpumask(cpu)             (&cpu_core_map[cpu])
 #define topology_thread_cpumask(cpu)           (&per_cpu(cpu_sibling_map, cpu))
 #define smt_capable()                          (smp_num_siblings > 1)
index 929e780026d13bbd35786c54fc2433857d101954..0e932f5dcd1a6021b6de9b6826dbfddb771b0088 100644 (file)
@@ -21,8 +21,8 @@
 
 # User may have a custom install script
 
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install - same as make zlilo
 
index 1d87f84069b31420f6d138aa618a49756004e937..ab9b03a9adcc4680fd369730e319eeac8fde4728 100644 (file)
@@ -10,7 +10,7 @@ quiet_cmd_gate = GATE $@
       cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
 
 GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
-                    $(call ld-option, -Wl$(comma)--hash-style=sysv)
+                    $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 $(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
        $(call if_changed,gate)
 
index c475fc281be755accd969935eb8bd278069a1ffe..e253ab8fcbc8923a789dc12e0afae0dcbf7891b7 100644 (file)
@@ -33,7 +33,8 @@ union {
                struct thread_info thread_info;
        } s;
        unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
-} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
+} init_task_mem asm ("init_task") __init_task_data =
+       {{
        .task =         INIT_TASK(init_task_mem.s.task),
        .thread_info =  INIT_THREAD_INFO(init_task_mem.s.task)
 }};
index 93ebfea43c6cecdb66a93372adc73f2825aaae77..dabeefe211347f4d0e3c55d046c411f4060fb488 100644 (file)
@@ -302,7 +302,7 @@ smp_flush_tlb_mm (struct mm_struct *mm)
                return;
        }
 
-       smp_call_function_mask(mm->cpu_vm_mask,
+       smp_call_function_many(mm_cpumask(mm),
                (void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
        local_irq_disable();
        local_finish_flush_tlb_mm(mm);
index 6d72e9e72697c9c3132750527f8abeacc2c97786..16e5a0a134375ec3c7f6f785801bed0d1a45c798 100644 (file)
@@ -24,8 +24,8 @@
 
 # User may have a custom install script
 
-if [ -x /sbin/installkernel ]; then
-  exec /sbin/installkernel "$@"
+if [ -x /sbin/${INSTALLKERNEL} ]; then
+  exec /sbin/${INSTALLKERNEL} "$@"
 fi
 
 if [ "$2" = "zImage" ]; then
index 91909e5dd9d0a300b5a32ddb4e930c7bbae21066..a70a3df33635f96844fe34c75721336d8cc2fbc0 100644 (file)
@@ -127,7 +127,7 @@ static inline void switch_mm(struct mm_struct *prev,
 
        if (prev != next) {
 #ifdef CONFIG_SMP
-               cpu_set(cpu, next->cpu_vm_mask);
+               cpumask_set_cpu(cpu, mm_cpumask(next));
 #endif /* CONFIG_SMP */
                /* Set MPTB = next->pgd */
                *(volatile unsigned long *)MPTB = (unsigned long)next->pgd;
@@ -135,7 +135,7 @@ static inline void switch_mm(struct mm_struct *prev,
        }
 #ifdef CONFIG_SMP
        else
-               if (!cpu_test_and_set(cpu, next->cpu_vm_mask))
+               if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)))
                        activate_context(next);
 #endif /* CONFIG_SMP */
 }
index b96a6d2ffbc3f16161cb7fbc8e85de866dfeaaf3..e67ded1aab9100df5d478573945166b70a7b4105 100644 (file)
@@ -88,7 +88,7 @@ extern void smp_send_timer(void);
 extern unsigned long send_IPI_mask_phys(cpumask_t, int, int);
 
 extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
 #endif /* not __ASSEMBLY__ */
 
index fce57e5d3f913950b96c5185209bc3e9b066f986..6c42d5f8df50f1dc20c622065f8cbe22be5a64e5 100644 (file)
@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index 929e5c9d3ad9c6467a5b28d24cd039a9597f1bc0..1b7598e6f6e815fd2129146a9925cedac2495573 100644 (file)
@@ -85,7 +85,7 @@ void smp_ipi_timer_interrupt(struct pt_regs *);
 void smp_local_timer_interrupt(void);
 
 static void send_IPI_allbutself(int, int);
-static void send_IPI_mask(cpumask_t, int, int);
+static void send_IPI_mask(const struct cpumask *, int, int);
 unsigned long send_IPI_mask_phys(cpumask_t, int, int);
 
 /*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
@@ -113,7 +113,7 @@ unsigned long send_IPI_mask_phys(cpumask_t, int, int);
 void smp_send_reschedule(int cpu_id)
 {
        WARN_ON(cpu_is_offline(cpu_id));
-       send_IPI_mask(cpumask_of_cpu(cpu_id), RESCHEDULE_IPI, 1);
+       send_IPI_mask(cpumask_of(cpu_id), RESCHEDULE_IPI, 1);
 }
 
 /*==========================================================================*
@@ -168,7 +168,7 @@ void smp_flush_cache_all(void)
        spin_lock(&flushcache_lock);
        mask=cpus_addr(cpumask);
        atomic_set_mask(*mask, (atomic_t *)&flushcache_cpumask);
-       send_IPI_mask(cpumask, INVALIDATE_CACHE_IPI, 0);
+       send_IPI_mask(&cpumask, INVALIDATE_CACHE_IPI, 0);
        _flush_cache_copyback_all();
        while (flushcache_cpumask)
                mb();
@@ -264,7 +264,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
        preempt_disable();
        cpu_id = smp_processor_id();
        mmc = &mm->context[cpu_id];
-       cpu_mask = mm->cpu_vm_mask;
+       cpu_mask = *mm_cpumask(mm);
        cpu_clear(cpu_id, cpu_mask);
 
        if (*mmc != NO_CONTEXT) {
@@ -273,7 +273,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
                if (mm == current->mm)
                        activate_context(mm);
                else
-                       cpu_clear(cpu_id, mm->cpu_vm_mask);
+                       cpumask_clear_cpu(cpu_id, mm_cpumask(mm));
                local_irq_restore(flags);
        }
        if (!cpus_empty(cpu_mask))
@@ -334,7 +334,7 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
        preempt_disable();
        cpu_id = smp_processor_id();
        mmc = &mm->context[cpu_id];
-       cpu_mask = mm->cpu_vm_mask;
+       cpu_mask = *mm_cpumask(mm);
        cpu_clear(cpu_id, cpu_mask);
 
 #ifdef DEBUG_SMP
@@ -424,7 +424,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
         * We have to send the IPI only to
         * CPUs affected.
         */
-       send_IPI_mask(cpumask, INVALIDATE_TLB_IPI, 0);
+       send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0);
 
        while (!cpus_empty(flush_cpumask)) {
                /* nothing. lockup detection does not belong here */
@@ -469,7 +469,7 @@ void smp_invalidate_interrupt(void)
                if (flush_mm == current->active_mm)
                        activate_context(flush_mm);
                else
-                       cpu_clear(cpu_id, flush_mm->cpu_vm_mask);
+                       cpumask_clear_cpu(cpu_id, mm_cpumask(flush_mm));
        } else {
                unsigned long va = flush_va;
 
@@ -546,14 +546,14 @@ static void stop_this_cpu(void *dummy)
        for ( ; ; );
 }
 
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
        send_IPI_mask(mask, CALL_FUNCTION_IPI, 0);
 }
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-       send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_IPI, 0);
+       send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_IPI, 0);
 }
 
 /*==========================================================================*
@@ -729,7 +729,7 @@ static void send_IPI_allbutself(int ipi_num, int try)
        cpumask = cpu_online_map;
        cpu_clear(smp_processor_id(), cpumask);
 
-       send_IPI_mask(cpumask, ipi_num, try);
+       send_IPI_mask(&cpumask, ipi_num, try);
 }
 
 /*==========================================================================*
@@ -752,7 +752,7 @@ static void send_IPI_allbutself(int ipi_num, int try)
  * ---------- --- --------------------------------------------------------
  *
  *==========================================================================*/
-static void send_IPI_mask(cpumask_t cpumask, int ipi_num, int try)
+static void send_IPI_mask(const struct cpumask *cpumask, int ipi_num, int try)
 {
        cpumask_t physid_mask, tmp;
        int cpu_id, phys_id;
@@ -761,11 +761,11 @@ static void send_IPI_mask(cpumask_t cpumask, int ipi_num, int try)
        if (num_cpus <= 1)      /* NO MP */
                return;
 
-       cpus_and(tmp, cpumask, cpu_online_map);
-       BUG_ON(!cpus_equal(cpumask, tmp));
+       cpumask_and(&tmp, cpumask, cpu_online_mask);
+       BUG_ON(!cpumask_equal(cpumask, &tmp));
 
        physid_mask = CPU_MASK_NONE;
-       for_each_cpu_mask(cpu_id, cpumask){
+       for_each_cpu(cpu_id, cpumask) {
                if ((phys_id = cpu_to_physid(cpu_id)) != -1)
                        cpu_set(phys_id, physid_mask);
        }
index 655ea1c47a0f2efccb8d4f84c58d9c4284eb7138..e034844cfc0d948095104ca874d8a9ca02135184 100644 (file)
@@ -178,7 +178,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
        for (phys_id = 0 ; phys_id < nr_cpu ; phys_id++)
                physid_set(phys_id, phys_cpu_present_map);
 #ifndef CONFIG_HOTPLUG_CPU
-       cpu_present_map = cpu_possible_map;
+       init_cpu_present(&cpu_possible_map);
 #endif
 
        show_mp_info(nr_cpu);
index 9c6bae6112e3ff16a4863a3d09e99d2fa8a500f7..57d640d4382c3883e79961e92c04d8f79aaeadaf 100644 (file)
@@ -33,8 +33,8 @@ verify "$3"
 
 # User may have a custom install script
 
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install - same as make zlilo
 
index 72bad65dba3a1d3b4091f074c22207aa8f9a277b..41230c595a8ee93f32f101544bf62556be6ffcdd 100644 (file)
@@ -42,9 +42,9 @@
  */
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union
-__attribute__((section(".data.init_task"), aligned(THREAD_SIZE)))
-       = { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data
+       __attribute__((aligned(THREAD_SIZE))) =
+               { INIT_THREAD_INFO(init_task) };
 
 /* initial task structure */
 struct task_struct init_task = INIT_TASK(init_task);
index 7f54efaf60bb12a0b980a0e05c3ae908f322e6d0..7deb402bfc751c9ba7a77f13afdd2c904b02a7ae 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/syscalls.h>
 #include <linux/mman.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/ipc.h>
 
 #include <asm/setup.h>
index 45e97a207fedaf57a2cab44f97d7d0370cf95e2b..cbf9dc3cc51dc6a8a8d297fa99d7b9d349100a03 100644 (file)
@@ -31,7 +31,6 @@ EXPORT_SYMBOL(init_task);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
index 70028163862981e5ce0f7082a737b97f73f88163..efdd090778a371d57ff749561a799e263755fc67 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/syscalls.h>
 #include <linux/mman.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/ipc.h>
 #include <linux/fs.h>
 
index 2db722d80d4d19133e8508c046c029a5c2c06cc1..bbd8327f18901d80d5eefe3e27015f65993a099f 100644 (file)
@@ -6,6 +6,7 @@ mainmenu "Linux/Microblaze Kernel Configuration"
 config MICROBLAZE
        def_bool y
        select HAVE_LMB
+       select USB_ARCH_HAS_EHCI
        select ARCH_WANT_OPTIONAL_GPIOLIB
 
 config SWAP
index 8439598d4655bb8192c79f696b60ac2d1ea05a2d..34187354304a6217d16ff3dab5315e605409ab05 100644 (file)
@@ -37,12 +37,12 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare
 CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
 
 # r31 holds current when in kernel mode
-KBUILD_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
+KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
 
 LDFLAGS                :=
 LDFLAGS_vmlinux        :=
 
-LIBGCC := $(shell $(CC) $(KBUILD_KERNEL) -print-libgcc-file-name)
+LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
 head-y := arch/microblaze/kernel/head.o
 libs-y += arch/microblaze/lib/
@@ -53,22 +53,41 @@ core-y += arch/microblaze/platform/
 
 boot := arch/microblaze/boot
 
+# Are we making a simpleImage.<boardname> target? If so, crack out the boardname
+DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
+
+ifneq ($(DTB),)
+       core-y  += $(boot)/
+endif
+
 # defines filename extension depending memory management type
 ifeq ($(CONFIG_MMU),)
 MMU := -nommu
 endif
 
-export MMU
+export MMU DTB
 
 all: linux.bin
 
+BOOT_TARGETS = linux.bin linux.bin.gz simpleImage.%
+
 archclean:
        $(Q)$(MAKE) $(clean)=$(boot)
 
-linux.bin linux.bin.gz: vmlinux
+$(BOOT_TARGETS): vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 define archhelp
-  echo  '* linux.bin    - Create raw binary'
-  echo  '  linux.bin.gz - Create compressed raw binary'
+  echo '* linux.bin    - Create raw binary'
+  echo '  linux.bin.gz - Create compressed raw binary'
+  echo '  simpleImage.<dt> - ELF image with $(arch)/boot/dts/<dt>.dts linked in'
+  echo '                   - stripped ELF with fdt blob'
+  echo '  simpleImage.<dt>.unstrip - full ELF image with fdt blob'
+  echo '  *_defconfig      - Select default config from arch/microblaze/configs'
+  echo ''
+  echo '  Targets with <dt> embed a device tree blob inside the image'
+  echo '  These targets support boards whose firmware does not'
+  echo '  support passing a device tree directly. Replace <dt> with the'
+  echo '  name of a dts file from the arch/microblaze/boot/dts/ directory'
+  echo '  (minus the .dts extension).'
 endef
index c2bb043a029d2df7891d17ae6e92e46e0360657b..21f13322a4cad74da687055e50eb840804dda78b 100644 (file)
@@ -2,10 +2,24 @@
 # arch/microblaze/boot/Makefile
 #
 
-targets := linux.bin linux.bin.gz
+obj-y += linked_dtb.o
+
+targets := linux.bin linux.bin.gz simpleImage.%
 
 OBJCOPYFLAGS_linux.bin  := -O binary
 
+# Where the DTS files live
+dtstree         := $(srctree)/$(src)/dts
+
+# Ensure system.dtb exists
+$(obj)/linked_dtb.o: $(obj)/system.dtb
+
+# Generate system.dtb from $(DTB).dtb
+ifneq ($(DTB),system)
+$(obj)/system.dtb: $(obj)/$(DTB).dtb
+       $(call if_changed,cp)
+endif
+
 $(obj)/linux.bin: vmlinux FORCE
        [ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \
        touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image"
@@ -16,4 +30,27 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE
        $(call if_changed,gzip)
        @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
-clean-kernel += linux.bin linux.bin.gz
+quiet_cmd_cp = CP      $< $@$2
+       cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
+
+quiet_cmd_strip = STRIP   $@
+      cmd_strip = $(STRIP) -K _start -K _end -K __log_buf -K _fdt_start vmlinux -o $@
+
+$(obj)/simpleImage.%: vmlinux FORCE
+       $(call if_changed,cp,.unstrip)
+       $(call if_changed,strip)
+       @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+
+# Path to the device tree compiler built in scripts/dtc
+DTC = $(objtree)/scripts/dtc/dtc
+
+# Rule to build device tree blobs
+quiet_cmd_dtc = DTC     $@
+       cmd_dtc = $(DTC) -O dtb -o $(obj)/$*.dtb -b 0 -p 1024 $(dtstree)/$*.dts
+
+$(obj)/%.dtb: $(dtstree)/%.dts FORCE
+       $(call if_changed,dtc)
+
+clean-kernel += linux.bin linux.bin.gz simpleImage.*
+
+clean-files += *.dtb
diff --git a/arch/microblaze/boot/dts/system.dts b/arch/microblaze/boot/dts/system.dts
new file mode 120000 (symlink)
index 0000000..7cb6578
--- /dev/null
@@ -0,0 +1 @@
+../../platform/generic/system.dts
\ No newline at end of file
diff --git a/arch/microblaze/boot/linked_dtb.S b/arch/microblaze/boot/linked_dtb.S
new file mode 100644 (file)
index 0000000..cb2b537
--- /dev/null
@@ -0,0 +1,3 @@
+.section __fdt_blob,"a"
+.incbin "arch/microblaze/boot/system.dtb"
+
index 09c32962b66f76f4fa24fd9eb76be75a56428b47..bb7c374713adb7ddd40f0384e34ad305b5ad14dc 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31-rc6
-# Tue Aug 18 11:00:02 2009
+# Linux kernel version: 2.6.31
+# Thu Sep 24 10:28:50 2009
 #
 CONFIG_MICROBLAZE=y
 # CONFIG_SWAP is not set
@@ -42,11 +42,12 @@ CONFIG_SYSVIPC_SYSCTL=y
 #
 # RCU Subsystem
 #
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
 # CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
@@ -260,6 +261,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_NETFILTER is not set
 # CONFIG_IP_DCCP is not set
 # CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
@@ -357,12 +359,10 @@ CONFIG_NET_ETHERNET=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_KS8842 is not set
+CONFIG_XILINX_EMACLITE=y
 CONFIG_NETDEV_1000=y
 CONFIG_NETDEV_10000=y
-
-#
-# Wireless LAN
-#
+CONFIG_WLAN=y
 # CONFIG_WLAN_PRE80211 is not set
 # CONFIG_WLAN_80211 is not set
 
@@ -460,6 +460,7 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
 # CONFIG_DISPLAY_SUPPORT is not set
 # CONFIG_SOUND is not set
 # CONFIG_USB_SUPPORT is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
@@ -488,6 +489,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_FILE_LOCKING=y
 CONFIG_FSNOTIFY=y
 # CONFIG_DNOTIFY is not set
@@ -546,7 +548,6 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -671,18 +672,20 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
 # CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_SYSCTL_SYSCALL_CHECK is not set
 # CONFIG_PAGE_POISONING is not set
 # CONFIG_SAMPLES is not set
 # CONFIG_KMEMCHECK is not set
 CONFIG_EARLY_PRINTK=y
-CONFIG_HEART_BEAT=y
+# CONFIG_HEART_BEAT is not set
 CONFIG_DEBUG_BOOTMEM=y
 
 #
@@ -697,7 +700,6 @@ CONFIG_CRYPTO=y
 #
 # Crypto core or helper
 #
-# CONFIG_CRYPTO_FIPS is not set
 # CONFIG_CRYPTO_MANAGER is not set
 # CONFIG_CRYPTO_MANAGER2 is not set
 # CONFIG_CRYPTO_GF128MUL is not set
@@ -729,11 +731,13 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_HMAC is not set
 # CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
 
 #
 # Digest
 #
 # CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
 # CONFIG_CRYPTO_MD4 is not set
 # CONFIG_CRYPTO_MD5 is not set
 # CONFIG_CRYPTO_MICHAEL_MIC is not set
index 8b638615a972ceff7e7b2883f8ada7407a8cf89f..adb839bab7044ea955b1e1150b6b56c9a3cb9cc4 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31-rc6
-# Tue Aug 18 10:35:30 2009
+# Linux kernel version: 2.6.31
+# Thu Sep 24 10:29:43 2009
 #
 CONFIG_MICROBLAZE=y
 # CONFIG_SWAP is not set
@@ -44,11 +44,12 @@ CONFIG_BSD_PROCESS_ACCT_V3=y
 #
 # RCU Subsystem
 #
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
 # CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
@@ -243,6 +244,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_NETFILTER is not set
 # CONFIG_IP_DCCP is not set
 # CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
@@ -272,6 +274,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_AF_RXRPC is not set
 CONFIG_WIRELESS=y
 # CONFIG_CFG80211 is not set
+CONFIG_CFG80211_DEFAULT_PS_VALUE=0
 CONFIG_WIRELESS_OLD_REGULATORY=y
 # CONFIG_WIRELESS_EXT is not set
 # CONFIG_LIB80211 is not set
@@ -279,7 +282,6 @@ CONFIG_WIRELESS_OLD_REGULATORY=y
 #
 # CFG80211 needs to be enabled for MAC80211
 #
-CONFIG_MAC80211_DEFAULT_PS_VALUE=0
 # CONFIG_WIMAX is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
@@ -304,6 +306,7 @@ CONFIG_MTD_PARTITIONS=y
 # CONFIG_MTD_TESTS is not set
 # CONFIG_MTD_REDBOOT_PARTS is not set
 CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_OF_PARTS is not set
 # CONFIG_MTD_AR7_PARTS is not set
 
 #
@@ -349,6 +352,7 @@ CONFIG_MTD_RAM=y
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 # CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_PHYSMAP_OF is not set
 CONFIG_MTD_UCLINUX=y
 # CONFIG_MTD_PLATRAM is not set
 
@@ -429,12 +433,10 @@ CONFIG_NET_ETHERNET=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_KS8842 is not set
+# CONFIG_XILINX_EMACLITE is not set
 CONFIG_NETDEV_1000=y
 CONFIG_NETDEV_10000=y
-
-#
-# Wireless LAN
-#
+CONFIG_WLAN=y
 # CONFIG_WLAN_PRE80211 is not set
 # CONFIG_WLAN_80211 is not set
 
@@ -535,7 +537,7 @@ CONFIG_VIDEO_OUTPUT_CONTROL=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB_ARCH_HAS_EHCI is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
 # CONFIG_USB is not set
 # CONFIG_USB_OTG_WHITELIST is not set
 # CONFIG_USB_OTG_BLACKLIST_HUB is not set
@@ -579,6 +581,7 @@ CONFIG_FS_POSIX_ACL=y
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_FILE_LOCKING=y
 CONFIG_FSNOTIFY=y
 # CONFIG_DNOTIFY is not set
@@ -639,7 +642,6 @@ CONFIG_ROMFS_BACKED_BY_BLOCK=y
 CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -710,18 +712,20 @@ CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 # CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
 # CONFIG_FAULT_INJECTION is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_PAGE_POISONING is not set
 # CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_EARLY_PRINTK=y
-CONFIG_HEART_BEAT=y
+# CONFIG_HEART_BEAT is not set
 # CONFIG_DEBUG_BOOTMEM is not set
 
 #
@@ -736,7 +740,6 @@ CONFIG_CRYPTO=y
 #
 # Crypto core or helper
 #
-# CONFIG_CRYPTO_FIPS is not set
 # CONFIG_CRYPTO_MANAGER is not set
 # CONFIG_CRYPTO_MANAGER2 is not set
 # CONFIG_CRYPTO_GF128MUL is not set
@@ -768,11 +771,13 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_HMAC is not set
 # CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
 
 #
 # Digest
 #
 # CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
 # CONFIG_CRYPTO_MD4 is not set
 # CONFIG_CRYPTO_MD5 is not set
 # CONFIG_CRYPTO_MICHAEL_MIC is not set
diff --git a/arch/microblaze/include/asm/asm-compat.h b/arch/microblaze/include/asm/asm-compat.h
new file mode 100644 (file)
index 0000000..e7bc9dc
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef _ASM_MICROBLAZE_ASM_COMPAT_H
+#define _ASM_MICROBLAZE_ASM_COMPAT_H
+
+#include <asm/types.h>
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)  __VA_ARGS__
+#  define ASM_CONST(x)         x
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...)        #__VA_ARGS__
+#  define stringify_in_c(...)  __stringify_in_c(__VA_ARGS__) " "
+#  define __ASM_CONST(x)       x##UL
+#  define ASM_CONST(x)         __ASM_CONST(x)
+#endif
+
+#endif /* _ASM_MICROBLAZE_ASM_COMPAT_H */
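ASM_CONST() lets one header constant serve both the assembler, which
rejects a UL suffix, and C, which wants one to avoid signedness surprises.
A compilable sketch of the C-side branch (the address value is made up):

    #include <stdio.h>

    #define __ASM_CONST(x) x##UL
    #define ASM_CONST(x)   __ASM_CONST(x)

    int main(void)
    {
            unsigned long base = ASM_CONST(0xc0000000); /* 0xc0000000UL */

            printf("base = 0x%lx\n", base);
            return 0;
    }
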
index 7c3ec13b44d860c8180a3bc15460e1dbeec7886c..fc9997b73c09de6183f20ceea4631d590c6d4697 100644 (file)
@@ -210,6 +210,9 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size,
 #define in_be32(a) __raw_readl((const void __iomem __force *)(a))
 #define in_be16(a) __raw_readw(a)
 
+#define writel_be(v, a)        out_be32((__force unsigned *)a, v)
+#define readl_be(a)    in_be32((__force unsigned *)a)
+
 /*
  * Little endian
  */
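A userspace model of the new accessors, purely illustrative: in a driver
the address would come from ioremap(), and on big-endian MicroBlaze the
in_be32/out_be32 pair behind these macros is a plain 32-bit load and store:

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t fake_reg;       /* stand-in for an ioremap()ed register */

    static void out_be32(volatile uint32_t *a, uint32_t v) { *a = v; }
    static uint32_t in_be32(const volatile uint32_t *a) { return *a; }

    #define writel_be(v, a) out_be32((a), (v))
    #define readl_be(a)     in_be32(a)

    int main(void)
    {
            writel_be(0xdeadbeef, &fake_reg);
            printf("0x%08x\n", (unsigned)readl_be(&fake_reg));
            return 0;
    }
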
diff --git a/arch/microblaze/include/asm/ipc.h b/arch/microblaze/include/asm/ipc.h
deleted file mode 100644 (file)
index a46e3d9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipc.h>
index 72aceae88680a2b427253d2941a3f29452f34e26..880c988c2237e2a333bbb7cc51c939a385ea9d07 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/pfn.h>
 #include <asm/setup.h>
+#include <asm/asm-compat.h>
 #include <linux/const.h>
 
 #ifdef __KERNEL__
@@ -26,6 +27,8 @@
 #define PAGE_SIZE      (_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK      (~(PAGE_SIZE-1))
 
+#define LOAD_OFFSET    ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR))
+
 #ifndef __ASSEMBLY__
 
 #define PAGE_UP(addr)  (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
index 27f8dafd8c34b25f9f2b2463a62b025611d8de14..ed67c9ed15b8346f8752ff506b59ffe6e43afe17 100644 (file)
@@ -38,7 +38,7 @@ extern void early_console_reg_tlb_alloc(unsigned int addr);
 void time_init(void);
 void init_IRQ(void);
 void machine_early_init(const char *cmdline, unsigned int ram,
-                                               unsigned int fdt);
+                       unsigned int fdt, unsigned int msr);
 
 void machine_restart(char *cmd);
 void machine_shutdown(void);
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
new file mode 100644 (file)
index 0000000..048dfcd
--- /dev/null
@@ -0,0 +1,105 @@
+#ifndef __ASM_MICROBLAZE_SYSCALL_H
+#define __ASM_MICROBLAZE_SYSCALL_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+/* The system call number is given by the user in R12 */
+static inline long syscall_get_nr(struct task_struct *task,
+                                 struct pt_regs *regs)
+{
+       return regs->r12;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+                                   struct pt_regs *regs)
+{
+       /* TODO.  */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+                                    struct pt_regs *regs)
+{
+       return IS_ERR_VALUE(regs->r3) ? regs->r3 : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+                                           struct pt_regs *regs)
+{
+       return regs->r3;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+                                           struct pt_regs *regs,
+                                           int error, long val)
+{
+       if (error)
+               regs->r3 = -error;
+       else
+               regs->r3 = val;
+}
+
+static inline microblaze_reg_t microblaze_get_syscall_arg(struct pt_regs *regs,
+                                                         unsigned int n)
+{
+       switch (n) {
+       case 5: return regs->r10;
+       case 4: return regs->r9;
+       case 3: return regs->r8;
+       case 2: return regs->r7;
+       case 1: return regs->r6;
+       case 0: return regs->r5;
+       default:
+               BUG();
+       }
+       return ~0;
+}
+
+static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
+                                             unsigned int n,
+                                             unsigned long val)
+{
+       switch (n) {
+       case 5:
+               regs->r10 = val;
+               break;
+       case 4:
+               regs->r9 = val;
+               break;
+       case 3:
+               regs->r8 = val;
+               break;
+       case 2:
+               regs->r7 = val;
+               break;
+       case 1:
+               regs->r6 = val;
+               break;
+       case 0:
+               regs->r5 = val;
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        unsigned long *args)
+{
+       while (n--)
+               *args++ = microblaze_get_syscall_arg(regs, i++);
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        const unsigned long *args)
+{
+       while (n--)
+               microblaze_set_syscall_arg(regs, i++, *args++);
+}
+
+#endif /* __ASM_MICROBLAZE_SYSCALL_H */
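A standalone sketch of the r5..r10 argument convention the header encodes;
the register contents here are invented:

    #include <stdio.h>

    /* Userspace stand-in for the MicroBlaze pt_regs argument registers. */
    struct pt_regs { unsigned long r5, r6, r7, r8, r9, r10; };

    static unsigned long get_arg(const struct pt_regs *regs, unsigned int n)
    {
            switch (n) {
            case 0: return regs->r5;
            case 1: return regs->r6;
            case 2: return regs->r7;
            case 3: return regs->r8;
            case 4: return regs->r9;
            case 5: return regs->r10;
            default: return ~0UL;
            }
    }

    int main(void)
    {
            struct pt_regs regs = { 11, 22, 33, 44, 55, 66 };
            unsigned int i;

            for (i = 0; i < 6; i++)
                    printf("arg%u = %lu\n", i, get_arg(&regs, i));
            return 0;
    }
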
index c411c6757deb845a984ae734e108fa216ceb18c0..3539babc1c18ddb2ef59c3ae3e4c5ee2b571ca12 100644 (file)
@@ -28,6 +28,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
        {"7.10.d", 0x0b},
        {"7.20.a", 0x0c},
        {"7.20.b", 0x0d},
+       {"7.20.c", 0x0e},
        /* FIXME There is no keycode defined in MBV for these versions */
        {"2.10.a", 0x10},
        {"3.00.a", 0x20},
@@ -49,6 +50,8 @@ const struct family_string_key family_string_lookup[] = {
        {"spartan3a", 0xa},
        {"spartan3an", 0xb},
        {"spartan3adsp", 0xc},
+       {"spartan6", 0xd},
+       {"virtex6", 0xe},
        /* FIXME There is no key code defined for spartan2 */
        {"spartan2", 0xf0},
        {NULL, 0},
index c7353e79f4a22f2ddb70d7b25d5e8c79783d8f0e..acc1f05d1e2c04665f2224d91bb9d61bb9929e48 100644 (file)
@@ -308,38 +308,69 @@ C_ENTRY(_user_exception):
        swi     r12, r1, PTO+PT_R0;
        tovirt(r1,r1)
 
-       la      r15, r0, ret_from_trap-8
 /* where the trap should return need -8 to adjust for rtsd r15, 8*/
 /* Jump to the appropriate function for the system call number in r12
  * (r12 is not preserved), or return an error if r12 is not valid. The LP
  * register should point to the location where
  * the called function should return.  [note that MAKE_SYS_CALL uses label 1] */
-       /* See if the system call number is valid.  */
+
+       # Step into virtual mode.
+       set_vms;
+       addik   r11, r0, 3f
+       rtid    r11, 0
+       nop
+3:
+       add     r11, r0, CURRENT_TASK    /* Get current task ptr into r11 */
+       lwi     r11, r11, TS_THREAD_INFO /* get thread info */
+       lwi     r11, r11, TI_FLAGS       /* get flags in thread info */
+       andi    r11, r11, _TIF_WORK_SYSCALL_MASK
+       beqi    r11, 4f
+
+       addik   r3, r0, -ENOSYS
+       swi     r3, r1, PTO + PT_R3
+       brlid   r15, do_syscall_trace_enter
+       addik   r5, r1, PTO + PT_R0
+
+       # do_syscall_trace_enter returns the new syscall nr.
+       addk    r12, r0, r3
+       lwi     r5, r1, PTO+PT_R5;
+       lwi     r6, r1, PTO+PT_R6;
+       lwi     r7, r1, PTO+PT_R7;
+       lwi     r8, r1, PTO+PT_R8;
+       lwi     r9, r1, PTO+PT_R9;
+       lwi     r10, r1, PTO+PT_R10;
+4:
+/* Jump to the appropriate function for the system call number in r12
+ * (r12 is not preserved), or return an error if r12 is not valid.
+ * The LP register should point to the location where the called function
+ * should return.  [note that MAKE_SYS_CALL uses label 1] */
+       /* See if the system call number is valid */
        addi    r11, r12, -__NR_syscalls;
-       bgei    r11,1f;
+       bgei    r11,5f;
        /* Figure out which function to use for this system call.  */
        /* Note Microblaze barrel shift is optional, so don't rely on it */
        add     r12, r12, r12;                  /* convert num -> ptr */
        add     r12, r12, r12;
 
        /* Track syscalls and store them to r0_ram */
-       lwi     r3, r12, 0x400 + TOPHYS(r0_ram)
+       lwi     r3, r12, 0x400 + r0_ram
        addi    r3, r3, 1
-       swi     r3, r12, 0x400 + TOPHYS(r0_ram)
+       swi     r3, r12, 0x400 + r0_ram
+
+       # Find and jump into the syscall handler.
+       lwi     r12, r12, sys_call_table
+       /* where the trap should return; -8 adjusts for "rtsd r15, 8" */
+       la      r15, r0, ret_from_trap-8
+       bra     r12
 
-       lwi     r12, r12, TOPHYS(sys_call_table); /* Function ptr */
-       /* Make the system call.  to r12*/
-       set_vms;
-       rtid    r12, 0;
-       nop;
        /* The syscall number is invalid, return an error.  */
-1:     VM_ON;  /* RETURN() expects virtual mode*/
+5:
        addi    r3, r0, -ENOSYS;
        rtsd    r15,8;          /* looks like a normal subroutine return */
        or      r0, r0, r0
 
 
-/* Entry point used to return from a syscall/trap */
+/* Entry point used to return from a syscall/trap */
 /* We re-enable BIP bit before state restore */
 C_ENTRY(ret_from_trap):
        set_bip;                        /*  Ints masked for state restore*/
@@ -347,6 +378,23 @@ C_ENTRY(ret_from_trap):
 /* See if returning to kernel mode, if so, skip resched &c.  */
        bnei    r11, 2f;
 
+       /* We're returning to user mode, so check for various conditions that
+        * trigger rescheduling. */
+       # FIXME: Restructure all these flag checks.
+       add     r11, r0, CURRENT_TASK;  /* Get current task ptr into r11 */
+       lwi     r11, r11, TS_THREAD_INFO;       /* get thread info */
+       lwi     r11, r11, TI_FLAGS;             /* get flags in thread info */
+       andi    r11, r11, _TIF_WORK_SYSCALL_MASK
+       beqi    r11, 1f
+
+       swi     r3, r1, PTO + PT_R3
+       swi     r4, r1, PTO + PT_R4
+       brlid   r15, do_syscall_trace_leave
+       addik   r5, r1, PTO + PT_R0
+       lwi     r3, r1, PTO + PT_R3
+       lwi     r4, r1, PTO + PT_R4
+1:
+
        /* We're returning to user mode, so check for various conditions that
         * trigger rescheduling. */
        /* Get current task ptr into r11 */
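In C terms, the traced-syscall path added above behaves roughly like the
following standalone model (simplified: the real code also reloads r5-r10
after the tracer may have rewritten them, and the values here are made up):

    #include <stdio.h>

    #define NR_SYSCALLS 4           /* illustrative */
    #define ENOSYS 38

    /* A tracer hook may pass the number through, rewrite it, or veto
     * the call by returning -1, as do_syscall_trace_enter() does. */
    static long trace_enter(long nr) { return nr; }

    static long dispatch(long nr, int traced)
    {
            if (traced)
                    nr = trace_enter(nr);
            if (nr < 0 || nr >= NR_SYSCALLS)
                    return -ENOSYS;
            return nr;              /* stand-in for sys_call_table[nr]() */
    }

    int main(void)
    {
            printf("%ld %ld\n", dispatch(2, 1), dispatch(99, 0));
            return 0;
    }
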
index 0cb64a31e89a78870319b3f78ac07ca18d16aa80..d9f70f83097fc5f3c0f010b6a96f1c33f383ba47 100644 (file)
@@ -72,7 +72,8 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 #endif
 
 #if 0
-       printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n",
+       printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x " \
+                                                       "ESR=%08x\n",
                        type, user_mode(regs) ? "user" : "kernel", fsr,
                        (unsigned int) regs->pc, (unsigned int) regs->esr);
 #endif
@@ -80,42 +81,50 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
        switch (type & 0x1F) {
        case MICROBLAZE_ILL_OPCODE_EXCEPTION:
                if (user_mode(regs)) {
-                       printk(KERN_WARNING "Illegal opcode exception in user mode.\n");
+                       pr_debug(KERN_WARNING "Illegal opcode exception " \
+                                                       "in user mode.\n");
                        _exception(SIGILL, regs, ILL_ILLOPC, addr);
                        return;
                }
-               printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n");
+               printk(KERN_WARNING "Illegal opcode exception " \
+                                                       "in kernel mode.\n");
                die("opcode exception", regs, SIGBUS);
                break;
        case MICROBLAZE_IBUS_EXCEPTION:
                if (user_mode(regs)) {
-                       printk(KERN_WARNING "Instruction bus error exception in user mode.\n");
+                       pr_debug(KERN_WARNING "Instruction bus error " \
+                                               "exception in user mode.\n");
                        _exception(SIGBUS, regs, BUS_ADRERR, addr);
                        return;
                }
-               printk(KERN_WARNING "Instruction bus error exception in kernel mode.\n");
+               printk(KERN_WARNING "Instruction bus error exception " \
+                                                       "in kernel mode.\n");
                die("bus exception", regs, SIGBUS);
                break;
        case MICROBLAZE_DBUS_EXCEPTION:
                if (user_mode(regs)) {
-                       printk(KERN_WARNING "Data bus error exception in user mode.\n");
+                       pr_debug(KERN_WARNING "Data bus error exception " \
+                                                       "in user mode.\n");
                        _exception(SIGBUS, regs, BUS_ADRERR, addr);
                        return;
                }
-               printk(KERN_WARNING "Data bus error exception in kernel mode.\n");
+               printk(KERN_WARNING "Data bus error exception " \
+                                                       "in kernel mode.\n");
                die("bus exception", regs, SIGBUS);
                break;
        case MICROBLAZE_DIV_ZERO_EXCEPTION:
                if (user_mode(regs)) {
-                       printk(KERN_WARNING "Divide by zero exception in user mode\n");
-                       _exception(SIGILL, regs, ILL_ILLOPC, addr);
+                       pr_debug(KERN_WARNING "Divide by zero exception " \
+                                                       "in user mode\n");
+                       _exception(SIGILL, regs, FPE_INTDIV, addr);
                        return;
                }
-               printk(KERN_WARNING "Divide by zero exception in kernel mode.\n");
+               printk(KERN_WARNING "Divide by zero exception " \
+                                                       "in kernel mode.\n");
                die("Divide by exception", regs, SIGBUS);
                break;
        case MICROBLAZE_FPU_EXCEPTION:
-               printk(KERN_WARNING "FPU exception\n");
+               pr_debug(KERN_WARNING "FPU exception\n");
                /* IEEE FP exception */
                /* I removed fsr variable and use code var for storing fsr */
                if (fsr & FSR_IO)
@@ -133,7 +142,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 
 #ifdef CONFIG_MMU
        case MICROBLAZE_PRIVILEGED_EXCEPTION:
-               printk(KERN_WARNING "Privileged exception\n");
+               pr_debug(KERN_WARNING "Privileged exception\n");
                /* "brk r0,r0" - used as debug breakpoint */
                if (get_user(code, (unsigned long *)regs->pc) == 0
                        && code == 0x980c0000) {
index e41c6ce2a7be3e50589e48e7f661852afabd7709..697ce3007f3046366155e7162bd8f222efdd1fd7 100644 (file)
@@ -54,6 +54,16 @@ ENTRY(_start)
        mfs     r1, rmsr
        andi    r1, r1, ~2
        mts     rmsr, r1
+/*
+ * Check whether this MicroBlaze implements the msr instructions: set the
+ * carry bit, try to clear it with msrclr, then read the carry back.
+ * If msrclr exists, the carry reads back clear.
+ */
+       /* r8 = 0 - msr instructions available, 1 - not available */
+       rsubi   r0, r0, 1       /* set the carry bit */
+       msrclr  r0, 0x4         /* try to clear it */
+       /* read the carry bit, r8 will be '0' if msrclr exists */
+       addik   r8, r0, 0
 
 /* r7 may point to an FDT, or there may be one linked in.
    if it's in r7, we've got to save it away ASAP.
@@ -209,8 +219,8 @@ start_here:
         * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for
         * the function.
         */
-       la      r8, r0, machine_early_init
-       brald   r15, r8
+       la      r9, r0, machine_early_init
+       brald   r15, r9
        nop
 
 #ifndef CONFIG_MMU
index 3288c9737671adc6362ba6a079a717d342fa487a..6b0288ebccd6674c21c9ed0bee8a095903507412 100644 (file)
 #define NUM_TO_REG(num)                r ## num
 
 #ifdef CONFIG_MMU
-/* FIXME you can't change first load of MSR because there is
- * hardcoded jump bri 4 */
        #define RESTORE_STATE                   \
+               lwi     r5, r1, 0;              \
+               mts     rmsr, r5;               \
+               nop;                            \
                lwi     r3, r1, PT_R3;          \
                lwi     r4, r1, PT_R4;          \
                lwi     r5, r1, PT_R5;          \
@@ -309,6 +310,9 @@ _hw_exception_handler:
        lwi     r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */
 #endif
 
+       mfs     r5, rmsr;
+       nop
+       swi     r5, r1, 0;
        mfs     r3, resr
        nop
        mfs     r4, rear;
@@ -380,6 +384,8 @@ handle_other_ex: /* Handle Other exceptions here */
        addk    r8, r17, r0; /* Load exception address */
        bralid  r15, full_exception; /* Branch to the handler */
        nop;
+       mts     r0, rfsr;       /* Clear sticky fsr */
+       nop
 
        /*
         * Trigger execution of the signal handler by enabling
index 67da22579b62190215d743a2dd33a6c9fe5c0e83..b5d711f94ff85da0f8c28103d7be857635e29010 100644 (file)
@@ -19,9 +19,8 @@
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-{ INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 struct task_struct init_task = INIT_TASK(init_task);
 EXPORT_SYMBOL(init_task);
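
As the removed attribute shows, the new `__init_task_data` annotation (from
<linux/init_task.h>) is shorthand for the section placement the old code
spelled out by hand. A minimal sketch, matching the attribute deleted above:

    /* Roughly what __init_task_data expands to (see linux/init_task.h): */
    #define __init_task_data \
            __attribute__((__section__(".data.init_task")))

    /* The init task's stack/thread_info thus lands in .data.init_task,
     * which the consolidated data-section macros used by the rewritten
     * linker scripts pick up with THREAD_SIZE alignment. */

The same substitution repeats for every architecture's init_task.c touched
by this merge.
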
index 00b12c6d5326767fc9260df77801a699050cdee7..4201c743cc9fb33e17a52ae057940345d79e2681 100644 (file)
@@ -235,6 +235,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
        regs->pc = pc;
        regs->r1 = usp;
        regs->pt_mode = 0;
+       regs->msr |= MSR_UMS;
 }
 
 #ifdef CONFIG_MMU
index 53ff39af6a5c0eebfcf156dd02969d231896229d..4b3ac32754de1f747d069dc69d2e6e377954fdfe 100644 (file)
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/audit.h>
+#include <linux/seccomp.h>
+#include <linux/tracehook.h>
 
 #include <linux/errno.h>
 #include <asm/processor.h>
@@ -174,6 +178,64 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
        return rval;
 }
 
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
+{
+       long ret = 0;
+
+       secure_computing(regs->r12);
+
+       if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+           tracehook_report_syscall_entry(regs))
+               /*
+                * Tracing decided this syscall should not happen.
+                * We'll return a bogus call number to get an ENOSYS
+                * error, but leave the original number in regs->r12.
+                */
+               ret = -1L;
+
+       if (unlikely(current->audit_context))
+               audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
+                                   regs->r5, regs->r6,
+                                   regs->r7, regs->r8);
+
+       return ret ?: regs->r12;
+}
+
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
+{
+       int step;
+
+       if (unlikely(current->audit_context))
+               audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
+
+       step = test_thread_flag(TIF_SINGLESTEP);
+       if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+               tracehook_report_syscall_exit(regs, step);
+}
+
+#if 0
+static asmlinkage void syscall_trace(void)
+{
+       if (!test_thread_flag(TIF_SYSCALL_TRACE))
+               return;
+       if (!(current->ptrace & PT_PTRACED))
+               return;
+       /* The 0x80 provides a way for the tracing parent to distinguish
+        * between a syscall stop and SIGTRAP delivery */
+       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+                               ? 0x80 : 0));
+       /*
+        * this isn't the same as continuing with a signal, but it will do
+        * for normal use. strace only continues with a signal if the
+        * stopping signal is not SIGTRAP. -brl
+        */
+       if (current->exit_code) {
+               send_sig(current->exit_code, current, 1);
+               current->exit_code = 0;
+       }
+}
+#endif
+
 void ptrace_disable(struct task_struct *child)
 {
        /* nothing to do */
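
The return statement in do_syscall_trace_enter() above relies on GCC's
binary `?:` extension: `a ?: b` means `a ? a : b`, with `a` evaluated only
once. A stand-alone illustration (hypothetical helper, not part of the
patch):

    /* Returns -1L when tracing vetoed the syscall, otherwise the
     * syscall number the caller loaded into r12. */
    static long pick_syscall_nr(long ret, long r12)
    {
            return ret ?: r12;      /* same as: ret ? ret : r12 */
    }
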
index 2a97bf513b64c691e81b4bd9867f537aefb6f606..8c1e0f4dcf18b63c6323a3b8042c68c3774621e0 100644 (file)
@@ -94,7 +94,7 @@ inline unsigned get_romfs_len(unsigned *addr)
 #endif /* CONFIG_MTD_UCLINUX_EBSS */
 
 void __init machine_early_init(const char *cmdline, unsigned int ram,
-               unsigned int fdt)
+               unsigned int fdt, unsigned int msr)
 {
        unsigned long *src, *dst = (unsigned long *)0x0;
 
@@ -157,6 +157,16 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
        early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
 #endif
 
+#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
+       if (msr)
+               early_printk("!!! Kernel is built to use MSR instructions, "
+                               "but the CPU does not have them: %d\n", msr);
+#else
+       if (!msr)
+               early_printk("!!! Kernel is built without MSR instructions, "
+                               "but the CPU does have them: %d\n", msr);
+#endif
+
        for (src = __ivt_start; src < __ivt_end; src++, dst++)
                *dst = *src;
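
The new `msr` argument carries the head.S probe result (r8: 0 when the MSR
instructions exist) so the build-time choice can be checked against the
running CPU. A compact restatement of that check, with a plain macro
standing in for the Kconfig symbol (illustrative only):

    #define BUILT_WITH_MSR 1   /* stand-in for CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */

    static void warn_msr_mismatch(unsigned int msr)
    {
            if (BUILT_WITH_MSR && msr)
                    early_printk("kernel uses MSR instructions the CPU lacks\n");
            else if (!BUILT_WITH_MSR && !msr)
                    early_printk("CPU has MSR instructions this kernel will not use\n");
    }
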
 
index b96f1682bb24800d8c89316ad18007e27a45ca55..07cabed4b947fafbc4cab9d89201c2ba42aebfdf 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/mman.h>
 #include <linux/sys.h>
 #include <linux/ipc.h>
-#include <linux/utsname.h>
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/err.h>
index ec5fa91a48d83bf515912011bd0f48e6419113e6..e704188d7855ec6115ddef659505ffba6b6d5578 100644 (file)
@@ -12,13 +12,16 @@ OUTPUT_FORMAT("elf32-microblaze", "elf32-microblaze", "elf32-microblaze")
 OUTPUT_ARCH(microblaze)
 ENTRY(_start)
 
+#include <asm/page.h>
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
 
 jiffies = jiffies_64 + 4;
 
 SECTIONS {
        . = CONFIG_KERNEL_START;
-       .text : {
+       _start = CONFIG_KERNEL_BASE_ADDR;
+       .text : AT(ADDR(.text) - LOAD_OFFSET) {
                _text = . ;
                _stext = . ;
                *(.text .text.*)
@@ -33,24 +36,22 @@ SECTIONS {
        }
 
        . = ALIGN (4) ;
-       _fdt_start = . ; /* place for fdt blob */
-       . = . + 0x4000;
-       _fdt_end = . ;
+       __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) {
+               _fdt_start = . ;                /* place for fdt blob */
+               *(__fdt_blob) ;                 /* Any link-placed DTB */
+               . = _fdt_start + 0x4000;        /* Pad up to 16 kbytes */
+               _fdt_end = . ;
+       }
 
        . = ALIGN(16);
        RODATA
-       . = ALIGN(16);
-       __ex_table : {
-               __start___ex_table = .;
-               *(__ex_table)
-               __stop___ex_table = .;
-       }
+       EXCEPTION_TABLE(16)
 
        /*
         * sdata2 section can go anywhere, but must be word aligned
         * and SDA2_BASE must point to the middle of it
         */
-       .sdata2 : {
+       .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
                _ssrw = .;
                . = ALIGN(4096); /* page aligned when MMU used - origin 0x8 */
                *(.sdata2)
@@ -61,12 +62,7 @@ SECTIONS {
        }
 
        _sdata = . ;
-       .data ALIGN (4096) : { /* page aligned when MMU used - origin 0x4 */
-               DATA_DATA
-               CONSTRUCTORS
-       }
-       . = ALIGN(32);
-       .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+       RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
        _edata = . ;
 
        /* Reserve some low RAM for r0 based memory references */
@@ -74,18 +70,14 @@ SECTIONS {
        r0_ram = . ;
        . = . +  4096;  /* a page should be enough */
 
-       /* The initial task */
-       . = ALIGN(8192);
-       .data.init_task : { *(.data.init_task) }
-
        /* Under the microblaze ABI, .sdata and .sbss must be contiguous */
        . = ALIGN(8);
-       .sdata : {
+       .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
                _ssro = .;
                *(.sdata)
        }
 
-       .sbss : {
+       .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {
                _ssbss = .;
                *(.sbss)
                _esbss = .;
@@ -96,47 +88,36 @@ SECTIONS {
 
        __init_begin = .;
 
-       . = ALIGN(4096);
-       .init.text : {
-               _sinittext = . ;
-               INIT_TEXT
-               _einittext = .;
-       }
+       INIT_TEXT_SECTION(PAGE_SIZE)
 
-       .init.data : {
+       .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
                INIT_DATA
        }
 
        . = ALIGN(4);
-       .init.ivt : {
+       .init.ivt : AT(ADDR(.init.ivt) - LOAD_OFFSET) {
                __ivt_start = .;
                *(.init.ivt)
                __ivt_end = .;
        }
 
-       .init.setup : {
-               __setup_start = .;
-               *(.init.setup)
-               __setup_end = .;
+       .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+               INIT_SETUP(0)
        }
 
-       .initcall.init : {
-               __initcall_start = .;
-               INITCALLS
-               __initcall_end = .;
+       .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET ) {
+               INIT_CALLS
        }
 
-       .con_initcall.init : {
-               __con_initcall_start = .;
-               *(.con_initcall.init)
-               __con_initcall_end = .;
+       .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+               CON_INITCALL
        }
 
        SECURITY_INIT
 
        __init_end_before_initramfs = .;
 
-       .init.ramfs ALIGN(4096) : {
+       .init.ramfs ALIGN(4096) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
                __initramfs_start = .;
                *(.init.ramfs)
                __initramfs_end = .;
@@ -152,7 +133,8 @@ SECTIONS {
        }
        __init_end = .;
 
-       .bss ALIGN (4096) : { /* page aligned when MMU used */
+       .bss ALIGN (4096) : AT(ADDR(.bss) - LOAD_OFFSET) {
+               /* page aligned when MMU used */
                __bss_start = . ;
                        *(.bss*)
                        *(COMMON)
index 1110784eb3f772b28efce57d8ea21ca79e2e9180..a44892e7cd5b7e2839976585e69bc8c274c21509 100644 (file)
@@ -180,7 +180,8 @@ void free_initrd_mem(unsigned long start, unsigned long end)
                totalram_pages++;
                pages++;
        }
-       printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages);
+       printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n",
+                                       (int)(pages * (PAGE_SIZE / 1024)));
 }
 #endif
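
The fix converts the freed page count to kilobytes before printing; the old
code printed the raw page count with a "k" suffix. Assuming 4 KiB pages:

    /* 300 freed pages -> 300 * (4096 / 1024) = 1200k
     * (the old message would have claimed "300k freed"). */
    static inline int initrd_kb(int pages)
    {
            return pages * (4096 / 1024);   /* PAGE_SIZE == 4096 assumed */
    }
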
 
index c825b14b4ed05427367a694194f5bc2a82a12d99..77f5021218d3e2dec0e5d30a3544cc64940a7a9c 100644 (file)
@@ -627,16 +627,6 @@ endif
 cflags-y                       += -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)          += arch/mips/pci/
 
-ifdef CONFIG_32BIT
-ifdef CONFIG_CPU_LITTLE_ENDIAN
-JIFFIES                        = jiffies_64
-else
-JIFFIES                        = jiffies_64 + 4
-endif
-else
-JIFFIES                        = jiffies_64
-endif
-
 #
 # Automatically detect the build format. By default we choose
 # the elf format according to the load address.
@@ -660,8 +650,9 @@ ifdef CONFIG_64BIT
 endif
 
 KBUILD_AFLAGS  += $(cflags-y)
-KBUILD_CFLAGS  += $(cflags-y) \
-                       -D"VMLINUX_LOAD_ADDRESS=$(load-y)"
+KBUILD_CFLAGS  += $(cflags-y)
+KBUILD_CPPFLAGS += -D"VMLINUX_LOAD_ADDRESS=$(load-y)"
+KBUILD_CPPFLAGS += -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)"
 
 LDFLAGS                        += -m $(ld-emul)
 
@@ -676,18 +667,6 @@ endif
 
 OBJCOPYFLAGS           += --remove-section=.reginfo
 
-#
-# Choosing incompatible machines durings configuration will result in
-# error messages during linking.  Select a default linkscript if
-# none has been choosen above.
-#
-
-CPPFLAGS_vmlinux.lds := \
-       $(KBUILD_CFLAGS) \
-       -D"LOADADDR=$(load-y)" \
-       -D"JIFFIES=$(JIFFIES)" \
-       -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)"
-
 head-y := arch/mips/kernel/head.o arch/mips/kernel/init_task.o
 
 libs-y                 += arch/mips/lib/
index f34ff86019424ba311f6ff36b79d43265df23772..379a664809b0189458f96706b7cc84b157d70625 100644 (file)
@@ -88,7 +88,7 @@ static struct clock_event_device au1x_rtcmatch2_clockdev = {
        .irq            = AU1000_RTC_MATCH2_INT,
        .set_next_event = au1x_rtcmatch2_set_next_event,
        .set_mode       = au1x_rtcmatch2_set_mode,
-       .cpumask        = CPU_MASK_ALL_PTR,
+       .cpumask        = cpu_all_mask,
 };
 
 static struct irqaction au1x_rtcmatch2_irqaction = {
index 23059170700593ebad07907d109a8fa8639aeba4..f6837422fe65e201bb760a30e692b81f71a1cfcf 100644 (file)
@@ -24,12 +24,10 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
 
 #define cpu_to_node(cpu)       (sn_cpu_info[(cpu)].p_nodeid)
 #define parent_node(node)      (node)
-#define node_to_cpumask(node)  (hub_data(node)->h_cpus)
 #define cpumask_of_node(node)  (&hub_data(node)->h_cpus)
 struct pci_bus;
 extern int pcibus_to_node(struct pci_bus *);
 
-#define pcibus_to_cpumask(bus) (cpu_online_map)
 #define cpumask_of_pcibus(bus) (cpu_online_mask)
 
 extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
index d3bea88d8744ace24ad245062830c6cc64ea7886..d9743536a6217c09e413fec50ff00647e842fadc 100644 (file)
@@ -178,8 +178,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
         * Mark current->active_mm as not "active" anymore.
         * We don't want to mislead possible IPI tlb flush routines.
         */
-       cpu_clear(cpu, prev->cpu_vm_mask);
-       cpu_set(cpu, next->cpu_vm_mask);
+       cpumask_clear_cpu(cpu, mm_cpumask(prev));
+       cpumask_set_cpu(cpu, mm_cpumask(next));
 
        local_irq_restore(flags);
 }
@@ -235,8 +235,8 @@ activate_mm(struct mm_struct *prev, struct mm_struct *next)
        TLBMISS_HANDLER_SETUP_PGD(next->pgd);
 
        /* mark mmu ownership change */
-       cpu_clear(cpu, prev->cpu_vm_mask);
-       cpu_set(cpu, next->cpu_vm_mask);
+       cpumask_clear_cpu(cpu, mm_cpumask(prev));
+       cpumask_set_cpu(cpu, mm_cpumask(next));
 
        local_irq_restore(flags);
 }
@@ -258,7 +258,7 @@ drop_mmu_context(struct mm_struct *mm, unsigned cpu)
 
        local_irq_save(flags);
 
-       if (cpu_isset(cpu, mm->cpu_vm_mask))  {
+       if (cpumask_test_cpu(cpu, mm_cpumask(mm)))  {
                get_new_mmu_context(mm, cpu);
 #ifdef CONFIG_MIPS_MT_SMTC
                /* See comments for similar code above */
index fd545547b8aa961a544de03b8843d852269be7ca..9e09af34c8a87cbfc03b65c429fc99844edcd69d 100644 (file)
@@ -19,7 +19,7 @@ struct task_struct;
 
 struct plat_smp_ops {
        void (*send_ipi_single)(int cpu, unsigned int action);
-       void (*send_ipi_mask)(cpumask_t mask, unsigned int action);
+       void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
        void (*init_secondary)(void);
        void (*smp_finish)(void);
        void (*cpus_done)(void);
index aaa2d4ab26dc5589c034a9a86db28fb8b33cbdd8..e15f11a09311c282d4f9b3ccad8a13731ca35f6b 100644 (file)
@@ -78,6 +78,6 @@ extern void play_dead(void);
 extern asmlinkage void smp_call_function_interrupt(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
 #endif /* __ASM_SMP_H */
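
This rename is one instance of the tree-wide cpumask conversion running
through the rest of this merge: mask-taking IPI functions switch from
passing `cpumask_t` by value to `const struct cpumask *`, and the iterators
follow. A minimal before/after sketch (hypothetical `send_one()`):

    #include <linux/cpumask.h>

    static void send_one(int cpu, unsigned int action) { /* platform IPI */ }

    /* Old style: copies the whole mask onto the stack. */
    static void send_mask_old(cpumask_t mask, unsigned int action)
    {
            unsigned int i;

            for_each_cpu_mask(i, mask)
                    send_one(i, action);
    }

    /* New style: a pointer, cheap even with large NR_CPUS. */
    static void send_mask_new(const struct cpumask *mask, unsigned int action)
    {
            unsigned int i;

            for_each_cpu(i, mask)
                    send_one(i, action);
    }
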
index 5b457a40c784f84997c9bc24c17de910ecbe61b3..6d6ca53058951307ad992cac83c0325c389ff556 100644 (file)
@@ -21,9 +21,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  *
  * The things we do for performance..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"),
-                      __aligned__(THREAD_SIZE))) =
+union thread_union init_thread_union __init_task_data
+       __attribute__((__aligned__(THREAD_SIZE))) =
                { INIT_THREAD_INFO(init_task) };
 
 /*
index ad0ff5dc4d59bfbcdef6e967f1317b0ea2459c3a..cc81771b882ca6bbf4240c4b3228165ffc9b2506 100644 (file)
@@ -80,11 +80,11 @@ void cmp_send_ipi_single(int cpu, unsigned int action)
        local_irq_restore(flags);
 }
 
-static void cmp_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void cmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 {
        unsigned int i;
 
-       for_each_cpu_mask(i, mask)
+       for_each_cpu(i, mask)
                cmp_send_ipi_single(i, action);
 }
 
@@ -171,7 +171,7 @@ void __init cmp_smp_setup(void)
 
        for (i = 1; i < NR_CPUS; i++) {
                if (amon_cpu_avail(i)) {
-                       cpu_set(i, cpu_possible_map);
+                       set_cpu_possible(i, true);
                        __cpu_number_map[i]     = ++ncpu;
                        __cpu_logical_map[ncpu] = i;
                }
index 6f7ee5ac46ee5d5f1544967d3e1f3c94ea1fc7df..43e7cdc5ded23a42fa1a8f24376ddf60eb96ccac 100644 (file)
@@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
                write_vpe_c0_vpeconf0(tmp);
 
                /* Record this as available CPU */
-               cpu_set(tc, cpu_possible_map);
+               set_cpu_possible(tc, true);
                __cpu_number_map[tc]    = ++ncpu;
                __cpu_logical_map[ncpu] = tc;
        }
@@ -141,11 +141,11 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action)
        local_irq_restore(flags);
 }
 
-static void vsmp_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void vsmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 {
        unsigned int i;
 
-       for_each_cpu_mask(i, mask)
+       for_each_cpu(i, mask)
                vsmp_send_ipi_single(i, action);
 }
 
index 2508d55d68fd4b7292013bd6fd8964f865d23250..00500fea275078e837c6f6dcc245720e921f7f03 100644 (file)
@@ -18,7 +18,8 @@ static void up_send_ipi_single(int cpu, unsigned int action)
        panic(KERN_ERR "%s called", __func__);
 }
 
-static inline void up_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void up_send_ipi_mask(const struct cpumask *mask,
+                                   unsigned int action)
 {
        panic(KERN_ERR "%s called", __func__);
 }
index 64668a93248be249a9e376db2f34af05874ccbae..4eb106c6a3ec59c71ad6fcda41e3df2520f31c11 100644 (file)
@@ -128,7 +128,7 @@ asmlinkage __cpuinit void start_secondary(void)
        cpu_idle();
 }
 
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
        mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION);
 }
@@ -183,15 +183,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
        mp_ops->prepare_cpus(max_cpus);
        set_cpu_sibling_map(0);
 #ifndef CONFIG_HOTPLUG_CPU
-       cpu_present_map = cpu_possible_map;
+       init_cpu_present(&cpu_possible_map);
 #endif
 }
 
 /* preload SMP state for boot cpu */
 void __devinit smp_prepare_boot_cpu(void)
 {
-       cpu_set(0, cpu_possible_map);
-       cpu_set(0, cpu_online_map);
+       set_cpu_possible(0, true);
+       set_cpu_online(0, true);
        cpu_set(0, cpu_callin_map);
 }
 
index 1a466baf0edf0eccd6fa200454e63772f78da630..67153a0dc267a28cc3c64014933a0a1567b638e3 100644 (file)
@@ -305,7 +305,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot)
         */
        ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
        for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
-               cpu_set(i, cpu_possible_map);
+               set_cpu_possible(i, true);
                __cpu_number_map[i] = i;
                __cpu_logical_map[i] = i;
        }
@@ -525,8 +525,8 @@ void smtc_prepare_cpus(int cpus)
         * Pull any physically present but unused TCs out of circulation.
         */
        while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
-               cpu_clear(tc, cpu_possible_map);
-               cpu_clear(tc, cpu_present_map);
+               set_cpu_possible(tc, false);
+               set_cpu_present(tc, false);
                tc++;
        }
 
index 2769bed3d2afcb0af8b3ebb351ed39c5f5ba609a..9bf0e3df7c5a6191e6d322c3a431b662d739b931 100644 (file)
@@ -10,7 +10,16 @@ PHDRS {
        text PT_LOAD FLAGS(7);  /* RWX */
        note PT_NOTE FLAGS(4);  /* R__ */
 }
-jiffies = JIFFIES;
+
+#ifdef CONFIG_32BIT
+       #ifdef CONFIG_CPU_LITTLE_ENDIAN
+               jiffies  = jiffies_64;
+       #else
+               jiffies  = jiffies_64 + 4;
+       #endif
+#else
+       jiffies  = jiffies_64;
+#endif
 
 SECTIONS
 {
@@ -29,7 +38,7 @@ SECTIONS
        /* . = 0xa800000000300000; */
        . = 0xffffffff80300000;
 #endif
-       . = LOADADDR;
+       . = VMLINUX_LOAD_ADDRESS;
        /* read-only */
        _text = .;      /* Text and read-only data */
        .text : {
index 3f04d4c406b75f397f8c568a730bbcdf118f33f2..b3deed8db619c93979fa626150f2043482df43ff 100644 (file)
@@ -56,12 +56,12 @@ int sysctl_lasatstring(ctl_table *table,
 
 
 /* And the same for proc */
-int proc_dolasatstring(ctl_table *table, int write, struct file *filp,
+int proc_dolasatstring(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        int r;
 
-       r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+       r = proc_dostring(table, write, buffer, lenp, ppos);
        if ((!write) || r)
                return r;
 
@@ -71,12 +71,12 @@ int proc_dolasatstring(ctl_table *table, int write, struct file *filp,
 }
 
 /* proc function to write EEPROM after changing int entry */
-int proc_dolasatint(ctl_table *table, int write, struct file *filp,
+int proc_dolasatint(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        int r;
 
-       r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       r = proc_dointvec(table, write, buffer, lenp, ppos);
        if ((!write) || r)
                return r;
 
@@ -89,7 +89,7 @@ int proc_dolasatint(ctl_table *table, int write, struct file *filp,
 static int rtctmp;
 
 /* proc function to read/write RealTime Clock */
-int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
+int proc_dolasatrtc(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        struct timespec ts;
@@ -102,7 +102,7 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
                if (rtctmp < 0)
                        rtctmp = 0;
        }
-       r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       r = proc_dointvec(table, write, buffer, lenp, ppos);
        if (r)
                return r;
 
@@ -154,7 +154,7 @@ int sysctl_lasat_rtc(ctl_table *table,
 #endif
 
 #ifdef CONFIG_INET
-int proc_lasat_ip(ctl_table *table, int write, struct file *filp,
+int proc_lasat_ip(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        unsigned int ip;
@@ -231,12 +231,12 @@ static int sysctl_lasat_prid(ctl_table *table,
        return 0;
 }
 
-int proc_lasat_prid(ctl_table *table, int write, struct file *filp,
+int proc_lasat_prid(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        int r;
 
-       r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       r = proc_dointvec(table, write, buffer, lenp, ppos);
        if (r < 0)
                return r;
        if (write) {
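
Every hunk in this file applies the same sysctl interface change:
`proc_handler` and the `proc_do*` helpers drop their `struct file *`
argument. A sketch of a handler written against the new signature
(hypothetical example, mirroring the pattern above):

    static int example_handler(ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos)
    {
            int r;

            r = proc_dointvec(table, write, buffer, lenp, ppos);
            if (!write || r)
                    return r;       /* reads and errors pass through */

            /* react to the freshly written value here */
            return 0;
    }
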
index d6e4f656ad14c985d301226f813dd52c769310eb..5da30b6a65b77c1b7dff68465c3a7b906dbf0216 100644 (file)
@@ -43,11 +43,12 @@ static void ssmtc_send_ipi_single(int cpu, unsigned int action)
        /* "CPU" may be TC of same VPE, VPE of same CPU, or different CPU */
 }
 
-static inline void ssmtc_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void ssmtc_send_ipi_mask(const struct cpumask *mask,
+                                      unsigned int action)
 {
        unsigned int i;
 
-       for_each_cpu_mask(i, mask)
+       for_each_cpu(i, mask)
                ssmtc_send_ipi_single(i, action);
 }
 
index 10ab69f7183fc8b69630c3c7add7a06a22b64fe4..94e05e5733c1c4e108baa4076a044c48bdd612e6 100644 (file)
@@ -79,7 +79,7 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma)
         * cores it has been used on
         */
        if (vma)
-               mask = vma->vm_mm->cpu_vm_mask;
+               mask = *mm_cpumask(vma->vm_mm);
        else
                mask = cpu_online_map;
        cpu_clear(cpu, mask);
index 499ffe5475dff4fe8254990499d13d6f5ff4a266..192cfd2a539c5a7e8910b53cc54ecbfa2bf1548c 100644 (file)
@@ -21,11 +21,11 @@ static void msmtc_send_ipi_single(int cpu, unsigned int action)
        smtc_send_ipi(cpu, LINUX_SMP_IPI, action);
 }
 
-static void msmtc_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void msmtc_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 {
        unsigned int i;
 
-       for_each_cpu_mask(i, mask)
+       for_each_cpu(i, mask)
                msmtc_send_ipi_single(i, action);
 }
 
index 8ace27716232ec496f0a2ba5045e653dc8320765..326fe7a392e88c5e63b47fd843f60728edf6e806 100644 (file)
@@ -97,11 +97,11 @@ static void yos_send_ipi_single(int cpu, unsigned int action)
        }
 }
 
-static void yos_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 {
        unsigned int i;
 
-       for_each_cpu_mask(i, mask)
+       for_each_cpu(i, mask)
                yos_send_ipi_single(i, action);
 }
 
index 060d853d7b35126a01287d64467e82997bea1107..f61c164d1e67f5dbda40d8d84a9101216e4c359a 100644 (file)
@@ -421,7 +421,7 @@ static void __init node_mem_init(cnodeid_t node)
 
 /*
  * A node with nothing.  We use it to avoid any special casing in
- * node_to_cpumask
+ * cpumask_of_node
  */
 static struct node_data null_node = {
        .hub = {
index cbcd7eb83bd1a57a17f4157030c09b965cc91322..9aa8f2951df6efd64a79137c83de3109a376bac5 100644 (file)
@@ -165,11 +165,11 @@ static void ip27_send_ipi_single(int destid, unsigned int action)
        REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq);
 }
 
-static void ip27_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void ip27_send_ipi(const struct cpumask *mask, unsigned int action)
 {
        unsigned int i;
 
-       for_each_cpu_mask(i, mask)
+       for_each_cpu(i, mask)
                ip27_send_ipi_single(i, action);
 }
 
index 314691648c97b5fe4ff0122ed1f46f3239809d29..47b347c992eaf129a3a6df335aeb6739d60a7632 100644 (file)
@@ -82,11 +82,12 @@ static void bcm1480_send_ipi_single(int cpu, unsigned int action)
        __raw_writeq((((u64)action)<< 48), mailbox_0_set_regs[cpu]);
 }
 
-static void bcm1480_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void bcm1480_send_ipi_mask(const struct cpumask *mask,
+                                 unsigned int action)
 {
        unsigned int i;
 
-       for_each_cpu_mask(i, mask)
+       for_each_cpu(i, mask)
                bcm1480_send_ipi_single(i, action);
 }
 
index cad14003b84f2efc4a0fb746df799597af7e6f90..c00a5cb1128d324ed678afec5d9c6cc6cd2620df 100644 (file)
@@ -70,11 +70,12 @@ static void sb1250_send_ipi_single(int cpu, unsigned int action)
        __raw_writeq((((u64)action) << 48), mailbox_set_regs[cpu]);
 }
 
-static inline void sb1250_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void sb1250_send_ipi_mask(const struct cpumask *mask,
+                                       unsigned int action)
 {
        unsigned int i;
 
-       for_each_cpu_mask(i, mask)
+       for_each_cpu(i, mask)
                sb1250_send_ipi_single(i, action);
 }
 
index a9e2e34f69b0969b38be706d9433e70b6aed74fa..cb294c244de3395e94792912a186e0e36063635d 100644 (file)
@@ -38,13 +38,13 @@ extern unsigned long mmu_context_cache[NR_CPUS];
 #define enter_lazy_tlb(mm, tsk)        do {} while (0)
 
 #ifdef CONFIG_SMP
-#define cpu_ran_vm(cpu, task) \
-       cpu_set((cpu), (task)->cpu_vm_mask)
-#define cpu_maybe_ran_vm(cpu, task) \
-       cpu_test_and_set((cpu), (task)->cpu_vm_mask)
+#define cpu_ran_vm(cpu, mm) \
+       cpumask_set_cpu((cpu), mm_cpumask(mm))
+#define cpu_maybe_ran_vm(cpu, mm) \
+       cpumask_test_and_set_cpu((cpu), mm_cpumask(mm))
 #else
-#define cpu_ran_vm(cpu, task)          do {} while (0)
-#define cpu_maybe_ran_vm(cpu, task)    true
+#define cpu_ran_vm(cpu, mm)            do {} while (0)
+#define cpu_maybe_ran_vm(cpu, mm)      true
 #endif /* CONFIG_SMP */
 
 /*
index 80d423b80af30cfebbf7364829153c6f3314a3b1..a481b043bea782bfdb804e61c1858ba54cd17229 100644 (file)
@@ -27,9 +27,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index 3e52a105432791ccab5860dac6fbc4f2b0436db1..8ca5af00334cd1f30567f7c74218040aed7750e6 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/stat.h>
 #include <linux/mman.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/tty.h>
 
 #include <asm/uaccess.h>
index da6f66901c92191c2069ba2dbd8144951584936b..55cca1dac431bc80c1e9ea5f34274be2022e4494 100644 (file)
@@ -118,8 +118,8 @@ define archhelp
        @echo  '* vmlinux       - Uncompressed kernel image (./vmlinux)'
        @echo  '  palo          - Bootable image (./lifimage)'
        @echo  '  install       - Install kernel using'
-       @echo  '                  (your) ~/bin/installkernel or'
-       @echo  '                  (distribution) /sbin/installkernel or'
+       @echo  '                  (your) ~/bin/$(INSTALLKERNEL) or'
+       @echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
        @echo  '                  copy to $$(INSTALL_PATH)'
 endef
 
index 1e1c824764ee1f0202c9012090e06f532eb75e42..5f39d5597cedea76f43c3470bcb9998c9dd07b12 100644 (file)
@@ -28,6 +28,8 @@
 #define F_SETOWN       12      /*  for sockets. */
 #define F_SETSIG       13      /*  for sockets. */
 #define F_GETSIG       14      /*  for sockets. */
+#define F_GETOWN_EX    15
+#define F_SETOWN_EX    16
 
 /* for posix fcntl() and lockf() */
 #define F_RDLCK                01
index 21eb45a526299d15884aa7ca11ba545f82a8111f..2e73623feb6ba28d4590eafc02b573f8acb7c2c4 100644 (file)
@@ -30,7 +30,6 @@ extern void smp_send_all_nop(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #endif /* !ASSEMBLY */
 
index 9632b3e164c719f0a9e0adc06e8e6d79b121f6a9..e593fc8d58bcd8ef6ccab425aba077a9a91597b7 100644 (file)
@@ -21,8 +21,8 @@
 
 # User may have a custom install script
 
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install
 
index 82974b20fc106b85c0462cc83e78eb2c3201a1c4..d020eae6525c4a200e9bcab86b6b8beb3e2ef5b3 100644 (file)
@@ -43,8 +43,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((aligned(128))) __attribute__((__section__(".data.init_task"))) =
+union thread_union init_thread_union __init_task_data
+       __attribute__((aligned(128))) =
                { INIT_THREAD_INFO(init_task) };
 
 #if PT_NLEVELS == 3
index 92a0acaa0d1213042bd871e83a2024cc1ae59aa6..561388b17c9170841aeb08bc545e982004199f98 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/signal.h>
 #include <linux/resource.h>
 #include <linux/times.h>
-#include <linux/utsname.h>
 #include <linux/time.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
index 952a3963e9e8b607e15fb7ffc21d8f8d5482dd8b..aacf629c1a9f87b3225a0d25877eb0486d4d8d5f 100644 (file)
@@ -158,8 +158,6 @@ drivers-$(CONFIG_OPROFILE)  += arch/powerpc/oprofile/
 # Default to zImage, override when needed
 all: zImage
 
-CPPFLAGS_vmlinux.lds   := -Upowerpc
-
 BOOT_TARGETS = zImage zImage.initrd uImage zImage% dtbImage% treeImage.% cuImage.% simpleImage.%
 
 PHONY += $(BOOT_TARGETS)
@@ -182,8 +180,8 @@ define archhelp
   @echo '  simpleImage.<dt> - Firmware independent image.'
   @echo '  treeImage.<dt>  - Support for older IBM 4xx firmware (not U-Boot)'
   @echo '  install         - Install kernel using'
-  @echo '                    (your) ~/bin/installkernel or'
-  @echo '                    (distribution) /sbin/installkernel or'
+  @echo '                    (your) ~/bin/$(INSTALLKERNEL) or'
+  @echo '                    (distribution) /sbin/$(INSTALLKERNEL) or'
   @echo '                    install to $$(INSTALL_PATH) and run lilo'
   @echo '  *_defconfig     - Select default config from arch/$(ARCH)/configs'
   @echo ''
index 98312d169c859926932356cd7018a57ffed23e9f..b6a256bc96ee2c08c0dbc073c35d15118919efd3 100644 (file)
@@ -23,8 +23,8 @@ set -e
 
 # User may have a custom install script
 
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install
 
diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h
new file mode 100644 (file)
index 0000000..a67aeed
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Freescale MPC83XX / MPC85XX DMA Controller
+ *
+ * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
+#define __ARCH_POWERPC_ASM_FSLDMA_H__
+
+#include <linux/dmaengine.h>
+
+/*
+ * Definitions for the Freescale DMA controller's DMA_SLAVE implementation
+ *
+ * The Freescale DMA_SLAVE implementation was designed to handle many-to-many
+ * transfers. An example usage would be an accelerated copy between two
+ * scatterlists. Another example would be an accelerated copy from
+ * multiple non-contiguous device buffers into a single scatterlist.
+ *
+ * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
+ * structure contains a list of hardware addresses that should be copied
+ * to/from the scatterlist passed into device_prep_slave_sg(). The structure
+ * also has some fields to enable hardware-specific features.
+ */
+
+/**
+ * struct fsl_dma_hw_addr
+ * @entry: linked list entry
+ * @address: the hardware address
+ * @length: length to transfer
+ *
+ * Holds a single physical hardware address / length pair for use
+ * with the DMAEngine DMA_SLAVE API.
+ */
+struct fsl_dma_hw_addr {
+       struct list_head entry;
+
+       dma_addr_t address;
+       size_t length;
+};
+
+/**
+ * struct fsl_dma_slave
+ * @addresses: a linked list of struct fsl_dma_hw_addr structures
+ * @request_count: value for DMA request count
+ * @src_loop_size: setup and enable constant source-address DMA transfers
+ * @dst_loop_size: setup and enable constant destination address DMA transfers
+ * @external_start: enable externally started DMA transfers
+ * @external_pause: enable externally paused DMA transfers
+ *
+ * Holds a list of address / length pairs for use with the DMAEngine
+ * DMA_SLAVE API implementation for the Freescale DMA controller.
+ */
+struct fsl_dma_slave {
+
+       /* List of hardware address/length pairs */
+       struct list_head addresses;
+
+       /* Support for extra controller features */
+       unsigned int request_count;
+       unsigned int src_loop_size;
+       unsigned int dst_loop_size;
+       bool external_start;
+       bool external_pause;
+};
+
+/**
+ * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
+ * @slave: the &struct fsl_dma_slave to add to
+ * @address: the hardware address to add
+ * @length: the length of bytes to transfer from @address
+ *
+ * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
+ * success, -ERRNO otherwise.
+ */
+static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
+                                      dma_addr_t address, size_t length)
+{
+       struct fsl_dma_hw_addr *addr;
+
+       addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
+       if (!addr)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&addr->entry);
+       addr->address = address;
+       addr->length = length;
+
+       list_add_tail(&addr->entry, &slave->addresses);
+       return 0;
+}
+
+/**
+ * fsl_dma_slave_free - free a struct fsl_dma_slave
+ * @slave: the struct fsl_dma_slave to free
+ *
+ * Free a struct fsl_dma_slave and all associated address/length pairs
+ */
+static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
+{
+       struct fsl_dma_hw_addr *addr, *tmp;
+
+       if (slave) {
+               list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
+                       list_del(&addr->entry);
+                       kfree(addr);
+               }
+
+               kfree(slave);
+       }
+}
+
+/**
+ * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
+ * @gfp: the flags to pass to kzalloc when allocating this structure
+ *
+ * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
+ * struct fsl_dma_slave on success, or NULL on failure.
+ */
+static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
+{
+       struct fsl_dma_slave *slave;
+
+       slave = kzalloc(sizeof(*slave), gfp);
+       if (!slave)
+               return NULL;
+
+       INIT_LIST_HEAD(&slave->addresses);
+       return slave;
+}
+
+#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */
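
A hypothetical usage sketch of the helpers defined above; `hw_addr` and
`len` are illustrative, and the result is handed to the driver's
device_prep_slave_sg() path as the header comment describes:

    static struct fsl_dma_slave *build_slave(dma_addr_t hw_addr, size_t len)
    {
            struct fsl_dma_slave *slave;

            slave = fsl_dma_slave_alloc(GFP_KERNEL);
            if (!slave)
                    return NULL;

            slave->external_start = true;   /* externally started transfer */
            if (fsl_dma_slave_append(slave, hw_addr, len)) {
                    fsl_dma_slave_free(slave);
                    return NULL;
            }
            return slave;   /* release with fsl_dma_slave_free() when done */
    }
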
index c0d3b8af93190bc01f6e962dae3a0a76fb8a8179..d9ea8d39c3428fa88ef2c88fbab5e40d12e83fe5 100644 (file)
@@ -146,7 +146,7 @@ extern void smp_generic_take_timebase(void);
 extern struct smp_ops_t *smp_ops;
 
 extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
 /* Definitions relative to the secondary CPU spin loop
  * and entry point. Not all of them exist on both 32 and
index 394edcbcce711cbf8181b4dff00a7eed2bfc153e..22f738d12ad966b293d25a7036696db79a954606 100644 (file)
@@ -17,11 +17,6 @@ static inline int cpu_to_node(int cpu)
 
 #define parent_node(node)      (node)
 
-static inline cpumask_t node_to_cpumask(int node)
-{
-       return numa_cpumask_lookup_table[node];
-}
-
 #define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
 
 int of_node_to_nid(struct device_node *device);
@@ -36,11 +31,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 }
 #endif
 
-#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \
-                                       CPU_MASK_ALL : \
-                                       node_to_cpumask(pcibus_to_node(bus)) \
-                               )
-
 #define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ?            \
                                 cpu_all_mask :                         \
                                 cpumask_of_node(pcibus_to_node(bus)))
@@ -104,8 +94,6 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
 #ifdef CONFIG_PPC64
 #include <asm/smp.h>
 
-#define topology_thread_siblings(cpu)  (per_cpu(cpu_sibling_map, cpu))
-#define topology_core_siblings(cpu)    (per_cpu(cpu_core_map, cpu))
 #define topology_thread_cpumask(cpu)   (&per_cpu(cpu_sibling_map, cpu))
 #define topology_core_cpumask(cpu)     (&per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)          (cpu_to_core_id(cpu))
index ffc4253fef55e99a59070b09f1b912e51bfb4de8..2375b7eb1c7601e00c102bc538be1ebd54dc4db2 100644 (file)
@@ -16,9 +16,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union 
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index 49e705fcee6df804545dd790b6cd0dc7fb6d7ada..040bd1de8d99b68b46d36008aca44420ffd180c3 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/kexec.h>
 #include <linux/smp.h>
 #include <linux/thread_info.h>
+#include <linux/init_task.h>
 #include <linux/errno.h>
 
 #include <asm/page.h>
@@ -249,8 +250,8 @@ static void kexec_prepare_cpus(void)
  * We could use a smaller stack if we don't care about anything using
  * current, but that audit has not been performed.
  */
-static union thread_union kexec_stack
-       __attribute__((__section__(".data.init_task"))) = { };
+static union thread_union kexec_stack __init_task_data =
+       { };
 
 /* Our assembly helper, in kexec_stub.S */
 extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
index 1d5570a1e456abdc1c651dee6c443b2852b00680..4271f7a655a3adfb5e0636e83c8f6e59b61e1868 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/seq_file.h>
 #include <linux/ioport.h>
 #include <linux/console.h>
-#include <linux/utsname.h>
 #include <linux/screen_info.h>
 #include <linux/root_dev.h>
 #include <linux/notifier.h>
@@ -432,9 +431,9 @@ void __init smp_setup_cpu_maps(void)
                for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
                        DBG("    thread %d -> cpu %d (hard id %d)\n",
                            j, cpu, intserv[j]);
-                       cpu_set(cpu, cpu_present_map);
+                       set_cpu_present(cpu, true);
                        set_hard_smp_processor_id(cpu, intserv[j]);
-                       cpu_set(cpu, cpu_possible_map);
+                       set_cpu_possible(cpu, true);
                        cpu++;
                }
        }
@@ -480,7 +479,7 @@ void __init smp_setup_cpu_maps(void)
                               maxcpus);
 
                for (cpu = 0; cpu < maxcpus; cpu++)
-                       cpu_set(cpu, cpu_possible_map);
+                       set_cpu_possible(cpu, true);
        out:
                of_node_put(dn);
        }
index d387b3937ccceb16bd84d707b8949a3200338d20..9b86a74d281504cf255ae9a2f2b3e7b5009076fb 100644 (file)
@@ -189,11 +189,11 @@ void arch_send_call_function_single_ipi(int cpu)
        smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
 }
 
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
        unsigned int cpu;
 
-       for_each_cpu_mask(cpu, mask)
+       for_each_cpu(cpu, mask)
                smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
 }
 
@@ -287,7 +287,7 @@ void __devinit smp_prepare_boot_cpu(void)
 {
        BUG_ON(smp_processor_id() != boot_cpuid);
 
-       cpu_set(boot_cpuid, cpu_online_map);
+       set_cpu_online(boot_cpuid, true);
        cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid));
        cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid));
 #ifdef CONFIG_PPC64
@@ -307,7 +307,7 @@ int generic_cpu_disable(void)
        if (cpu == boot_cpuid)
                return -EBUSY;
 
-       cpu_clear(cpu, cpu_online_map);
+       set_cpu_online(cpu, false);
 #ifdef CONFIG_PPC64
        vdso_data->processorCount--;
        fixup_irqs(cpu_online_map);
@@ -361,7 +361,7 @@ void generic_mach_cpu_die(void)
        smp_wmb();
        while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
                cpu_relax();
-       cpu_set(cpu, cpu_online_map);
+       set_cpu_online(cpu, true);
        local_irq_enable();
 }
 #endif
@@ -508,7 +508,7 @@ int __devinit start_secondary(void *unused)
 
        ipi_call_lock();
        notify_cpu_starting(cpu);
-       cpu_set(cpu, cpu_online_map);
+       set_cpu_online(cpu, true);
        /* Update sibling maps */
        base = cpu_first_thread_in_core(cpu);
        for (i = 0; i < threads_per_core; i++) {
index 1cc5e9e5da96a78b6a5c58ac678583be63e2b4b2..b97c2d67f4ac38d5ece9f0e8f002db8b7648b0f2 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/signal.h>
 #include <linux/resource.h>
 #include <linux/times.h>
-#include <linux/utsname.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/sem.h>
index a0abce251d0a0ae1021e1012fdc9b37fff31c7c6..3faaf29bdb29def5748cf54beef11a285764e438 100644 (file)
@@ -1,3 +1,4 @@
+
 /*
  *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
  *                      <benh@kernel.crashing.org>
@@ -74,7 +75,7 @@ static int vdso_ready;
 static union {
        struct vdso_data        data;
        u8                      page[PAGE_SIZE];
-} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+} vdso_data_store __page_aligned_data;
 struct vdso_data *vdso_data = &vdso_data_store.data;
 
 /* Format of the patch table */
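
Like `__init_task_data` earlier, `__page_aligned_data` replaces an
open-coded section attribute; per <linux/linkage.h> it is approximately:

    #define __page_aligned_data \
            __attribute__((__section__(".data.page_aligned"))) \
            __attribute__((__aligned__(PAGE_SIZE)))

so the vdso_data_store union is both placed in .data.page_aligned and
explicitly page-aligned.
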
index b54b81688132bab49851646f7b3697ee65bae5c0..51ead52141bd083774ed3ed13d8c71dc831f0f23 100644 (file)
@@ -16,7 +16,7 @@ GCOV_PROFILE := n
 
 EXTRA_CFLAGS := -shared -fno-common -fno-builtin
 EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-               $(call ld-option, -Wl$(comma)--hash-style=sysv)
+               $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 EXTRA_AFLAGS := -D__VDSO32__ -s
 
 obj-y += vdso32_wrapper.o
index 556f0caa5d842a27885a6c2417787c18315037dc..6e8f507ed32bb5f74f9d9f5574f09636ae15f9a3 100644 (file)
@@ -1,7 +1,8 @@
 #include <linux/init.h>
+#include <linux/linkage.h>
 #include <asm/page.h>
 
-       .section ".data.page_aligned"
+       __PAGE_ALIGNED_DATA
 
        .globl vdso32_start, vdso32_end
        .balign PAGE_SIZE
index dd0c8e9367751eb2cd3bc55795a819dbde4bc085..79da65d44a2a78a01bf11f2c0bb5b2003560ae6c 100644 (file)
@@ -11,7 +11,7 @@ GCOV_PROFILE := n
 
 EXTRA_CFLAGS := -shared -fno-common -fno-builtin
 EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
-               $(call ld-option, -Wl$(comma)--hash-style=sysv)
+               $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 EXTRA_AFLAGS := -D__VDSO64__ -s
 
 obj-y += vdso64_wrapper.o
index 0529cb9e3b97bfaef233858595f91cebb820ac96..b8553d62b792ffb2953a400fabdf8fc746c62501 100644 (file)
@@ -1,7 +1,8 @@
 #include <linux/init.h>
+#include <linux/linkage.h>
 #include <asm/page.h>
 
-       .section ".data.page_aligned"
+       __PAGE_ALIGNED_DATA
 
        .globl vdso64_start, vdso64_end
        .balign PAGE_SIZE
index 937a38e73178c0c7168bf27b75913e69c3d2f9c0..b40c22d697f006d06f2e952a5a602dee999bc333 100644 (file)
@@ -320,7 +320,7 @@ static int __init smp_psurge_probe(void)
        if (ncpus > NR_CPUS)
                ncpus = NR_CPUS;
        for (i = 1; i < ncpus ; ++i)
-               cpu_set(i, cpu_present_map);
+               set_cpu_present(i, true);
 
        if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
 
@@ -867,7 +867,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr)
 
 int smp_core99_cpu_disable(void)
 {
-       cpu_clear(smp_processor_id(), cpu_online_map);
+       set_cpu_online(smp_processor_id(), false);
 
        /* XXX reset cpu affinity here */
        mpic_cpu_set_priority(0xf);
@@ -952,7 +952,7 @@ void __init pmac_setup_smp(void)
                int cpu;
 
                for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu)
-                       cpu_set(cpu, cpu_possible_map);
+                       set_cpu_possible(cpu, true);
                smp_ops = &psurge_smp_ops;
        }
 #endif /* CONFIG_PPC32 */
index a20ead87153db18566a2034179a9041ea6cc456d..ebff6d9a4e395ae0088da5a26e99459cedc2f0f5 100644 (file)
@@ -94,7 +94,7 @@ static int pseries_cpu_disable(void)
 {
        int cpu = smp_processor_id();
 
-       cpu_clear(cpu, cpu_online_map);
+       set_cpu_online(cpu, false);
        vdso_data->processorCount--;
 
        /*fix boot_cpuid here*/
@@ -185,7 +185,7 @@ static int pseries_add_processor(struct device_node *np)
 
        for_each_cpu_mask(cpu, tmp) {
                BUG_ON(cpu_isset(cpu, cpu_present_map));
-               cpu_set(cpu, cpu_present_map);
+               set_cpu_present(cpu, true);
                set_hard_smp_processor_id(cpu, *intserv++);
        }
        err = 0;
@@ -217,7 +217,7 @@ static void pseries_remove_processor(struct device_node *np)
                        if (get_hard_smp_processor_id(cpu) != intserv[i])
                                continue;
                        BUG_ON(cpu_online(cpu));
-                       cpu_clear(cpu, cpu_present_map);
+                       set_cpu_present(cpu, false);
                        set_hard_smp_processor_id(cpu, -1);
                        break;
                }
index 264528e4f58d5ea3fa0ff12f534c925df21001a6..b55fd7ed1c31011c10e7393575473dffefeab596 100644 (file)
@@ -50,10 +50,9 @@ static struct platform_device *appldata_pdev;
  * /proc entries (sysctl)
  */
 static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+static int appldata_timer_handler(ctl_table *ctl, int write,
                                  void __user *buffer, size_t *lenp, loff_t *ppos);
 static int appldata_interval_handler(ctl_table *ctl, int write,
-                                        struct file *filp,
                                         void __user *buffer,
                                         size_t *lenp, loff_t *ppos);
 
@@ -247,7 +246,7 @@ __appldata_vtimer_setup(int cmd)
  * Start/Stop timer, show status of timer (0 = not active, 1 = active)
  */
 static int
-appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_timer_handler(ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int len;
@@ -289,7 +288,7 @@ out:
  * current timer interval.
  */
 static int
-appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_interval_handler(ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int len, interval;
@@ -335,7 +334,7 @@ out:
  * monitoring (0 = not in process, 1 = in process)
  */
 static int
-appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_generic_handler(ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct appldata_ops *ops = NULL, *tmp_ops;
index d4026f62cb06d2ef804c63d63aa23079fd24325b..aed3069699bd5abf94ffbeb71a7db756b210b12f 100644 (file)
@@ -21,8 +21,8 @@
 
 # User may have a custom install script
 
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install - same as make zlilo
 
index c991fe6473c990c95f794b627d0173f99c423262..a868b272c25791bbfe80621a1294b2aeb394d31b 100644 (file)
@@ -62,7 +62,7 @@ extern struct mutex smp_cpu_state_mutex;
 extern int smp_cpu_polarization[];
 
 extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
 #endif
 
index 5e0ad618dc45d2b3ea111f8f494c95e6fa8b99e9..6e7211abd950c8fc3e742c2ee644ca6dcd388dfe 100644 (file)
@@ -9,7 +9,6 @@ const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
 
 extern cpumask_t cpu_core_map[NR_CPUS];
 
-#define topology_core_siblings(cpu)    (cpu_core_map[cpu])
 #define topology_core_cpumask(cpu)     (&cpu_core_map[cpu])
 
 int topology_set_cpu_management(int fc);
index 5519cb7451065595baddd0ef027ef6e5e1f64305..0debcec23a39b543798ac523b00f772b895223f4 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/signal.h>
 #include <linux/resource.h>
 #include <linux/times.h>
-#include <linux/utsname.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/sem.h>
index 4c512561687df1e84d557564ef90ca9c65497605..20f282c911c28b6ae44c64bebcb7fee85c110062 100644 (file)
@@ -881,11 +881,11 @@ static int debug_active=1;
  * if debug_active is already off
  */
 static int
-s390dbf_procactive(ctl_table *table, int write, struct file *filp,
+s390dbf_procactive(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        if (!write || debug_stoppable || !debug_active)
-               return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+               return proc_dointvec(table, write, buffer, lenp, ppos);
        else
                return 0;
 }
index fe787f9e5f3f375753291b01f1e135546b5875b6..4d1c9fb0b54086e852fdb2de11f88b247b1ca3a0 100644 (file)
@@ -25,9 +25,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union 
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index 59fe6ecc6ed33389fed8947f996b2a9cd5c39e6f..5417eb57271aed29ac041f948a27d20a9f42a25e 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
-#include <linux/utsname.h>
 #include <linux/tick.h>
 #include <linux/elfcore.h>
 #include <linux/kernel_stat.h>
index b4b6396e6cf00fcc97347362eb882ae15d7d386f..c932caa5e8504e3e026708f2efe16f44ad0ce316 100644 (file)
@@ -147,11 +147,11 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig)
                udelay(10);
 }
 
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
        int cpu;
 
-       for_each_cpu_mask(cpu, mask)
+       for_each_cpu(cpu, mask)
                smp_ext_bitcall(cpu, ec_call_function);
 }
 
index 45e1708b70fd028fdd9b6b432f156f886855d0ae..45a3e9a7ae21db6f5705eac145fe62790e4c5b0f 100644 (file)
@@ -75,7 +75,7 @@ __setup("vdso=", vdso_setup);
 static union {
        struct vdso_data        data;
        u8                      page[PAGE_SIZE];
-} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+} vdso_data_store __page_aligned_data;
 struct vdso_data *vdso_data = &vdso_data_store.data;
 
 /*
index ca78ad60ba2490c9d2bb7682ad23aa7b9e922ddb..d13e8755a8cce46e1a97a260893aaa6fd10c8767 100644 (file)
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_31 += -m31 -s
 KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-                       $(call ld-option, -Wl$(comma)--hash-style=sysv)
+                       $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
index 61639a89e70b93e7c7355eb080c7afcf07e79b2d..ae42f8ce350bcb5047e6b553749fa6dc453295c7 100644 (file)
@@ -1,7 +1,8 @@
 #include <linux/init.h>
+#include <linux/linkage.h>
 #include <asm/page.h>
 
-       .section ".data.page_aligned"
+       __PAGE_ALIGNED_DATA
 
        .globl vdso32_start, vdso32_end
        .balign PAGE_SIZE
index 6fc8e829258c50be575bf1870d40ca4879b88a00..449352dda9cdb9a81209d3ff3f49cf61c0356a82 100644 (file)
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_64 += -m64 -s
 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
-                       $(call ld-option, -Wl$(comma)--hash-style=sysv)
+                       $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
index d8e2ac14d564d057bef615266bcf17abf90083cf..c245842b516fdfa490e2c877252730cfc8fb5589 100644 (file)
@@ -1,7 +1,8 @@
 #include <linux/init.h>
+#include <linux/linkage.h>
 #include <asm/page.h>
 
-       .section ".data.page_aligned"
+       __PAGE_ALIGNED_DATA
 
        .globl vdso64_start, vdso64_end
        .balign PAGE_SIZE
index 413c240cbca773bb964b01cfc75506d0345180e1..b201135cc18c25d38b5b486ca3ffa2729cffbc4d 100644 (file)
@@ -262,7 +262,7 @@ cmm_skip_blanks(char *cp, char **endp)
 static struct ctl_table cmm_table[];
 
 static int
-cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
+cmm_pages_handler(ctl_table *ctl, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char buf[16], *p;
@@ -303,7 +303,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
 }
 
 static int
-cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
+cmm_timeout_handler(ctl_table *ctl, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char buf[64], *p;
index ff952f6c63fd645a66f6e7614b22013cd51798c0..baa03ee217d1abdaaeb1081e0274090a6a1cf544 100644 (file)
@@ -34,9 +34,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"), __aligned__(THREAD_SIZE))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index 90589f0fec1249fad44a41cae898852c97cbd0f0..f9f41818b17e7e0ae9b4f0dcb00f2dfedd3080b6 100644 (file)
@@ -23,8 +23,8 @@
 
 # User may have a custom install script
 
-if [ -x /sbin/installkernel ]; then
-  exec /sbin/installkernel "$@"
+if [ -x /sbin/${INSTALLKERNEL} ]; then
+  exec /sbin/${INSTALLKERNEL} "$@"
 fi
 
 if [ "$2" = "zImage" ]; then
index b91fa8dbf047b374cf31e6d3878067d3941df635..4d58eb0973d4f5ea9d048d4e53e949afb02bdfec 100644 (file)
@@ -1,12 +1,9 @@
 menu "DMA support"
 
-config SH_DMA_API
-       bool
 
 config SH_DMA
        bool "SuperH on-chip DMA controller (DMAC) support"
        depends on CPU_SH3 || CPU_SH4
-       select SH_DMA_API
        default n
 
 config SH_DMA_IRQ_MULTI
@@ -19,6 +16,15 @@ config SH_DMA_IRQ_MULTI
                     CPU_SUBTYPE_SH7780  || CPU_SUBTYPE_SH7785  || \
                     CPU_SUBTYPE_SH7760
 
+config SH_DMA_API
+       depends on SH_DMA
+       bool "SuperH DMA API support"
+       default n
+       help
+         This always enables the legacy SuperH DMA API. If you want
+         to use the generic DMA engine instead, do not enable this;
+         enable DMA_ENGINE and SH_DMAE.
+
 config NR_ONCHIP_DMA_CHANNELS
        int
        depends on SH_DMA
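
With SH_DMA_API now a user-visible choice, a configuration that opts for the
DMA engine instead would look roughly like the following hypothetical .config
fragment (SH_DMAE comes from the rest of this series):

    # CONFIG_SH_DMA_API is not set
    CONFIG_SH_DMA=y
    CONFIG_DMA_ENGINE=y
    CONFIG_SH_DMAE=y
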
index c6068137b46f42aa13f8491a3a8a298a4514d01b..d88c9484762c941b54ce5166c544f0b7e767819b 100644 (file)
@@ -2,8 +2,7 @@
 # Makefile for the SuperH DMA specific kernel interface routines under Linux.
 #
 
-obj-$(CONFIG_SH_DMA_API)       += dma-api.o dma-sysfs.o
-obj-$(CONFIG_SH_DMA)           += dma-sh.o
+obj-$(CONFIG_SH_DMA_API)       += dma-sh.o dma-api.o dma-sysfs.o
 obj-$(CONFIG_PVR2_DMA)         += dma-pvr2.o
 obj-$(CONFIG_G2_DMA)           += dma-g2.o
 obj-$(CONFIG_SH_DMABRG)                += dmabrg.o
index 68a5f4cb0343eeffa2750543a87f7ea3bbd11d0d..78eed3e0bdf548dc6c5f1761b51eefeb17cdb0a0 100644 (file)
@@ -116,4 +116,17 @@ static u32 dma_base_addr[] __maybe_unused = {
 #define CHCR    0x0C
 #define DMAOR  0x40
 
+/*
+ * for dma engine
+ *
+ * SuperH DMA mode
+ */
+#define SHDMA_MIX_IRQ  (1 << 1)
+#define SHDMA_DMAOR1   (1 << 2)
+#define SHDMA_DMAE1    (1 << 3)
+
+struct sh_dmae_pdata {
+       unsigned int mode;
+};
+
 #endif /* __DMA_SH_H */
index ca64f43abe67e3e50debcd068c35c2c6eb96f96e..53ef26ced75fddfba4ae5a837a66b8638fd2a5a4 100644 (file)
@@ -44,7 +44,6 @@ void plat_send_ipi(unsigned int cpu, unsigned int message);
 
 void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #else
 
index f8c40cc65054656fc2d7a62d311d015ed45e3da8..65e7bd2f2240c1a0dfcd168cccf8c101e67103d0 100644 (file)
@@ -31,7 +31,6 @@
 #define cpu_to_node(cpu)       ((void)(cpu),0)
 #define parent_node(node)      ((void)(node),0)
 
-#define node_to_cpumask(node)  ((void)node, cpu_online_map)
 #define cpumask_of_node(node)  ((void)node, cpu_online_mask)
 
 #define pcibus_to_node(bus)    ((void)(bus), -1)
index 1719957c0a691202b192da5c9a6329e810dcc44f..11f2ea556a6bc506f5c2cc3083a7cfff9a643801 100644 (file)
@@ -17,9 +17,8 @@ struct pt_regs fake_swapper_regs;
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index 60f8af4497c78fc5fc051947f766966b1a19f3e2..7cb933ba49579d7c97391119cb18d1e6cbfe08d5 100644 (file)
@@ -165,11 +165,9 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_IRQSTACKS
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
-               __attribute__((__section__(".bss.page_aligned")));
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
 
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
-               __attribute__((__section__(".bss.page_aligned")));
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
 
 /*
  * allocate per-cpu stacks for hardirq and for softirq processing
index 63ba12836eae293e6fc2885ed8f8706852b281ae..eb68bfdd86e66be57464425896b887abd63c3af4 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/syscalls.h>
 #include <linux/mman.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/ipc.h>
index 91fb8445a5a094ac8c5f2191b9bca308b5e1b8e4..287235768bc5028bb0da239e5b5b46a10d891f8b 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/stat.h>
 #include <linux/mman.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/syscalls.h>
 #include <linux/ipc.h>
 #include <asm/uaccess.h>
index 4bbce1cfa359991511127b6f53a3f58e2522f1d9..8f0ea5fc835cad01bd8caa79763a2023c28af0ab 100644 (file)
@@ -15,7 +15,7 @@ quiet_cmd_syscall = SYSCALL $@
 export CPPFLAGS_vsyscall.lds += -P -C -Ush
 
 vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \
-               $(call ld-option, -Wl$(comma)--hash-style=sysv)
+               $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 
 SYSCFLAGS_vsyscall-trapa.so    = $(vsyscall-flags)
 
index 467221dd57024107f544a320fa99daefe2212b63..dfe272d1446592e11d3fac78f2e402dcd35f868e 100644 (file)
@@ -31,7 +31,6 @@ export BITS    := 32
 #KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7
 KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
 KBUILD_AFLAGS += -m32
-CPPFLAGS_vmlinux.lds += -m32
 
 #LDFLAGS_vmlinux = -N -Ttext 0xf0004000
 #  Since 2.5.40, the first stage is left not btfix-ed.
@@ -45,9 +44,6 @@ else
 
 CHECKFLAGS      += -D__sparc__ -D__sparc_v9__ -D__arch64__ -m64
 
-# Undefine sparc when processing vmlinux.lds - it is used
-# And teach CPP we are doing 64 bit builds (for this case)
-CPPFLAGS_vmlinux.lds += -m64 -Usparc
 LDFLAGS              := -m elf64_sparc
 export BITS          := 64
 
index becb6bf353a9c36de3b40c3d7dcbc4b80a5e6c47..f49e11cd4ded282c7084a86672c6ee8929fa4989 100644 (file)
@@ -36,7 +36,6 @@ extern int sparc64_multi_core;
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 /*
  *     General functions that each host system must provide.
index 26cd25c0839977ae3ad1562d1806d9c52b42a5e9..600a79035fa1066038861b0f435e9a44f8c4e49d 100644 (file)
@@ -12,22 +12,8 @@ static inline int cpu_to_node(int cpu)
 
 #define parent_node(node)      (node)
 
-static inline cpumask_t node_to_cpumask(int node)
-{
-       return numa_cpumask_lookup_table[node];
-}
 #define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
 
-/*
- * Returns a pointer to the cpumask of CPUs on Node 'node'.
- * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
- */
-#define node_to_cpumask_ptr(v, node)           \
-               cpumask_t *v = &(numa_cpumask_lookup_table[node])
-
-#define node_to_cpumask_ptr_next(v, node)      \
-                          v = &(numa_cpumask_lookup_table[node])
-
 struct pci_bus;
 #ifdef CONFIG_PCI
 extern int pcibus_to_node(struct pci_bus *pbus);
@@ -71,8 +57,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #ifdef CONFIG_SMP
 #define topology_physical_package_id(cpu)      (cpu_data(cpu).proc_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu).core_id)
-#define topology_core_siblings(cpu)            (cpu_core_map[cpu])
-#define topology_thread_siblings(cpu)          (per_cpu(cpu_sibling_map, cpu))
 #define topology_core_cpumask(cpu)             (&cpu_core_map[cpu])
 #define topology_thread_cpumask(cpu)           (&per_cpu(cpu_sibling_map, cpu))
 #define mc_capable()                           (sparc64_multi_core)
index 3a048fad7ee238b1253d1c7de2312a462dad36d6..5b47fab9966e34e8030db1628a47e22a966dedb5 100644 (file)
@@ -7,7 +7,11 @@ ccflags-y := -Werror
 
 extra-y     := head_$(BITS).o
 extra-y     += init_task.o
-extra-y     += vmlinux.lds
+
+# Undefine sparc when processing vmlinux.lds - the symbol is used there.
+# Also teach CPP that we are doing a $(BITS) build (for this case)
+CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS)
+extra-y              += vmlinux.lds
 
 obj-$(CONFIG_SPARC32)   += entry.o wof.o wuf.o
 obj-$(CONFIG_SPARC32)   += etrap_32.o
index 28125c5b3d3c306e216012ac7b529fa6b0f33e0a..5fe3d65581f7627885b0f3cce00a2c1e51c21670 100644 (file)
@@ -18,6 +18,5 @@ EXPORT_SYMBOL(init_task);
 * If this is not aligned on an 8k boundary, then you should change code
  * in etrap.S which assumes it.
  */
-union thread_union init_thread_union
-       __attribute__((section (".data.init_task")))
-       = { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
index f5000a460c05d95fcc1b843af72e9701e97ab131..04e28b2671c8450808097169c1df30be58798706 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/signal.h>
 #include <linux/resource.h>
 #include <linux/times.h>
-#include <linux/utsname.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/sem.h>
index 15c2d752b2bcaec194903d3eac70a34f1c3b2458..a63c5d2d984917e242441d8b85f856f109f07661 100644 (file)
@@ -3,10 +3,11 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/utsname.h>
 #include <asm/utrap.h>
 #include <asm/signal.h>
 
+struct new_utsname;
+
 extern asmlinkage unsigned long sys_getpagesize(void);
 extern asmlinkage unsigned long sparc_brk(unsigned long brk);
 extern asmlinkage long sparc_pipe(struct pt_regs *regs);
index 0728def322342b8d0438cf4129893883f9daddea..fc633dbacf84b818e1f6ac64fff887852431526d 100644 (file)
@@ -96,11 +96,10 @@ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,)
        $(call cc-option, -fno-stack-protector,) \
        $(call cc-option, -fno-stack-protector-all,)
 
-CONFIG_KERNEL_STACK_ORDER ?= 2
-STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
-
-CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
-       -DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE)
+# Options used by linker script
+export LDS_START      := $(START)
+export LDS_ELF_ARCH   := $(ELF_ARCH)
+export LDS_ELF_FORMAT := $(ELF_FORMAT)
 
 # The wrappers will select whether using "malloc" or the kernel allocator.
 LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
index 54f42e8b0105b7112c9d42a47d41f5fc4fc68312..34d813011b7a803ab3ee5ef2ae5221f918a61f90 100644 (file)
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
        unsigned cpu = smp_processor_id();
 
        if(prev != next){
-               cpu_clear(cpu, prev->cpu_vm_mask);
-               cpu_set(cpu, next->cpu_vm_mask);
+               cpumask_clear_cpu(cpu, mm_cpumask(prev));
+               cpumask_set_cpu(cpu, mm_cpumask(next));
                if(next != &init_mm)
                        __switch_mm(&next->context.id);
        }
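
This hunk follows the tree-wide pattern of this series: direct cpu_vm_mask
accesses become cpumask_* calls on mm_cpumask(), which also works when
CONFIG_CPUMASK_OFFSTACK makes the mask a pointer. A minimal sketch of the
before/after idiom, assuming the 2.6.31-era <linux/cpumask.h>:

    /* old: struct-member access on a fixed-size cpumask_t */
    cpu_clear(cpu, prev->cpu_vm_mask);
    cpu_set(cpu, next->cpu_vm_mask);

    /* new: accessor plus pointer-based cpumask API */
    cpumask_clear_cpu(cpu, mm_cpumask(prev));
    cpumask_set_cpu(cpu, mm_cpumask(next));
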
index 388ec0a3ea9b0f57cfffe0ad14eb411934069df5..1119233597a1eb2f3ac26a22bb4a05183f32b9d9 100644 (file)
@@ -3,6 +3,9 @@
 # Licensed under the GPL
 #
 
+CPPFLAGS_vmlinux.lds := -U$(SUBARCH) -DSTART=$(LDS_START) \
+                        -DELF_ARCH=$(LDS_ELF_ARCH)        \
+                        -DELF_FORMAT=$(LDS_ELF_FORMAT)
 extra-y := vmlinux.lds
 clean-files :=
 
index b25121b537d8dfb6d0588d1aaf4ebc9b915cc663..8aa77b61a5ff09458786c249b1419c8b9dac68ef 100644 (file)
@@ -30,9 +30,8 @@ EXPORT_SYMBOL(init_task);
  * "init_task" linker map entry..
  */
 
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 union thread_union cpu0_irqstack
        __attribute__((__section__(".data.init_irqstack"))) =
index 98351c78bc814de7062dc5e9c213810ee4811de5..106bf27e2a9a5fc96abf5056bfbe070a6e4f709f 100644 (file)
@@ -111,7 +111,7 @@ void smp_prepare_cpus(unsigned int maxcpus)
        int i;
 
        for (i = 0; i < ncpus; ++i)
-               cpu_set(i, cpu_possible_map);
+               set_cpu_possible(i, true);
 
        cpu_clear(me, cpu_online_map);
        cpu_set(me, cpu_online_map);
index f8aeb448aab62ba76584f9c0e497e1897e490c6b..16e49bfa2b426c6a324b7102a877a0fb14e94a4e 100644 (file)
@@ -1,3 +1,6 @@
+
+KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER);
+
 #ifdef CONFIG_LD_SCRIPT_STATIC
 #include "uml.lds.S"
 #else
index 7983c420eaf2a304843b9b0563b4811626720c71..a012ee8ef803302642a5eac1daa791fe8f7c617a 100644 (file)
@@ -179,8 +179,8 @@ archclean:
 define archhelp
   echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
   echo  '  install      - Install kernel using'
-  echo  '                  (your) ~/bin/installkernel or'
-  echo  '                  (distribution) /sbin/installkernel or'
+  echo  '                  (your) ~/bin/$(INSTALLKERNEL) or'
+  echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
   echo  '                  install to $$(INSTALL_PATH) and run lilo'
   echo  '  fdimage      - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
   echo  '  fdimage144   - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
index 8d60ee15dfd9b2e5e4ff3808976ea8143e94c05a..d13ec1c386407b294d8baf4bdb1c4d07f11ab691 100644 (file)
@@ -33,8 +33,8 @@ verify "$3"
 
 # User may have a custom install script
 
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install - same as make zlilo
 
index 5d367caa0e36c437e17cb29f76dc285d9b6d4a49..549860d3be8f1e8bb856e084e7fac225e6d53045 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_CACHE_H
 #define _ASM_X86_CACHE_H
 
+#include <linux/linkage.h>
+
 /* L1 cache line size */
 #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
@@ -13,7 +15,7 @@
 #ifdef CONFIG_SMP
 #define __cacheline_aligned_in_smp                                     \
        __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT))))      \
-       __attribute__((__section__(".data.page_aligned")))
+       __page_aligned_data
 #endif
 #endif
 
index f923203dc39a68f04e6c0ea05fe7915c25b0b969..4a2d4e0c18d99cf635b02820070330f82dccbe63 100644 (file)
@@ -37,12 +37,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 
        if (likely(prev != next)) {
                /* stop flush ipis for the previous mm */
-               cpu_clear(cpu, prev->cpu_vm_mask);
+               cpumask_clear_cpu(cpu, mm_cpumask(prev));
 #ifdef CONFIG_SMP
                percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
                percpu_write(cpu_tlbstate.active_mm, next);
 #endif
-               cpu_set(cpu, next->cpu_vm_mask);
+               cpumask_set_cpu(cpu, mm_cpumask(next));
 
                /* Re-load page tables */
                load_cr3(next->pgd);
@@ -58,7 +58,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
                BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
-               if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+               if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
                        /* We were in lazy tlb mode and leave_mm disabled
                         * tlb flush IPI delivery. We must reload CR3
                         * to make sure to use no freed page tables.
index e63cf7d441e1351071997f4524315171c677e952..139d4c1a33a7c85a2989a8ab02ee1f6cee52d601 100644 (file)
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog;
 #define NMI_INVALID    3
 
 struct ctl_table;
-struct file;
-extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
+extern int proc_nmi_enabled(struct ctl_table *, int ,
                        void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
 
index f76a162c082c70f882fb6cdd0a8e73c955d2ef02..ada8c201d5133482b0163fe289699ae61a7e88c5 100644 (file)
@@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus)
 static inline const struct cpumask *
 cpumask_of_pcibus(const struct pci_bus *bus)
 {
-       return cpumask_of_node(__pcibus_to_node(bus));
+       int node;
+
+       node = __pcibus_to_node(bus);
+       return (node == -1) ? cpu_online_mask :
+                             cpumask_of_node(node);
 }
 #endif
 
index 6a84ed166aec136334a644cc4fbaa676368ae983..1e796782cd7b9e606e59f2f5c39b4819fb9131c7 100644 (file)
@@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu)
        smp_ops.send_call_func_single_ipi(cpu);
 }
 
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
        smp_ops.send_call_func_ipi(mask);
index 64970b9885f248d7d0388203477611142ba1b918..dc69f28489f5bc9a2873a8d22b084bec313d9912 100644 (file)
@@ -227,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node)
 
        cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
        if (cfg) {
-               if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+               if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
                        kfree(cfg);
                        cfg = NULL;
-               } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+               } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
                                                          GFP_ATOMIC, node)) {
                        free_cpumask_var(cfg->domain);
                        kfree(cfg);
                        cfg = NULL;
-               } else {
-                       cpumask_clear(cfg->domain);
-                       cpumask_clear(cfg->old_domain);
                }
        }
 
index cb66a22d98ad72a3ad9eae5fc5b606beb15bcb94..7ff61d6a188ab2d1779270a6ff937c0a1a40b82a 100644 (file)
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 /*
  * proc handler for /proc/sys/kernel/nmi
  */
-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write,
                        void __user *buffer, size_t *length, loff_t *ppos)
 {
        int old_state;
 
        nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
        old_state = nmi_watchdog_enabled;
-       proc_dointvec(table, write, file, buffer, length, ppos);
+       proc_dointvec(table, write, buffer, length, ppos);
        if (!!old_state == !!nmi_watchdog_enabled)
                return 0;
 
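
The ctl_table proc_handler callbacks lose their unused struct file * argument
throughout this series. A handler written against the new signature would look
roughly like this (hypothetical example delegating to the generic integer
parser):

    static int my_handler(struct ctl_table *table, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos)
    {
            /* parse/emit the integer value as before */
            return proc_dointvec(table, write, buffer, lenp, ppos);
    }
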
index bca5fba91c9ea3d62eed95643cacdb90e2a32770..f7dd2a7c3bf42b51bb9368a926885c8d55a4153a 100644 (file)
@@ -5,7 +5,6 @@
 #include <linux/kallsyms.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-#include <linux/utsname.h>
 #include <linux/hardirq.h>
 #include <linux/kdebug.h>
 #include <linux/module.h>
index 54b0a3276766c3ac5180d7d6c11469a2c5f00979..a071e6be177e7d94f0b77426006e2f5127761548 100644 (file)
@@ -5,7 +5,6 @@
 #include <linux/kallsyms.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-#include <linux/utsname.h>
 #include <linux/hardirq.h>
 #include <linux/kdebug.h>
 #include <linux/module.h>
index b766e8c7252d4f7815154ad2afd13baff874ea78..218aad7ee76e050202b95a7fc303c19d29697cc9 100644 (file)
@@ -608,7 +608,7 @@ ENTRY(initial_code)
 /*
  * BSS section
  */
-.section ".bss.page_aligned","wa"
+__PAGE_ALIGNED_BSS
        .align PAGE_SIZE_asm
 #ifdef CONFIG_X86_PAE
 swapper_pg_pmd:
@@ -626,7 +626,7 @@ ENTRY(empty_zero_page)
  * This starts the data section.
  */
 #ifdef CONFIG_X86_PAE
-.section ".data.page_aligned","wa"
+__PAGE_ALIGNED_DATA
        /* Page-aligned for the benefit of paravirt? */
        .align PAGE_SIZE_asm
 ENTRY(swapper_pg_dir)
index fa54f78e2a05c94862c56df144160e31474f481c..d0bc0a13a43789edd7f3d7d3d3ac5bebce95f209 100644 (file)
@@ -418,7 +418,7 @@ ENTRY(phys_base)
 ENTRY(idt_table)
        .skip IDT_ENTRIES * 16
 
-       .section .bss.page_aligned, "aw", @nobits
+       __PAGE_ALIGNED_BSS
        .align PAGE_SIZE
 ENTRY(empty_zero_page)
        .skip PAGE_SIZE
index 270ff83efc11d8dc27089a3fec085fdec9074b5b..3a54dcb9cd0e6c82b8b9231e88afd7305be6e5da 100644 (file)
@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
index 71f1d99a635d75dd7f30ee1f1876b91112d2a82c..ec6ef60cbd170b0809d20287a60c226fb8fc25ed 100644 (file)
@@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 #ifdef CONFIG_SMP
                preempt_disable();
                load_LDT(pc);
-               if (!cpus_equal(current->mm->cpu_vm_mask,
-                               cpumask_of_cpu(smp_processor_id())))
+               if (!cpumask_equal(mm_cpumask(current->mm),
+                                  cpumask_of(smp_processor_id())))
                        smp_call_function(flush_ldt, current->mm, 1);
                preempt_enable();
 #else
index 847ab41603155ad493df143b24e6c9055f0f10e8..5284cd2b57769f53e79f520ecc6f8199720497cf 100644 (file)
@@ -555,10 +555,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 void __init init_c1e_mask(void)
 {
        /* If we're using c1e_idle, we need to allocate c1e_mask. */
-       if (pm_idle == c1e_idle) {
-               alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
-               cpumask_clear(c1e_mask);
-       }
+       if (pm_idle == c1e_idle)
+               zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
 }
 
 static int __init idle_setup(char *str)
index 09c5e077dff7e0aea7f11cbe3f3c5ca196ef688b..565ebc65920e3e685161758acb03c4f8106c6b40 100644 (file)
@@ -1059,12 +1059,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 #endif
        current_thread_info()->cpu = 0;  /* needed? */
        for_each_possible_cpu(i) {
-               alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
-               alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
-               alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
-               cpumask_clear(per_cpu(cpu_core_map, i));
-               cpumask_clear(per_cpu(cpu_sibling_map, i));
-               cpumask_clear(cpu_data(i).llc_shared_map);
+               zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
+               zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+               zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
        }
        set_cpu_sibling_map(0);
 
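
Several hunks here collapse an alloc_cpumask_var()/cpumask_clear() pair into
one zalloc_cpumask_var() call, which returns an already-zeroed mask. Sketch of
the idiom:

    /* before: allocate, then clear by hand */
    alloc_cpumask_var(&mask, GFP_KERNEL);
    cpumask_clear(mask);

    /* after: one call, mask comes back zeroed */
    if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
            return -ENOMEM;
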
index e293ac56c72316a6f3bbaa2192230c1f08f25e7c..dcb00d2785127df39f3814ec3478c64658733582 100644 (file)
@@ -93,7 +93,6 @@ static struct irqaction irq0  = {
 
 void __init setup_default_timer_irq(void)
 {
-       irq0.mask = cpumask_of_cpu(0);
        setup_irq(0, &irq0);
 }
 
index 9346e102338dfcdab484711c21447433b196bd69..a665c71352b84359d01863bfd3e4864ab28ea85f 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/spinlock.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-#include <linux/utsname.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
index cf53a78e2dcf1b6639dd569b3810da809e437e1b..8cb4974ff5990c19267077d473caeeb504fe5bae 100644 (file)
@@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void)
 }
 
 #ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
-                      void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
 static ctl_table kernel_table2[] = {
        { .procname = "vsyscall64",
          .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
          .mode = 0644,
-         .proc_handler = vsyscall_sysctl_change },
+         .proc_handler = proc_dointvec },
        {}
 };
 
index 82728f2c6d5599ccda0c4cb1dee132ce3305f5ca..f4cee9028cf0b01e11951662b625f63371f627e6 100644 (file)
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
        info.si_errno   = 0;
        info.si_code    = si_code;
        info.si_addr    = (void __user *)address;
+       info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
 
        force_sig_info(si_signo, &info, tsk);
 }
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
 }
 
 static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+         unsigned int fault)
 {
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
+       int code = BUS_ADRERR;
 
        up_read(&mm->mmap_sem);
 
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
        tsk->thread.error_code  = error_code;
        tsk->thread.trap_no     = 14;
 
-       force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+       if (fault & VM_FAULT_HWPOISON) {
+               printk(KERN_ERR
+       "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+                       tsk->comm, tsk->pid, address);
+               code = BUS_MCEERR_AR;
+       }
+#endif
+       force_sig_info_fault(SIGBUS, code, address, tsk);
 }
 
 static noinline void
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
        if (fault & VM_FAULT_OOM) {
                out_of_memory(regs, error_code, address);
        } else {
-               if (fault & VM_FAULT_SIGBUS)
-                       do_sigbus(regs, error_code, address);
+               if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+                       do_sigbus(regs, error_code, address, fault);
                else
                        BUG();
        }
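
With BUS_MCEERR_AR, userspace can read the granularity of the poisoned region
from siginfo. A hedged sketch of a SIGBUS handler consuming the new
si_addr_lsb field (illustration only; assumes a libc exposing BUS_MCEERR_AR,
registration via sigaction(SA_SIGINFO) is elided, and fprintf is not
async-signal-safe):

    #include <signal.h>
    #include <stdio.h>

    static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
    {
            if (si->si_code == BUS_MCEERR_AR)
                    /* si_addr_lsb is log2 of the affected range,
                     * PAGE_SHIFT for a whole poisoned page */
                    fprintf(stderr, "hwpoison at %p, lsb %d\n",
                            si->si_addr, si->si_addr_lsb);
    }
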
index 24952fdc7e407a7cd2f9fe1237012c1c265da11a..dd38bfbefd1fa1972f47403fa6b35e20663e3590 100644 (file)
@@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
 
        mb();
 }
+EXPORT_SYMBOL_GPL(clflush_cache_range);
 
 static void __cpa_flush_all(void *arg)
 {
index c814e144a3f0724b1d4ce56a74ca03b70cc82f25..36fe08eeb5c3e0de7c51094c111a58230351f1d1 100644 (file)
@@ -59,7 +59,8 @@ void leave_mm(int cpu)
 {
        if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
                BUG();
-       cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
+       cpumask_clear_cpu(cpu,
+                         mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
        load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -234,8 +235,8 @@ void flush_tlb_current_task(void)
        preempt_disable();
 
        local_flush_tlb();
-       if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-               flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+               flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
        preempt_enable();
 }
 
@@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm)
                else
                        leave_mm(smp_processor_id());
        }
-       if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-               flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+               flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
 
        preempt_enable();
 }
@@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
                        leave_mm(smp_processor_id());
        }
 
-       if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-               flush_tlb_others(&mm->cpu_vm_mask, mm, va);
+       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+               flush_tlb_others(mm_cpumask(mm), mm, va);
 
        preempt_enable();
 }
index 5db96d4304de76d276a2feb933044c3673cb0abe..1331fcf261433b72c9d0a4620326a3898f1761fa 100644 (file)
@@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum)
 
 #else /* CONFIG_X86_32 */
 
-static unsigned char mp_bus_to_node[BUS_NR] = {
+static int mp_bus_to_node[BUS_NR] = {
        [0 ... BUS_NR - 1] = -1
 };
 
index 88112b49f02c163ce245039397c251725779fe0e..6b4ffedb93c9f62e2d65874c7d27ef056fd9e8b9 100644 (file)
@@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO    $@
                       $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
                       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
 
-VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 GCOV_PROFILE := n
 
 #
index 093dd59b53856c8629cbd48ac7d9f031162d060e..3bf7b1d250ce986d02bc44de0bd2086f6e37b824 100644 (file)
@@ -1165,14 +1165,14 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
        /* Get the "official" set of cpus referring to our pagetable. */
        if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
                for_each_online_cpu(cpu) {
-                       if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+                       if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
                            && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
                                continue;
                        smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
                }
                return;
        }
-       cpumask_copy(mask, &mm->cpu_vm_mask);
+       cpumask_copy(mask, mm_cpumask(mm));
 
        /* It's possible that a vcpu may have a stale reference to our
           cr3, because its in lazy mode, and it hasn't yet flushed
index fe3186de6a336abf59cfd2f72b63d1bbc78d4721..6f56d95f2c1ea48f6c9c522d503fc78109c9492b 100644 (file)
@@ -27,7 +27,8 @@ sed-y = -e 's/(\(\.[a-z]*it\|\.ref\|\)\.text)/(\1.literal \1.text)/g' \
        -e 's/(\(\.text\.[a-z]*\))/(\1.literal \1)/g'
 
 quiet_cmd__cpp_lds_S = LDS     $@
-      cmd__cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ $< | sed $(sed-y) >$@
+      cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $< \
+                       | sed $(sed-y) >$@
 
 $(obj)/vmlinux.lds: $(src)/vmlinux.lds.S FORCE
        $(call if_changed_dep,_cpp_lds_S)
index d9ddc1ba761cf1d53f59cfb83c2c0405c4ab87cc..d215adcfd4ea19635fcbaec87a82b919592c57d6 100644 (file)
@@ -235,7 +235,7 @@ should_never_return:
  * BSS section
  */
        
-.section ".bss.page_aligned", "w"
+__PAGE_ALIGNED_BSS
 #ifdef CONFIG_MMU
 ENTRY(swapper_pg_dir)
        .fill   PAGE_SIZE, 1, 0
index c4302f0e4ba08a66b39023d13986dda288a4f487..cd122fb7e48affb0c98758462aa813dd4a382043 100644 (file)
@@ -23,9 +23,8 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union
-       __attribute__((__section__(".data.init_task"))) =
-{ INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
 
 struct task_struct init_task = INIT_TASK(init_task);
 
index d8fb39145986569d6ac0fc327881aa95edf52e62..e5aeb2b79e6f956998b8bd0b2bc3bc882ab5067c 100644 (file)
@@ -14,3 +14,12 @@ config ASYNC_MEMSET
        tristate
        select ASYNC_CORE
 
+config ASYNC_PQ
+       tristate
+       select ASYNC_CORE
+
+config ASYNC_RAID6_RECOV
+       tristate
+       select ASYNC_CORE
+       select ASYNC_PQ
+
index 27baa7d52fbcf863873d9fd55507115d77e46c28..d1e0e6f72bc14e651c2a9c0a1c1f7936ead53970 100644 (file)
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
 obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
 obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
 obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+obj-$(CONFIG_ASYNC_PQ) += async_pq.o
+obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
+obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o
index ddccfb01c416b9a636324ee273244b147c644a16..0ec1fb69d4eacc2a310504caa9109186f9e89370 100644 (file)
  * async_memcpy - attempt to copy memory with a dma engine.
  * @dest: destination page
  * @src: src page
- * @offset: offset in pages to start transaction
+ * @dest_offset: offset into 'dest' to start transaction
+ * @src_offset: offset into 'src' to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
- * @depend_tx: memcpy depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
 async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
-       unsigned int src_offset, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+            unsigned int src_offset, size_t len,
+            struct async_submit_ctl *submit)
 {
-       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
                                                      &dest, 1, &src, 1, len);
        struct dma_device *device = chan ? chan->device : NULL;
        struct dma_async_tx_descriptor *tx = NULL;
 
-       if (device) {
+       if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
                dma_addr_t dma_dest, dma_src;
-               unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+               unsigned long dma_prep_flags = 0;
 
+               if (submit->cb_fn)
+                       dma_prep_flags |= DMA_PREP_INTERRUPT;
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_prep_flags |= DMA_PREP_FENCE;
                dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
                                        DMA_FROM_DEVICE);
 
@@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
        if (tx) {
                pr_debug("%s: (async) len: %zu\n", __func__, len);
-               async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               async_tx_submit(chan, tx, submit);
        } else {
                void *dest_buf, *src_buf;
                pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
                /* wait for any prerequisite operations */
-               async_tx_quiesce(&depend_tx);
+               async_tx_quiesce(&submit->depend_tx);
 
                dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
                src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
                kunmap_atomic(dest_buf, KM_USER0);
                kunmap_atomic(src_buf, KM_USER1);
 
-               async_tx_sync_epilog(cb_fn, cb_param);
+               async_tx_sync_epilog(submit);
        }
 
        return tx;
 }
 EXPORT_SYMBOL_GPL(async_memcpy);
 
-static int __init async_memcpy_init(void)
-{
-       return 0;
-}
-
-static void __exit async_memcpy_exit(void)
-{
-       do { } while (0);
-}
-
-module_init(async_memcpy_init);
-module_exit(async_memcpy_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous memcpy api");
 MODULE_LICENSE("GPL");
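
The flags/depend_tx/callback argument bundle is replaced by struct
async_submit_ctl across the async_tx API. A hedged usage sketch of the new
calling convention (dest_page, src_page, done and arg are placeholders;
init_async_submit() is taken from this series):

    #include <linux/async_tx.h>

    struct async_submit_ctl submit;
    struct dma_async_tx_descriptor *tx;

    /* no dependency; ack the descriptor, call done(arg) on completion */
    init_async_submit(&submit, ASYNC_TX_ACK, NULL, done, arg, NULL);
    tx = async_memcpy(dest_page, src_page, 0, 0, PAGE_SIZE, &submit);
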
index 5b5eb99bb244311bb1456c62080249d812967be4..58e4a8752aee52c06681bac43055f53586e3b3e1 100644 (file)
  * @val: fill value
  * @offset: offset in pages to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: memset depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
-async_memset(struct page *dest, int val, unsigned int offset,
-       size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+async_memset(struct page *dest, int val, unsigned int offset, size_t len,
+            struct async_submit_ctl *submit)
 {
-       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
                                                      &dest, 1, NULL, 0, len);
        struct dma_device *device = chan ? chan->device : NULL;
        struct dma_async_tx_descriptor *tx = NULL;
 
-       if (device) {
+       if (device && is_dma_fill_aligned(device, offset, 0, len)) {
                dma_addr_t dma_dest;
-               unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+               unsigned long dma_prep_flags = 0;
 
+               if (submit->cb_fn)
+                       dma_prep_flags |= DMA_PREP_INTERRUPT;
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_prep_flags |= DMA_PREP_FENCE;
                dma_dest = dma_map_page(device->dev, dest, offset, len,
                                        DMA_FROM_DEVICE);
 
@@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset,
 
        if (tx) {
                pr_debug("%s: (async) len: %zu\n", __func__, len);
-               async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               async_tx_submit(chan, tx, submit);
        } else { /* run the memset synchronously */
                void *dest_buf;
                pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
-               dest_buf = (void *) (((char *) page_address(dest)) + offset);
+               dest_buf = page_address(dest) + offset;
 
                /* wait for any prerequisite operations */
-               async_tx_quiesce(&depend_tx);
+               async_tx_quiesce(&submit->depend_tx);
 
                memset(dest_buf, val, len);
 
-               async_tx_sync_epilog(cb_fn, cb_param);
+               async_tx_sync_epilog(submit);
        }
 
        return tx;
 }
 EXPORT_SYMBOL_GPL(async_memset);
 
-static int __init async_memset_init(void)
-{
-       return 0;
-}
-
-static void __exit async_memset_exit(void)
-{
-       do { } while (0);
-}
-
-module_init(async_memset_init);
-module_exit(async_memset_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous memset api");
 MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
new file mode 100644 (file)
index 0000000..b88db6d
--- /dev/null
@@ -0,0 +1,395 @@
+/*
+ * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+/**
+ * scribble - space to hold throwaway P buffer for synchronous gen_syndrome
+ */
+static struct page *scribble;
+
+static bool is_raid6_zero_block(struct page *p)
+{
+       return p == (void *) raid6_empty_zero_page;
+}
+
+/* the struct page *blocks[] parameter passed to async_gen_syndrome()
+ * and async_syndrome_val() contains the 'P' destination address at
+ * blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
+ *
+ * note: these are macros as they are used as lvalues
+ */
+#define P(b, d) (b[d-2])
+#define Q(b, d) (b[d-1])
+
+/**
+ * do_async_gen_syndrome - asynchronously calculate P and/or Q
+ */
+static __async_inline struct dma_async_tx_descriptor *
+do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
+                     const unsigned char *scfs, unsigned int offset, int disks,
+                     size_t len, dma_addr_t *dma_src,
+                     struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct dma_device *dma = chan->device;
+       enum dma_ctrl_flags dma_flags = 0;
+       enum async_tx_flags flags_orig = submit->flags;
+       dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+       dma_async_tx_callback cb_param_orig = submit->cb_param;
+       int src_cnt = disks - 2;
+       unsigned char coefs[src_cnt];
+       unsigned short pq_src_cnt;
+       dma_addr_t dma_dest[2];
+       int src_off = 0;
+       int idx;
+       int i;
+
+       /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
+       if (P(blocks, disks))
+               dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
+                                          len, DMA_BIDIRECTIONAL);
+       else
+               dma_flags |= DMA_PREP_PQ_DISABLE_P;
+       if (Q(blocks, disks))
+               dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
+                                          len, DMA_BIDIRECTIONAL);
+       else
+               dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+
+       /* convert source addresses being careful to collapse 'empty'
+        * sources and update the coefficients accordingly
+        */
+       for (i = 0, idx = 0; i < src_cnt; i++) {
+               if (is_raid6_zero_block(blocks[i]))
+                       continue;
+               dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
+                                           DMA_TO_DEVICE);
+               coefs[idx] = scfs[i];
+               idx++;
+       }
+       src_cnt = idx;
+
+       while (src_cnt > 0) {
+               submit->flags = flags_orig;
+               pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
+               /* if we are submitting additional pqs, leave the chain open,
+                * clear the callback parameters, and leave the destination
+                * buffers mapped
+                */
+               if (src_cnt > pq_src_cnt) {
+                       submit->flags &= ~ASYNC_TX_ACK;
+                       submit->flags |= ASYNC_TX_FENCE;
+                       dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
+                       submit->cb_fn = NULL;
+                       submit->cb_param = NULL;
+               } else {
+                       dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
+                       submit->cb_fn = cb_fn_orig;
+                       submit->cb_param = cb_param_orig;
+                       if (cb_fn_orig)
+                               dma_flags |= DMA_PREP_INTERRUPT;
+               }
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
+
+               /* Since we have clobbered the src_list we are committed
+                * to doing this asynchronously.  Drivers force forward
+                * progress in case they cannot provide a descriptor
+                */
+               for (;;) {
+                       tx = dma->device_prep_dma_pq(chan, dma_dest,
+                                                    &dma_src[src_off],
+                                                    pq_src_cnt,
+                                                    &coefs[src_off], len,
+                                                    dma_flags);
+                       if (likely(tx))
+                               break;
+                       async_tx_quiesce(&submit->depend_tx);
+                       dma_async_issue_pending(chan);
+               }
+
+               async_tx_submit(chan, tx, submit);
+               submit->depend_tx = tx;
+
+               /* drop completed sources */
+               src_cnt -= pq_src_cnt;
+               src_off += pq_src_cnt;
+
+               dma_flags |= DMA_PREP_CONTINUE;
+       }
+
+       return tx;
+}
+
+/**
+ * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
+ */
+static void
+do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+                    size_t len, struct async_submit_ctl *submit)
+{
+       void **srcs;
+       int i;
+
+       if (submit->scribble)
+               srcs = submit->scribble;
+       else
+               srcs = (void **) blocks;
+
+       for (i = 0; i < disks; i++) {
+               if (is_raid6_zero_block(blocks[i])) {
+                       BUG_ON(i > disks - 3); /* P or Q can't be zero */
+                       srcs[i] = blocks[i];
+               } else
+                       srcs[i] = page_address(blocks[i]) + offset;
+       }
+       raid6_call.gen_syndrome(disks, len, srcs);
+       async_tx_sync_epilog(submit);
+}
+
+/**
+ * async_gen_syndrome - asynchronously calculate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @submit: submission/completion modifiers
+ *
+ * General note: This routine assumes a field of GF(2^8) with a
+ * primitive polynomial of 0x11d and a generator of {02}.
+ *
+ * 'disks' note: callers can optionally omit either P or Q (but not
+ * both) from the calculation by setting blocks[disks-2] or
+ * blocks[disks-1] to NULL.  When P or Q is omitted 'len' must be <=
+ * PAGE_SIZE as a temporary buffer of this size is used in the
+ * synchronous path.  'disks' always accounts for both destination
+ * buffers.
+ *
+ * 'blocks' note: if submit->scribble is NULL then the contents of
+ * 'blocks' may be overwritten
+ */
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+                  size_t len, struct async_submit_ctl *submit)
+{
+       int src_cnt = disks - 2;
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+                                                     &P(blocks, disks), 2,
+                                                     blocks, src_cnt, len);
+       struct dma_device *device = chan ? chan->device : NULL;
+       dma_addr_t *dma_src = NULL;
+
+       BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
+
+       if (submit->scribble)
+               dma_src = submit->scribble;
+       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+               dma_src = (dma_addr_t *) blocks;
+
+       if (dma_src && device &&
+           (src_cnt <= dma_maxpq(device, 0) ||
+            dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
+           is_dma_pq_aligned(device, offset, 0, len)) {
+               /* run the p+q asynchronously */
+               pr_debug("%s: (async) disks: %d len: %zu\n",
+                        __func__, disks, len);
+               return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
+                                            disks, len, dma_src, submit);
+       }
+
+       /* run the pq synchronously */
+       pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
+
+       /* wait for any prerequisite operations */
+       async_tx_quiesce(&submit->depend_tx);
+
+       if (!P(blocks, disks)) {
+               P(blocks, disks) = scribble;
+               BUG_ON(len + offset > PAGE_SIZE);
+       }
+       if (!Q(blocks, disks)) {
+               Q(blocks, disks) = scribble;
+               BUG_ON(len + offset > PAGE_SIZE);
+       }
+       do_sync_gen_syndrome(blocks, offset, disks, len, submit);
+
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(async_gen_syndrome);
+
+/**
+ * async_syndrome_val - asynchronously validate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
+ * @spare: temporary result buffer for the synchronous case
+ * @submit: submission / completion modifiers
+ *
+ * The same notes from async_gen_syndrome apply to the 'blocks',
+ * and 'disks' parameters of this routine.  The synchronous path
+ * requires a temporary result buffer and submit->scribble to be
+ * specified.
+ */
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
+                  size_t len, enum sum_check_flags *pqres, struct page *spare,
+                  struct async_submit_ctl *submit)
+{
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
+                                                     NULL, 0,  blocks, disks,
+                                                     len);
+       struct dma_device *device = chan ? chan->device : NULL;
+       struct dma_async_tx_descriptor *tx;
+       enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+       dma_addr_t *dma_src = NULL;
+
+       BUG_ON(disks < 4);
+
+       if (submit->scribble)
+               dma_src = submit->scribble;
+       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+               dma_src = (dma_addr_t *) blocks;
+
+       if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+           is_dma_pq_aligned(device, offset, 0, len)) {
+               struct device *dev = device->dev;
+               dma_addr_t *pq = &dma_src[disks-2];
+               int i;
+
+               pr_debug("%s: (async) disks: %d len: %zu\n",
+                        __func__, disks, len);
+               if (!P(blocks, disks))
+                       dma_flags |= DMA_PREP_PQ_DISABLE_P;
+               if (!Q(blocks, disks))
+                       dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
+               for (i = 0; i < disks; i++)
+                       if (likely(blocks[i])) {
+                               BUG_ON(is_raid6_zero_block(blocks[i]));
+                               dma_src[i] = dma_map_page(dev, blocks[i],
+                                                         offset, len,
+                                                         DMA_TO_DEVICE);
+                       }
+
+               for (;;) {
+                       tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+                                                           disks - 2,
+                                                           raid6_gfexp,
+                                                           len, pqres,
+                                                           dma_flags);
+                       if (likely(tx))
+                               break;
+                       async_tx_quiesce(&submit->depend_tx);
+                       dma_async_issue_pending(chan);
+               }
+               async_tx_submit(chan, tx, submit);
+
+               return tx;
+       } else {
+               struct page *p_src = P(blocks, disks);
+               struct page *q_src = Q(blocks, disks);
+               enum async_tx_flags flags_orig = submit->flags;
+               dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+               void *scribble = submit->scribble;
+               void *cb_param_orig = submit->cb_param;
+               void *p, *q, *s;
+
+               pr_debug("%s: (sync) disks: %d len: %zu\n",
+                        __func__, disks, len);
+
+               /* caller must provide a temporary result buffer and
+                * allow the input parameters to be preserved
+                */
+               BUG_ON(!spare || !scribble);
+
+               /* wait for any prerequisite operations */
+               async_tx_quiesce(&submit->depend_tx);
+
+               /* recompute p and/or q into the temporary buffer and then
+                * check to see the result matches the current value
+                */
+               tx = NULL;
+               *pqres = 0;
+               if (p_src) {
+                       init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+                                         NULL, NULL, scribble);
+                       tx = async_xor(spare, blocks, offset, disks-2, len, submit);
+                       async_tx_quiesce(&tx);
+                       p = page_address(p_src) + offset;
+                       s = page_address(spare) + offset;
+                       *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
+               }
+
+               if (q_src) {
+                       P(blocks, disks) = NULL;
+                       Q(blocks, disks) = spare;
+                       init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
+                       tx = async_gen_syndrome(blocks, offset, disks, len, submit);
+                       async_tx_quiesce(&tx);
+                       q = page_address(q_src) + offset;
+                       s = page_address(spare) + offset;
+                       *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
+               }
+
+               /* restore P, Q and submit */
+               P(blocks, disks) = p_src;
+               Q(blocks, disks) = q_src;
+
+               submit->cb_fn = cb_fn_orig;
+               submit->cb_param = cb_param_orig;
+               submit->flags = flags_orig;
+               async_tx_sync_epilog(submit);
+
+               return NULL;
+       }
+}
+EXPORT_SYMBOL_GPL(async_syndrome_val);
+
+static int __init async_pq_init(void)
+{
+       scribble = alloc_page(GFP_KERNEL);
+
+       if (scribble)
+               return 0;
+
+       pr_err("%s: failed to allocate required spare page\n", __func__);
+
+       return -ENOMEM;
+}
+
+static void __exit async_pq_exit(void)
+{
+       put_page(scribble);
+}
+
+module_init(async_pq_init);
+module_exit(async_pq_exit);
+
+MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
+MODULE_LICENSE("GPL");
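
Tying the new file together: per the kernel-doc above, the P and Q
destinations ride at the tail of blocks[]. A hedged sketch of generating a
syndrome over four data disks (page allocation and the scribble buffer are
elided; the layout follows the P()/Q() macros defined in this file):

    /* blocks[0..3] = data, blocks[4] = P, blocks[5] = Q, so disks = 6 */
    struct page *blocks[6];
    struct async_submit_ctl submit;

    init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
    async_gen_syndrome(blocks, 0, 6, PAGE_SIZE, &submit);
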
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
new file mode 100644 (file)
index 0000000..6d73dde
--- /dev/null
@@ -0,0 +1,468 @@
+/*
+ * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * based on raid6recov.c:
+ *   Copyright 2002 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+static struct dma_async_tx_descriptor *
+async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
+                 size_t len, struct async_submit_ctl *submit)
+{
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+                                                     &dest, 1, srcs, 2, len);
+       struct dma_device *dma = chan ? chan->device : NULL;
+       const u8 *amul, *bmul;
+       u8 ax, bx;
+       u8 *a, *b, *c;
+
+       if (dma) {
+               dma_addr_t dma_dest[2];
+               dma_addr_t dma_src[2];
+               struct device *dev = dma->dev;
+               struct dma_async_tx_descriptor *tx;
+               enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
+               dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+               dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+               dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+               tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+                                            len, dma_flags);
+               if (tx) {
+                       async_tx_submit(chan, tx, submit);
+                       return tx;
+               }
+
+               /* could not get a descriptor, unmap and fall through to
+                * the synchronous path
+                */
+               dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+               dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+       }
+
+       /* run the operation synchronously */
+       async_tx_quiesce(&submit->depend_tx);
+       amul = raid6_gfmul[coef[0]];
+       bmul = raid6_gfmul[coef[1]];
+       a = page_address(srcs[0]);
+       b = page_address(srcs[1]);
+       c = page_address(dest);
+
+       while (len--) {
+               ax    = amul[*a++];
+               bx    = bmul[*b++];
+               *c++ = ax ^ bx;
+       }
+
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
+          struct async_submit_ctl *submit)
+{
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+                                                     &dest, 1, &src, 1, len);
+       struct dma_device *dma = chan ? chan->device : NULL;
+       const u8 *qmul; /* Q multiplier table */
+       u8 *d, *s;
+
+       if (dma) {
+               dma_addr_t dma_dest[2];
+               dma_addr_t dma_src[1];
+               struct device *dev = dma->dev;
+               struct dma_async_tx_descriptor *tx;
+               enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
+               dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+               dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+               tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
+                                            len, dma_flags);
+               if (tx) {
+                       async_tx_submit(chan, tx, submit);
+                       return tx;
+               }
+
+               /* could not get a descriptor, unmap and fall through to
+                * the synchronous path
+                */
+               dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+       }
+
+       /* no channel available, or failed to allocate a descriptor, so
+        * perform the operation synchronously
+        */
+       async_tx_quiesce(&submit->depend_tx);
+       qmul  = raid6_gfmul[coef];
+       d = page_address(dest);
+       s = page_address(src);
+
+       while (len--)
+               *d++ = qmul[*s++];
+
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
+             struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct page *p, *q, *a, *b;
+       struct page *srcs[2];
+       unsigned char coef[2];
+       enum async_tx_flags flags = submit->flags;
+       dma_async_tx_callback cb_fn = submit->cb_fn;
+       void *cb_param = submit->cb_param;
+       void *scribble = submit->scribble;
+
+       p = blocks[4-2];
+       q = blocks[4-1];
+
+       a = blocks[faila];
+       b = blocks[failb];
+
+       /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
+       /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+       srcs[0] = p;
+       srcs[1] = q;
+       coef[0] = raid6_gfexi[failb-faila];
+       coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_sum_product(b, srcs, coef, bytes, submit);
+
+       /* Dy = P+Pxy+Dx */
+       srcs[0] = p;
+       srcs[1] = b;
+       init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
+                         cb_param, scribble);
+       tx = async_xor(a, srcs, 0, 2, bytes, submit);
+
+       return tx;
+
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
+             struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct page *p, *q, *g, *dp, *dq;
+       struct page *srcs[2];
+       unsigned char coef[2];
+       enum async_tx_flags flags = submit->flags;
+       dma_async_tx_callback cb_fn = submit->cb_fn;
+       void *cb_param = submit->cb_param;
+       void *scribble = submit->scribble;
+       int uninitialized_var(good);
+       int i;
+
+       for (i = 0; i < 3; i++) {
+               if (i == faila || i == failb)
+                       continue;
+               good = i;
+               break;
+       }
+       BUG_ON(i >= 3);
+
+       p = blocks[5-2];
+       q = blocks[5-1];
+       g = blocks[good];
+
+       /* Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for delta p and
+        * delta q
+        */
+       dp = blocks[faila];
+       dq = blocks[failb];
+
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_memcpy(dp, g, 0, 0, bytes, submit);
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+
+       /* compute P + Pxy */
+       srcs[0] = dp;
+       srcs[1] = p;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+       /* compute Q + Qxy */
+       srcs[0] = dq;
+       srcs[1] = q;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+       /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+       srcs[0] = dp;
+       srcs[1] = dq;
+       coef[0] = raid6_gfexi[failb-faila];
+       coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+       /* Dy = P+Pxy+Dx */
+       srcs[0] = dp;
+       srcs[1] = dq;
+       init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+                         cb_param, scribble);
+       tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+       return tx;
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_n(int disks, size_t bytes, int faila, int failb,
+             struct page **blocks, struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct page *p, *q, *dp, *dq;
+       struct page *srcs[2];
+       unsigned char coef[2];
+       enum async_tx_flags flags = submit->flags;
+       dma_async_tx_callback cb_fn = submit->cb_fn;
+       void *cb_param = submit->cb_param;
+       void *scribble = submit->scribble;
+
+       p = blocks[disks-2];
+       q = blocks[disks-1];
+
+       /* Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        */
+       dp = blocks[faila];
+       blocks[faila] = (void *)raid6_empty_zero_page;
+       blocks[disks-2] = dp;
+       dq = blocks[failb];
+       blocks[failb] = (void *)raid6_empty_zero_page;
+       blocks[disks-1] = dq;
+
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+
+       /* Restore pointer table */
+       blocks[faila]   = dp;
+       blocks[failb]   = dq;
+       blocks[disks-2] = p;
+       blocks[disks-1] = q;
+
+       /* compute P + Pxy */
+       srcs[0] = dp;
+       srcs[1] = p;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+       /* compute Q + Qxy */
+       srcs[0] = dq;
+       srcs[1] = q;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+       /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+       srcs[0] = dp;
+       srcs[1] = dq;
+       coef[0] = raid6_gfexi[failb-faila];
+       coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+       /* Dy = P+Pxy+Dx */
+       srcs[0] = dp;
+       srcs[1] = dq;
+       init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+                         cb_param, scribble);
+       tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+       return tx;
+}
+
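+/* The three helpers below all solve the same pair of GF(256) equations.
+ * With failed indices a < b, let Pd = P + Pxy and Qd = Q + Qxy, i.e. the
+ * stored P/Q xored with the syndrome of the surviving data.  From
+ * Da + Db = Pd and g^a*Da + g^b*Db = Qd it follows that:
+ *
+ *     Db = A*Pd + B*Qd,  A = (g^(b-a) + 1)^-1 = raid6_gfexi[b-a]
+ *                        B = (g^a + g^b)^-1
+ *                          = raid6_gfinv[raid6_gfexp[a] ^ raid6_gfexp[b]]
+ *     Da = Pd + Db
+ *
+ * which is exactly the coef[] pair handed to async_sum_product() plus
+ * the final async_xor() in each helper.
+ */
+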
+/**
+ * async_raid6_2data_recov - asynchronously calculate two missing data blocks
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: first failed drive index
+ * @failb: second failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ */
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+                       struct page **blocks, struct async_submit_ctl *submit)
+{
+       BUG_ON(faila == failb);
+       if (failb < faila)
+               swap(faila, failb);
+
+       pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+       /* we need to preserve the contents of 'blocks' for the async
+        * case, so punt to synchronous if a scribble buffer is not available
+        */
+       if (!submit->scribble) {
+               void **ptrs = (void **) blocks;
+               int i;
+
+               async_tx_quiesce(&submit->depend_tx);
+               for (i = 0; i < disks; i++)
+                       ptrs[i] = page_address(blocks[i]);
+
+               raid6_2data_recov(disks, bytes, faila, failb, ptrs);
+
+               async_tx_sync_epilog(submit);
+
+               return NULL;
+       }
+
+       switch (disks) {
+       case 4:
+               /* dma devices do not uniformly understand a zero source pq
+                * operation (in contrast to the synchronous case), so
+                * explicitly handle the 4 disk special case
+                */
+               return __2data_recov_4(bytes, faila, failb, blocks, submit);
+       case 5:
+               /* dma devices do not uniformly understand a single
+                * source pq operation (in contrast to the synchronous
+                * case), so explicitly handle the 5 disk special case
+                */
+               return __2data_recov_5(bytes, faila, failb, blocks, submit);
+       default:
+               return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
+       }
+}
+EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
+
+/**
+ * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ */
+struct dma_async_tx_descriptor *
+async_raid6_datap_recov(int disks, size_t bytes, int faila,
+                       struct page **blocks, struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct page *p, *q, *dq;
+       u8 coef;
+       enum async_tx_flags flags = submit->flags;
+       dma_async_tx_callback cb_fn = submit->cb_fn;
+       void *cb_param = submit->cb_param;
+       void *scribble = submit->scribble;
+       struct page *srcs[2];
+
+       pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+       /* we need to preserve the contents of 'blocks' for the async
+        * case, so punt to synchronous if a scribble buffer is not available
+        */
+       if (!scribble) {
+               void **ptrs = (void **) blocks;
+               int i;
+
+               async_tx_quiesce(&submit->depend_tx);
+               for (i = 0; i < disks; i++)
+                       ptrs[i] = page_address(blocks[i]);
+
+               raid6_datap_recov(disks, bytes, faila, ptrs);
+
+               async_tx_sync_epilog(submit);
+
+               return NULL;
+       }
+
+       p = blocks[disks-2];
+       q = blocks[disks-1];
+
+       /* Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        */
+       dq = blocks[faila];
+       blocks[faila] = (void *)raid6_empty_zero_page;
+       blocks[disks-1] = dq;
+
+       /* in the 4 disk case we only need to perform a single source
+        * multiplication
+        */
+       if (disks == 4) {
+               int good = faila == 0 ? 1 : 0;
+               struct page *g = blocks[good];
+
+               init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+                                 scribble);
+               tx = async_memcpy(p, g, 0, 0, bytes, submit);
+
+               init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+                                 scribble);
+               tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+       } else {
+               init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+                                 scribble);
+               tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+       }
+
+       /* Restore pointer table */
+       blocks[faila]   = dq;
+       blocks[disks-1] = q;
+
+       /* calculate g^{-faila} */
+       coef = raid6_gfinv[raid6_gfexp[faila]];
+
+       srcs[0] = dq;
+       srcs[1] = q;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_mult(dq, dq, coef, bytes, submit);
+
+       srcs[0] = p;
+       srcs[1] = dq;
+       init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+                         cb_param, scribble);
+       tx = async_xor(p, srcs, 0, 2, bytes, submit);
+
+       return tx;
+}
+EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
+
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
+MODULE_LICENSE("GPL");
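
A usage sketch for the two entry points above; the wrapper below is illustrative, not part of the patch, and mirrors how raid6test.c later in this series drives the API. As the code shows, calls without a scribble region quietly fall back to the synchronous lib/raid6 routines.

static int rebuild_two(struct page **blocks, int disks, size_t bytes,
		       int faila, int failb, addr_conv_t *addr_conv)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;

	/* blocks[] holds 'disks' entries with P and Q in the last two slots */
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, addr_conv);
	tx = async_raid6_2data_recov(disks, bytes, faila, failb, blocks,
				     &submit);
	async_tx_quiesce(&tx);	/* a real caller would complete via callback */

	return 0;
}
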
index 06eb6cc09fef97714d12ebf8859e5e712d798df5..f9cdf04fe7c0a370908c8f6f5f3c04c9fb3d35f7 100644 (file)
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void)
        async_dmaengine_put();
 }
 
+module_init(async_tx_init);
+module_exit(async_tx_exit);
+
 /**
  * __async_tx_find_channel - find a channel to carry out the operation or let
  *     the transaction execute synchronously
- * @depend_tx: transaction dependency
+ * @submit: transaction dependency and submission modifiers
  * @tx_type: transaction type
  */
 struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-       enum dma_transaction_type tx_type)
+__async_tx_find_channel(struct async_submit_ctl *submit,
+                       enum dma_transaction_type tx_type)
 {
+       struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
        /* see if we can keep the chain on one channel */
        if (depend_tx &&
            dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
        return async_dma_find_channel(tx_type);
 }
 EXPORT_SYMBOL_GPL(__async_tx_find_channel);
-#else
-static int __init async_tx_init(void)
-{
-       printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
-       return 0;
-}
-
-static void __exit async_tx_exit(void)
-{
-       do { } while (0);
-}
 #endif
 
 
@@ -83,10 +77,14 @@ static void
 async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
                        struct dma_async_tx_descriptor *tx)
 {
-       struct dma_chan *chan;
-       struct dma_device *device;
+       struct dma_chan *chan = depend_tx->chan;
+       struct dma_device *device = chan->device;
        struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
 
+       #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+       BUG();
+       #endif
+
        /* first check to see if we can still append to depend_tx */
        spin_lock_bh(&depend_tx->lock);
        if (depend_tx->parent && depend_tx->chan == tx->chan) {
@@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
        }
        spin_unlock_bh(&depend_tx->lock);
 
-       if (!intr_tx)
+       /* attached dependency, flush the parent channel */
+       if (!intr_tx) {
+               device->device_issue_pending(chan);
                return;
-
-       chan = depend_tx->chan;
-       device = chan->device;
+       }
 
        /* see if we can schedule an interrupt
         * otherwise poll for completion
@@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
                        intr_tx->tx_submit(intr_tx);
                        async_tx_ack(intr_tx);
                }
+               device->device_issue_pending(chan);
        } else {
                if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
                        panic("%s: DMA_ERROR waiting for depend_tx\n",
@@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
 
 
 /**
- * submit_disposition - while holding depend_tx->lock we must avoid submitting
- *     new operations to prevent a circular locking dependency with
- *     drivers that already hold a channel lock when calling
- *     async_tx_run_dependencies.
+ * submit_disposition - flags for routing an incoming operation
  * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
  * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
  * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
+ *
+ * while holding depend_tx->lock we must avoid submitting new operations
+ * to prevent a circular locking dependency with drivers that already
+ * hold a channel lock when calling async_tx_run_dependencies.
  */
 enum submit_disposition {
        ASYNC_TX_SUBMITTED,
@@ -160,11 +160,12 @@ enum submit_disposition {
 
 void
 async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
-       enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+               struct async_submit_ctl *submit)
 {
-       tx->callback = cb_fn;
-       tx->callback_param = cb_param;
+       struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
+       tx->callback = submit->cb_fn;
+       tx->callback_param = submit->cb_param;
 
        if (depend_tx) {
                enum submit_disposition s;
@@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
                tx->tx_submit(tx);
        }
 
-       if (flags & ASYNC_TX_ACK)
+       if (submit->flags & ASYNC_TX_ACK)
                async_tx_ack(tx);
 
-       if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
+       if (depend_tx)
                async_tx_ack(depend_tx);
 }
 EXPORT_SYMBOL_GPL(async_tx_submit);
 
 /**
- * async_trigger_callback - schedules the callback function to be run after
- * any dependent operations have been completed.
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: 'callback' requires the completion of this transaction
- * @cb_fn: function to call after depend_tx completes
- * @cb_param: parameter to pass to the callback routine
+ * async_trigger_callback - schedules the callback function to be run
+ * @submit: submission and completion parameters
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * The callback is run after any dependent operations have completed.
  */
 struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+async_trigger_callback(struct async_submit_ctl *submit)
 {
        struct dma_chan *chan;
        struct dma_device *device;
        struct dma_async_tx_descriptor *tx;
+       struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
 
        if (depend_tx) {
                chan = depend_tx->chan;
@@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags,
        if (tx) {
                pr_debug("%s: (async)\n", __func__);
 
-               async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               async_tx_submit(chan, tx, submit);
        } else {
                pr_debug("%s: (sync)\n", __func__);
 
                /* wait for any prerequisite operations */
-               async_tx_quiesce(&depend_tx);
+               async_tx_quiesce(&submit->depend_tx);
 
-               async_tx_sync_epilog(cb_fn, cb_param);
+               async_tx_sync_epilog(submit);
        }
 
        return tx;
@@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
 }
 EXPORT_SYMBOL_GPL(async_tx_quiesce);
 
-module_init(async_tx_init);
-module_exit(async_tx_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
 MODULE_LICENSE("GPL");
index 90dd3f8bd283171ca725d75f7daae52fd0089ab1..b459a9034aace5270b4e2d7f73387ca4bac4eb05 100644 (file)
 /* do_async_xor - dma map the pages and perform the xor with an engine */
 static __async_inline struct dma_async_tx_descriptor *
 do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
-            unsigned int offset, int src_cnt, size_t len,
-            enum async_tx_flags flags,
-            struct dma_async_tx_descriptor *depend_tx,
-            dma_async_tx_callback cb_fn, void *cb_param)
+            unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+            struct async_submit_ctl *submit)
 {
        struct dma_device *dma = chan->device;
-       dma_addr_t *dma_src = (dma_addr_t *) src_list;
        struct dma_async_tx_descriptor *tx = NULL;
        int src_off = 0;
        int i;
-       dma_async_tx_callback _cb_fn;
-       void *_cb_param;
-       enum async_tx_flags async_flags;
+       dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+       void *cb_param_orig = submit->cb_param;
+       enum async_tx_flags flags_orig = submit->flags;
        enum dma_ctrl_flags dma_flags;
        int xor_src_cnt;
        dma_addr_t dma_dest;
@@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
        }
 
        while (src_cnt) {
-               async_flags = flags;
+               submit->flags = flags_orig;
                dma_flags = 0;
-               xor_src_cnt = min(src_cnt, dma->max_xor);
+               xor_src_cnt = min(src_cnt, (int)dma->max_xor);
                /* if we are submitting additional xors, leave the chain open,
                 * clear the callback parameters, and leave the destination
                 * buffer mapped
                 */
                if (src_cnt > xor_src_cnt) {
-                       async_flags &= ~ASYNC_TX_ACK;
+                       submit->flags &= ~ASYNC_TX_ACK;
+                       submit->flags |= ASYNC_TX_FENCE;
                        dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
-                       _cb_fn = NULL;
-                       _cb_param = NULL;
+                       submit->cb_fn = NULL;
+                       submit->cb_param = NULL;
                } else {
-                       _cb_fn = cb_fn;
-                       _cb_param = cb_param;
+                       submit->cb_fn = cb_fn_orig;
+                       submit->cb_param = cb_param_orig;
                }
-               if (_cb_fn)
+               if (submit->cb_fn)
                        dma_flags |= DMA_PREP_INTERRUPT;
-
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
                /* Since we have clobbered the src_list we are committed
                 * to doing this asynchronously.  Drivers force forward progress
                 * in case they can not provide a descriptor
@@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
                                              xor_src_cnt, len, dma_flags);
 
                if (unlikely(!tx))
-                       async_tx_quiesce(&depend_tx);
+                       async_tx_quiesce(&submit->depend_tx);
 
                /* spin wait for the preceding transactions to complete */
                while (unlikely(!tx)) {
@@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
                                                      dma_flags);
                }
 
-               async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
-                               _cb_param);
-
-               depend_tx = tx;
-               flags |= ASYNC_TX_DEP_ACK;
+               async_tx_submit(chan, tx, submit);
+               submit->depend_tx = tx;
 
                if (src_cnt > xor_src_cnt) {
                        /* drop completed sources */
@@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 
 static void
 do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
-           int src_cnt, size_t len, enum async_tx_flags flags,
-           dma_async_tx_callback cb_fn, void *cb_param)
+           int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
        int i;
        int xor_src_cnt;
        int src_off = 0;
        void *dest_buf;
-       void **srcs = (void **) src_list;
+       void **srcs;
+
+       if (submit->scribble)
+               srcs = submit->scribble;
+       else
+               srcs = (void **) src_list;
 
-       /* reuse the 'src_list' array to convert to buffer pointers */
+       /* convert to buffer pointers */
        for (i = 0; i < src_cnt; i++)
                srcs[i] = page_address(src_list[i]) + offset;
 
        /* set destination address */
        dest_buf = page_address(dest) + offset;
 
-       if (flags & ASYNC_TX_XOR_ZERO_DST)
+       if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
                memset(dest_buf, 0, len);
 
        while (src_cnt > 0) {
@@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
                src_off += xor_src_cnt;
        }
 
-       async_tx_sync_epilog(cb_fn, cb_param);
+       async_tx_sync_epilog(submit);
 }
 
 /**
  * async_xor - attempt to xor a set of blocks with a dma engine.
- *     xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
- *     flag must be set to not include dest data in the calculation.  The
- *     assumption with dma eninges is that they only use the destination
- *     buffer as a source when it is explicity specified in the source list.
  * @dest: destination page
- * @src_list: array of source pages (if the dest is also a source it must be
- *     at index zero).  The contents of this array may be overwritten.
- * @offset: offset in pages to start transaction
+ * @src_list: array of source pages
+ * @offset: common src/dst offset to start transaction
  * @src_cnt: number of source pages
  * @len: length in bytes
- * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
- *     ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
+ *
+ * xor_blocks always uses the dest as a source so the
+ * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
+ * the calculation.  The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
+ * in the source list.
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
  */
 struct dma_async_tx_descriptor *
 async_xor(struct page *dest, struct page **src_list, unsigned int offset,
-       int src_cnt, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+         int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
-       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
                                                      &dest, 1, src_list,
                                                      src_cnt, len);
+       dma_addr_t *dma_src = NULL;
+
        BUG_ON(src_cnt <= 1);
 
-       if (chan) {
+       if (submit->scribble)
+               dma_src = submit->scribble;
+       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+               dma_src = (dma_addr_t *) src_list;
+
+       if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
                /* run the xor asynchronously */
                pr_debug("%s (async): len: %zu\n", __func__, len);
 
                return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
-                                   flags, depend_tx, cb_fn, cb_param);
+                                   dma_src, submit);
        } else {
                /* run the xor synchronously */
                pr_debug("%s (sync): len: %zu\n", __func__, len);
+               WARN_ONCE(chan, "%s: no space for dma address conversion\n",
+                         __func__);
 
                /* in the sync case the dest is an implied source
                 * (assumes the dest is the first source)
                 */
-               if (flags & ASYNC_TX_XOR_DROP_DST) {
+               if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
                        src_cnt--;
                        src_list++;
                }
 
                /* wait for any prerequisite operations */
-               async_tx_quiesce(&depend_tx);
+               async_tx_quiesce(&submit->depend_tx);
 
-               do_sync_xor(dest, src_list, offset, src_cnt, len,
-                           flags, cb_fn, cb_param);
+               do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
 
                return NULL;
        }
@@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
 }
 
 /**
- * async_xor_zero_sum - attempt a xor parity check with a dma engine.
+ * async_xor_val - attempt a xor parity check with a dma engine.
  * @dest: destination page used if the xor is performed synchronously
- * @src_list: array of source pages.  The dest page must be listed as a source
- *     at index zero.  The contents of this array may be overwritten.
+ * @src_list: array of source pages
  * @offset: offset in pages to start transaction
  * @src_cnt: number of source pages
  * @len: length in bytes
  * @result: 0 if sum == 0 else non-zero
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
  */
 struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
-       unsigned int offset, int src_cnt, size_t len,
-       u32 *result, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+             int src_cnt, size_t len, enum sum_check_flags *result,
+             struct async_submit_ctl *submit)
 {
-       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
                                                      &dest, 1, src_list,
                                                      src_cnt, len);
        struct dma_device *device = chan ? chan->device : NULL;
        struct dma_async_tx_descriptor *tx = NULL;
+       dma_addr_t *dma_src = NULL;
 
        BUG_ON(src_cnt <= 1);
 
-       if (device && src_cnt <= device->max_xor) {
-               dma_addr_t *dma_src = (dma_addr_t *) src_list;
-               unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+       if (submit->scribble)
+               dma_src = submit->scribble;
+       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+               dma_src = (dma_addr_t *) src_list;
+
+       if (dma_src && device && src_cnt <= device->max_xor &&
+           is_dma_xor_aligned(device, offset, 0, len)) {
+               unsigned long dma_prep_flags = 0;
                int i;
 
                pr_debug("%s: (async) len: %zu\n", __func__, len);
 
+               if (submit->cb_fn)
+                       dma_prep_flags |= DMA_PREP_INTERRUPT;
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_prep_flags |= DMA_PREP_FENCE;
                for (i = 0; i < src_cnt; i++)
                        dma_src[i] = dma_map_page(device->dev, src_list[i],
                                                  offset, len, DMA_TO_DEVICE);
 
-               tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
-                                                     len, result,
-                                                     dma_prep_flags);
+               tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+                                                    len, result,
+                                                    dma_prep_flags);
                if (unlikely(!tx)) {
-                       async_tx_quiesce(&depend_tx);
+                       async_tx_quiesce(&submit->depend_tx);
 
                        while (!tx) {
                                dma_async_issue_pending(chan);
-                               tx = device->device_prep_dma_zero_sum(chan,
+                               tx = device->device_prep_dma_xor_val(chan,
                                        dma_src, src_cnt, len, result,
                                        dma_prep_flags);
                        }
                }
 
-               async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               async_tx_submit(chan, tx, submit);
        } else {
-               unsigned long xor_flags = flags;
+               enum async_tx_flags flags_orig = submit->flags;
 
                pr_debug("%s: (sync) len: %zu\n", __func__, len);
+               WARN_ONCE(device && src_cnt <= device->max_xor,
+                         "%s: no space for dma address conversion\n",
+                         __func__);
 
-               xor_flags |= ASYNC_TX_XOR_DROP_DST;
-               xor_flags &= ~ASYNC_TX_ACK;
+               submit->flags |= ASYNC_TX_XOR_DROP_DST;
+               submit->flags &= ~ASYNC_TX_ACK;
 
-               tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
-                       depend_tx, NULL, NULL);
+               tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
 
                async_tx_quiesce(&tx);
 
-               *result = page_is_zero(dest, offset, len) ? 0 : 1;
+               *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
 
-               async_tx_sync_epilog(cb_fn, cb_param);
+               async_tx_sync_epilog(submit);
+               submit->flags = flags_orig;
        }
 
        return tx;
 }
-EXPORT_SYMBOL_GPL(async_xor_zero_sum);
-
-static int __init async_xor_init(void)
-{
-       #ifdef CONFIG_ASYNC_TX_DMA
-       /* To conserve stack space the input src_list (array of page pointers)
-        * is reused to hold the array of dma addresses passed to the driver.
-        * This conversion is only possible when dma_addr_t is less than the
-        * the size of a pointer.  HIGHMEM64G is known to violate this
-        * assumption.
-        */
-       BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
-       #endif
-
-       return 0;
-}
-
-static void __exit async_xor_exit(void)
-{
-       do { } while (0);
-}
-
-module_init(async_xor_init);
-module_exit(async_xor_exit);
+EXPORT_SYMBOL_GPL(async_xor_val);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
new file mode 100644 (file)
index 0000000..3ec27c7
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+ * asynchronous raid6 recovery self test
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * based on drivers/md/raid6test/test.c:
+ *     Copyright 2002-2007 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/async_tx.h>
+#include <linux/random.h>
+
+#undef pr
+#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
+
+#define NDISKS 16 /* Including P and Q */
+
+static struct page *dataptrs[NDISKS];
+static addr_conv_t addr_conv[NDISKS];
+static struct page *data[NDISKS+3];
+static struct page *spare;
+static struct page *recovi;
+static struct page *recovj;
+
+static void callback(void *param)
+{
+       struct completion *cmp = param;
+
+       complete(cmp);
+}
+
+static void makedata(int disks)
+{
+       int i, j;
+
+       for (i = 0; i < disks; i++) {
+               /* fill the whole page with random words */
+               for (j = 0; j < PAGE_SIZE; j += sizeof(u32)) {
+                       u32 *p = page_address(data[i]) + j;
+
+                       *p = random32();
+               }
+
+               dataptrs[i] = data[i];
+       }
+}
+
+static char disk_type(int d, int disks)
+{
+       if (d == disks - 2)
+               return 'P';
+       else if (d == disks - 1)
+               return 'Q';
+       else
+               return 'D';
+}
+
+/* Recover two failed blocks. */
+static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
+{
+       struct async_submit_ctl submit;
+       struct completion cmp;
+       struct dma_async_tx_descriptor *tx = NULL;
+       enum sum_check_flags result = ~0;
+
+       if (faila > failb)
+               swap(faila, failb);
+
+       if (failb == disks-1) {
+               if (faila == disks-2) {
+                       /* P+Q failure.  Just rebuild the syndrome. */
+                       init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+                       tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
+               } else {
+                       struct page *blocks[disks];
+                       struct page *dest;
+                       int count = 0;
+                       int i;
+
+                       /* data+Q failure.  Reconstruct data from P,
+                        * then rebuild syndrome
+                        */
+                       for (i = disks; i-- ; ) {
+                               if (i == faila || i == failb)
+                                       continue;
+                               blocks[count++] = ptrs[i];
+                       }
+                       dest = ptrs[faila];
+                       init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+                                         NULL, NULL, addr_conv);
+                       tx = async_xor(dest, blocks, 0, count, bytes, &submit);
+
+                       init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
+                       tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
+               }
+       } else {
+               if (failb == disks-2) {
+                       /* data+P failure. */
+                       init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+                       tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
+               } else {
+                       /* data+data failure. */
+                       init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+                       tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
+               }
+       }
+       init_completion(&cmp);
+       init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
+       tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
+       async_tx_issue_pending(tx);
+
+       if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
+               pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
+                  __func__, faila, failb, disks);
+
+       if (result != 0)
+               pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
+                  __func__, faila, failb, result);
+}
+
+static int test_disks(int i, int j, int disks)
+{
+       int erra, errb;
+
+       memset(page_address(recovi), 0xf0, PAGE_SIZE);
+       memset(page_address(recovj), 0xba, PAGE_SIZE);
+
+       dataptrs[i] = recovi;
+       dataptrs[j] = recovj;
+
+       raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
+
+       erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
+       errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
+
+       pr("%s(%d, %d): faila=%3d(%c)  failb=%3d(%c)  %s\n",
+          __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
+          (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB");
+
+       dataptrs[i] = data[i];
+       dataptrs[j] = data[j];
+
+       return erra || errb;
+}
+
+static int test(int disks, int *tests)
+{
+       struct dma_async_tx_descriptor *tx;
+       struct async_submit_ctl submit;
+       struct completion cmp;
+       int err = 0;
+       int i, j;
+
+       recovi = data[disks];
+       recovj = data[disks+1];
+       spare  = data[disks+2];
+
+       makedata(disks);
+
+       /* Nuke syndromes */
+       memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
+       memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
+
+       /* Generate assumed good syndrome */
+       init_completion(&cmp);
+       init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
+       tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
+       async_tx_issue_pending(tx);
+
+       if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
+               pr("error: initial gen_syndrome(%d) timed out\n", disks);
+               return 1;
+       }
+
+       pr("testing the %d-disk case...\n", disks);
+       for (i = 0; i < disks-1; i++)
+               for (j = i+1; j < disks; j++) {
+                       (*tests)++;
+                       err += test_disks(i, j, disks);
+               }
+
+       return err;
+}
+
+
+static int raid6_test(void)
+{
+       int err = 0;
+       int tests = 0;
+       int i;
+
+       for (i = 0; i < NDISKS+3; i++) {
+               data[i] = alloc_page(GFP_KERNEL);
+               if (!data[i]) {
+                       while (i--)
+                               put_page(data[i]);
+                       return -ENOMEM;
+               }
+       }
+
+       /* the 4-disk and 5-disk cases are special for the recovery code */
+       if (NDISKS > 4)
+               err += test(4, &tests);
+       if (NDISKS > 5)
+               err += test(5, &tests);
+       err += test(NDISKS, &tests);
+
+       pr("\n");
+       pr("complete (%d tests, %d failure%s)\n",
+          tests, err, err == 1 ? "" : "s");
+
+       for (i = 0; i < NDISKS+3; i++)
+               put_page(data[i]);
+
+       return 0;
+}
+
+static void raid6_test_exit(void)
+{
+}
+
+/* when compiled-in wait for drivers to load first (assumes dma drivers
+ * are also compiled-in)
+ */
+late_initcall(raid6_test);
+module_exit(raid6_test_exit);
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
+MODULE_LICENSE("GPL");
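
A quick consistency check on the harness above: each test(disks, &tests) run walks every (i, j) failure pair, contributing disks * (disks - 1) / 2 cases, so with NDISKS = 16 a full run performs 6 + 10 + 120 = 136 recoveries and a clean pass ends with "complete (136 tests, 0 failures)".
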
index d295bdccc09ccc4c8c459dfeb2a10be64ec2f019..9335b87c51747a6f480017e5bf94d92c97b173cc 100644 (file)
@@ -115,6 +115,9 @@ static const struct file_operations acpi_button_state_fops = {
        .release = single_release,
 };
 
+static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier);
+static struct acpi_device *lid_device;
+
 /* --------------------------------------------------------------------------
                               FS Interface (/proc)
    -------------------------------------------------------------------------- */
@@ -231,11 +234,38 @@ static int acpi_button_remove_fs(struct acpi_device *device)
 /* --------------------------------------------------------------------------
                                 Driver Interface
    -------------------------------------------------------------------------- */
+int acpi_lid_notifier_register(struct notifier_block *nb)
+{
+       return blocking_notifier_chain_register(&acpi_lid_notifier, nb);
+}
+EXPORT_SYMBOL(acpi_lid_notifier_register);
+
+int acpi_lid_notifier_unregister(struct notifier_block *nb)
+{
+       return blocking_notifier_chain_unregister(&acpi_lid_notifier, nb);
+}
+EXPORT_SYMBOL(acpi_lid_notifier_unregister);
+
+int acpi_lid_open(void)
+{
+       acpi_status status;
+       unsigned long long state;
+
+       if (!lid_device)
+               return -ENODEV;
+
+       status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL,
+                                      &state);
+       if (ACPI_FAILURE(status))
+               return -ENODEV;
+
+       return !!state;
+}
+EXPORT_SYMBOL(acpi_lid_open);
+
 static int acpi_lid_send_state(struct acpi_device *device)
 {
        struct acpi_button *button = acpi_driver_data(device);
        unsigned long long state;
        acpi_status status;
+       int ret;
 
        status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state);
        if (ACPI_FAILURE(status))
@@ -244,7 +274,12 @@ static int acpi_lid_send_state(struct acpi_device *device)
        /* input layer checks if event is redundant */
        input_report_switch(button->input, SW_LID, !state);
        input_sync(button->input);
-       return 0;
+
+       ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
+       if (ret == NOTIFY_DONE)
+               ret = blocking_notifier_call_chain(&acpi_lid_notifier, state,
+                                                  device);
+       return ret;
 }
 
 static void acpi_button_notify(struct acpi_device *device, u32 event)
@@ -366,8 +401,14 @@ static int acpi_button_add(struct acpi_device *device)
        error = input_register_device(input);
        if (error)
                goto err_remove_fs;
-       if (button->type == ACPI_BUTTON_TYPE_LID)
+       if (button->type == ACPI_BUTTON_TYPE_LID) {
                acpi_lid_send_state(device);
+               /*
+                * This assumes there's only one lid device, or if there are
+                * more we only care about the last one...
+                */
+               lid_device = device;
+       }
 
        if (device->wakeup.flags.valid) {
                /* Button's GPE is run-wake GPE */
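
A sketch of a consumer for the lid API exported above; the my_* names are illustrative. The callback receives the raw _LID state (nonzero means open), and acpi_lid_open() returns 1 for open, 0 for closed, or -ENODEV.

static int my_lid_notify(struct notifier_block *nb, unsigned long state,
			 void *data)
{
	pr_info("lid %s\n", state ? "opened" : "closed");
	return NOTIFY_OK;
}

static struct notifier_block my_lid_nb = {
	.notifier_call = my_lid_notify,
};

static int __init my_driver_init(void)
{
	acpi_lid_notifier_register(&my_lid_nb);
	if (acpi_lid_open() == 0)
		pr_info("lid was closed at load time\n");
	return 0;
}
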
index 56071b67bed58f055150fbf0d23ead4ebe369bb5..5633b86e3ed1cb97dab2777294beafd0802480cb 100644 (file)
@@ -193,7 +193,7 @@ acpi_status __init acpi_os_initialize(void)
 
 static void bind_to_cpu0(struct work_struct *work)
 {
-       set_cpus_allowed(current, cpumask_of_cpu(0));
+       set_cpus_allowed_ptr(current, cpumask_of(0));
        kfree(work);
 }
 
index 11088cf103197895e79a94359692d16552c4be73..8ba0ed0b9ddbc912684d398e2eda14357920aed4 100644 (file)
@@ -511,7 +511,7 @@ int acpi_processor_preregister_performance(
        struct acpi_processor *match_pr;
        struct acpi_psd_package *match_pdomain;
 
-       if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
                return -ENOMEM;
 
        mutex_lock(&performance_mutex);
@@ -558,7 +558,6 @@ int acpi_processor_preregister_performance(
         * Now that we have _PSD data from all CPUs, lets setup P-state 
         * domain info.
         */
-       cpumask_clear(covered_cpus);
        for_each_possible_cpu(i) {
                pr = per_cpu(processors, i);
                if (!pr)
index ce7cf3bc5101fb37743740f9c9b34def03ac82ae..4c6c14c1e30788d6b025aaaabd511350dc9b678e 100644 (file)
@@ -77,7 +77,7 @@ static int acpi_processor_update_tsd_coord(void)
        struct acpi_tsd_package *pdomain, *match_pdomain;
        struct acpi_processor_throttling *pthrottling, *match_pthrottling;
 
-       if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
                return -ENOMEM;
 
        /*
@@ -105,7 +105,6 @@ static int acpi_processor_update_tsd_coord(void)
        if (retval)
                goto err_ret;
 
-       cpumask_clear(covered_cpus);
        for_each_possible_cpu(i) {
                pr = per_cpu(processors, i);
                if (!pr)
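
Both ACPI hunks above are the same mechanical transformation; zalloc_cpumask_var() hands back an already-cleared mask, so the separate cpumask_clear() call can go:

	/* before */
	if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
		return -ENOMEM;
	/* ... */
	cpumask_clear(covered_cpus);

	/* after */
	if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
		return -ENOMEM;
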
index 2de64065aa1b203b113f62e8428319f140b0b382..29e66d603d3c6977d55a44611a0baa05f106331e 100644 (file)
@@ -790,11 +790,15 @@ he_init_group(struct he_dev *he_dev, int group)
        he_dev->rbps_base = pci_alloc_consistent(he_dev->pci_dev,
                CONFIG_RBPS_SIZE * sizeof(struct he_rbp), &he_dev->rbps_phys);
        if (he_dev->rbps_base == NULL) {
-               hprintk("failed to alloc rbps\n");
-               return -ENOMEM;
+               hprintk("failed to alloc rbps_base\n");
+               goto out_destroy_rbps_pool;
        }
        memset(he_dev->rbps_base, 0, CONFIG_RBPS_SIZE * sizeof(struct he_rbp));
        he_dev->rbps_virt = kmalloc(CONFIG_RBPS_SIZE * sizeof(struct he_virt), GFP_KERNEL);
+       if (he_dev->rbps_virt == NULL) {
+               hprintk("failed to alloc rbps_virt\n");
+               goto out_free_rbps_base;
+       }
 
        for (i = 0; i < CONFIG_RBPS_SIZE; ++i) {
                dma_addr_t dma_handle;
@@ -802,7 +806,7 @@ he_init_group(struct he_dev *he_dev, int group)
 
                cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|GFP_DMA, &dma_handle);
                if (cpuaddr == NULL)
-                       return -ENOMEM;
+                       goto out_free_rbps_virt;
 
                he_dev->rbps_virt[i].virt = cpuaddr;
                he_dev->rbps_base[i].status = RBP_LOANED | RBP_SMALLBUF | (i << RBP_INDEX_OFF);
@@ -827,17 +831,21 @@ he_init_group(struct he_dev *he_dev, int group)
                        CONFIG_RBPL_BUFSIZE, 8, 0);
        if (he_dev->rbpl_pool == NULL) {
                hprintk("unable to create rbpl pool\n");
-               return -ENOMEM;
+               goto out_free_rbps_virt;
        }
 
        he_dev->rbpl_base = pci_alloc_consistent(he_dev->pci_dev,
                CONFIG_RBPL_SIZE * sizeof(struct he_rbp), &he_dev->rbpl_phys);
        if (he_dev->rbpl_base == NULL) {
-               hprintk("failed to alloc rbpl\n");
-               return -ENOMEM;
+               hprintk("failed to alloc rbpl_base\n");
+               goto out_destroy_rbpl_pool;
        }
        memset(he_dev->rbpl_base, 0, CONFIG_RBPL_SIZE * sizeof(struct he_rbp));
        he_dev->rbpl_virt = kmalloc(CONFIG_RBPL_SIZE * sizeof(struct he_virt), GFP_KERNEL);
+       if (he_dev->rbpl_virt == NULL) {
+               hprintk("failed to alloc rbpl_virt\n");
+               goto out_free_rbpl_base;
+       }
 
        for (i = 0; i < CONFIG_RBPL_SIZE; ++i) {
                dma_addr_t dma_handle;
@@ -845,7 +853,7 @@ he_init_group(struct he_dev *he_dev, int group)
 
                cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|GFP_DMA, &dma_handle);
                if (cpuaddr == NULL)
-                       return -ENOMEM;
+                       goto out_free_rbpl_virt;
 
                he_dev->rbpl_virt[i].virt = cpuaddr;
                he_dev->rbpl_base[i].status = RBP_LOANED | (i << RBP_INDEX_OFF);
@@ -870,7 +878,7 @@ he_init_group(struct he_dev *he_dev, int group)
                CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), &he_dev->rbrq_phys);
        if (he_dev->rbrq_base == NULL) {
                hprintk("failed to allocate rbrq\n");
-               return -ENOMEM;
+               goto out_free_rbpl_virt;
        }
        memset(he_dev->rbrq_base, 0, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq));
 
@@ -894,7 +902,7 @@ he_init_group(struct he_dev *he_dev, int group)
                CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), &he_dev->tbrq_phys);
        if (he_dev->tbrq_base == NULL) {
                hprintk("failed to allocate tbrq\n");
-               return -ENOMEM;
+                       goto out_free_rbrq_base;
        }
        memset(he_dev->tbrq_base, 0, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq));
 
@@ -906,6 +914,39 @@ he_init_group(struct he_dev *he_dev, int group)
        he_writel(he_dev, CONFIG_TBRQ_THRESH, G0_TBRQ_THRESH + (group * 16));
 
        return 0;
+
+out_free_rbrq_base:
+       pci_free_consistent(he_dev->pci_dev, CONFIG_RBRQ_SIZE *
+                       sizeof(struct he_rbrq), he_dev->rbrq_base,
+                       he_dev->rbrq_phys);
+       i = CONFIG_RBPL_SIZE;
+out_free_rbpl_virt:
+       while (i--)
+               pci_pool_free(he_dev->rbpl_pool, he_dev->rbpl_virt[i].virt,
+                               he_dev->rbpl_base[i].phys);
+       kfree(he_dev->rbpl_virt);
+
+out_free_rbpl_base:
+       pci_free_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE *
+                       sizeof(struct he_rbp), he_dev->rbpl_base,
+                       he_dev->rbpl_phys);
+out_destroy_rbpl_pool:
+       pci_pool_destroy(he_dev->rbpl_pool);
+
+       i = CONFIG_RBPS_SIZE;
+out_free_rbps_virt:
+       while (i--)
+               pci_pool_free(he_dev->rbps_pool, he_dev->rbps_virt[i].virt,
+                               he_dev->rbps_base[i].phys);
+       kfree(he_dev->rbps_virt);
+
+out_free_rbps_base:
+       pci_free_consistent(he_dev->pci_dev, CONFIG_RBPS_SIZE *
+                       sizeof(struct he_rbp), he_dev->rbps_base,
+                       he_dev->rbps_phys);
+out_destroy_rbps_pool:
+       pci_pool_destroy(he_dev->rbps_pool);
+       return -ENOMEM;
 }
 
 static int __devinit
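
The error path added above is the kernel's standard goto-ladder unwind: each label releases everything acquired before the first goto that targets it, and control falls through the labels in reverse allocation order. A minimal stand-alone sketch of the same pattern (plain malloc/free stand-ins, not the he driver's API):

#include <stdlib.h>

/* Three-stage setup using the goto-ladder unwind from he_init_group():
 * each label frees exactly what was allocated before its goto sites. */
static int setup(char **a, char **b, char **c)
{
	*a = malloc(16);
	if (!*a)
		goto out;
	*b = malloc(16);
	if (!*b)
		goto out_free_a;
	*c = malloc(16);
	if (!*c)
		goto out_free_b;

	return 0;	/* success: caller owns a, b and c */

out_free_b:
	free(*b);
out_free_a:
	free(*a);
out:
	return -1;	/* every earlier allocation has been released */
}
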
index efa2808dd94d278cfae1fe54f0b60d2b829ac27d..1a9332e4efe0847aea86548b2e58e9fcd77b1268 100644 (file)
@@ -25,6 +25,10 @@ SOLOS_ATTR_RO(RSCorrectedErrorsUp)
 SOLOS_ATTR_RO(RSUnCorrectedErrorsUp)
 SOLOS_ATTR_RO(InterleaveRDn)
 SOLOS_ATTR_RO(InterleaveRUp)
+SOLOS_ATTR_RO(BisRDn)
+SOLOS_ATTR_RO(BisRUp)
+SOLOS_ATTR_RO(INPdown)
+SOLOS_ATTR_RO(INPup)
 SOLOS_ATTR_RO(ShowtimeStart)
 SOLOS_ATTR_RO(ATURVendor)
 SOLOS_ATTR_RO(ATUCCountry)
@@ -62,6 +66,13 @@ SOLOS_ATTR_RW(Defaults)
 SOLOS_ATTR_RW(LineMode)
 SOLOS_ATTR_RW(Profile)
 SOLOS_ATTR_RW(DetectNoise)
+SOLOS_ATTR_RW(BisAForceSNRMarginDn)
+SOLOS_ATTR_RW(BisMForceSNRMarginDn)
+SOLOS_ATTR_RW(BisAMaxMargin)
+SOLOS_ATTR_RW(BisMMaxMargin)
+SOLOS_ATTR_RW(AnnexAForceSNRMarginDn)
+SOLOS_ATTR_RW(AnnexAMaxMargin)
+SOLOS_ATTR_RW(AnnexMMaxMargin)
 SOLOS_ATTR_RO(SupportedAnnexes)
 SOLOS_ATTR_RO(Status)
 SOLOS_ATTR_RO(TotalStart)
index 307321b32cb35f43396f26631ce49069f59ce529..c5f5186d62a3160404a50b5819c440239b311870 100644 (file)
 #define RX_DMA_ADDR(port)      (0x30 + (4 * (port)))
 
 #define DATA_RAM_SIZE  32768
-#define BUF_SIZE       4096
+#define BUF_SIZE       2048
+#define OLD_BUF_SIZE   4096 /* For FPGA versions <= 2 */
 #define FPGA_PAGE      528 /* FPGA flash page size*/
 #define SOLOS_PAGE     512 /* Solos flash page size*/
 #define FPGA_BLOCK     (FPGA_PAGE * 8) /* FPGA flash block size*/
 #define SOLOS_BLOCK    (SOLOS_PAGE * 8) /* Solos flash block size*/
 
-#define RX_BUF(card, nr) ((card->buffers) + (nr)*BUF_SIZE*2)
-#define TX_BUF(card, nr) ((card->buffers) + (nr)*BUF_SIZE*2 + BUF_SIZE)
+#define RX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2)
+#define TX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2 + (card->buffer_size))
+#define FLASH_BUF ((card->buffers) + 4*(card->buffer_size)*2)
 
 #define RX_DMA_SIZE    2048
 
+#define FPGA_VERSION(a, b) (((a) << 8) + (b))
+#define LEGACY_BUFFERS 2
+#define DMA_SUPPORTED  4
+
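
To make the packed encoding concrete: FPGA_VERSION(1, 2) evaluates to (1 << 8) + 2 = 0x0102, so packed values order first by major and then by minor version, and the LEGACY_BUFFERS and DMA_SUPPORTED thresholds are compared against that single packed value.
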
 static int reset = 0;
 static int atmdebug = 0;
 static int firmware_upgrade = 0;
 static int fpga_upgrade = 0;
+static int db_firmware_upgrade = 0;
+static int db_fpga_upgrade = 0;
 
 struct pkt_hdr {
        __le16 size;
@@ -116,6 +124,8 @@ struct solos_card {
        wait_queue_head_t param_wq;
        wait_queue_head_t fw_wq;
        int using_dma;
+       int fpga_version;
+       int buffer_size;
 };
 
 
@@ -136,10 +146,14 @@ MODULE_PARM_DESC(reset, "Reset Solos chips on startup");
 MODULE_PARM_DESC(atmdebug, "Print ATM data");
 MODULE_PARM_DESC(firmware_upgrade, "Initiate Solos firmware upgrade");
 MODULE_PARM_DESC(fpga_upgrade, "Initiate FPGA upgrade");
+MODULE_PARM_DESC(db_firmware_upgrade, "Initiate daughter board Solos firmware upgrade");
+MODULE_PARM_DESC(db_fpga_upgrade, "Initiate daughter board FPGA upgrade");
 module_param(reset, int, 0444);
 module_param(atmdebug, int, 0644);
 module_param(firmware_upgrade, int, 0444);
 module_param(fpga_upgrade, int, 0444);
+module_param(db_firmware_upgrade, int, 0444);
+module_param(db_fpga_upgrade, int, 0444);
 
 static void fpga_queue(struct solos_card *card, int port, struct sk_buff *skb,
                       struct atm_vcc *vcc);
@@ -517,10 +531,32 @@ static int flash_upgrade(struct solos_card *card, int chip)
        if (chip == 0) {
                fw_name = "solos-FPGA.bin";
                blocksize = FPGA_BLOCK;
-       } else {
+       }
+
+       if (chip == 1) {
                fw_name = "solos-Firmware.bin";
                blocksize = SOLOS_BLOCK;
        }
+
+       if (chip == 2) {
+               if (card->fpga_version > LEGACY_BUFFERS) {
+                       fw_name = "solos-db-FPGA.bin";
+                       blocksize = FPGA_BLOCK;
+               } else {
+                       dev_info(&card->dev->dev, "FPGA version doesn't support daughter board upgrades\n");
+                       return -EPERM;
+               }
+       }
+
+       if (chip == 3) {
+               if (card->fpga_version > LEGACY_BUFFERS) {
+                       fw_name = "solos-Firmware.bin";
+                       blocksize = SOLOS_BLOCK;
+               } else {
+                       dev_info(&card->dev->dev, "FPGA version doesn't support daughter board upgrades\n");
+                       return -EPERM;
+               }
+       }
 
        if (request_firmware(&fw, fw_name, &card->dev->dev))
                return -ENOENT;
@@ -536,8 +572,10 @@ static int flash_upgrade(struct solos_card *card, int chip)
        data32 = ioread32(card->config_regs + FPGA_MODE); 
 
        /* Set mode to Chip Erase */
-       dev_info(&card->dev->dev, "Set FPGA Flash mode to %s Chip Erase\n",
-                chip?"Solos":"FPGA");
+       if (chip == 0 || chip == 2)
+               dev_info(&card->dev->dev, "Set FPGA Flash mode to FPGA Chip Erase\n");
+       if (chip == 1 || chip == 3)
+               dev_info(&card->dev->dev, "Set FPGA Flash mode to Solos Chip Erase\n");
        iowrite32((chip * 2), card->config_regs + FLASH_MODE);
 
 
@@ -557,7 +595,10 @@ static int flash_upgrade(struct solos_card *card, int chip)
                /* Copy block to buffer, swapping each 16 bits */
                for(i = 0; i < blocksize; i += 4) {
                        uint32_t word = swahb32p((uint32_t *)(fw->data + offset + i));
-                       iowrite32(word, RX_BUF(card, 3) + i);
+                       if (card->fpga_version > LEGACY_BUFFERS)
+                               iowrite32(word, FLASH_BUF + i);
+                       else
+                               iowrite32(word, RX_BUF(card, 3) + i);
                }
 
                /* Specify block number and then trigger flash write */
@@ -630,6 +671,10 @@ void solos_bh(unsigned long card_arg)
                                memcpy_fromio(header, RX_BUF(card, port), sizeof(*header));
 
                                size = le16_to_cpu(header->size);
+                               if (size > (card->buffer_size - sizeof(*header))) {
+                                       dev_warn(&card->dev->dev, "Invalid buffer size\n");
+                                       continue;
+                               }
 
                                skb = alloc_skb(size + 1, GFP_ATOMIC);
                                if (!skb) {
@@ -1094,12 +1139,18 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
        fpga_ver = (data32 & 0x0000FFFF);
        major_ver = ((data32 & 0xFF000000) >> 24);
        minor_ver = ((data32 & 0x00FF0000) >> 16);
+       card->fpga_version = FPGA_VERSION(major_ver, minor_ver);
+       if (card->fpga_version > LEGACY_BUFFERS)
+               card->buffer_size = BUF_SIZE;
+       else
+               card->buffer_size = OLD_BUF_SIZE;
        dev_info(&dev->dev, "Solos FPGA Version %d.%02d svn-%d\n",
                 major_ver, minor_ver, fpga_ver);
 
-       if (0 && fpga_ver > 27)
+       if (card->fpga_version >= DMA_SUPPORTED) {
                card->using_dma = 1;
-       else {
+       } else {
+               card->using_dma = 0;
                /* Set RX empty flag for all ports */
                iowrite32(0xF0, card->config_regs + FLAGS_ADDR);
        }
@@ -1131,6 +1182,12 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (firmware_upgrade)
                flash_upgrade(card, 1);
 
+       if (db_fpga_upgrade)
+               flash_upgrade(card, 2);
+
+       if (db_firmware_upgrade)
+               flash_upgrade(card, 3);
+
        err = atm_init(card);
        if (err)
                goto out_free_irq;
index 71d1b9bab70b515afbe682cca1de5bc4ca3613cb..614da5b8613ac7d11271865bd7766a49b3ffdf7b 100644 (file)
@@ -3412,7 +3412,7 @@ static int cdrom_print_info(const char *header, int val, char *info,
        return 0;
 }
 
-static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp,
+static int cdrom_sysctl_info(ctl_table *ctl, int write,
                            void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int pos;
@@ -3489,7 +3489,7 @@ static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp,
                goto done;
 doit:
        mutex_unlock(&cdrom_mutex);
-       return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       return proc_dostring(ctl, write, buffer, lenp, ppos);
 done:
        printk(KERN_INFO "cdrom: info buffer too small\n");
        goto doit;
@@ -3525,12 +3525,12 @@ static void cdrom_update_settings(void)
        mutex_unlock(&cdrom_mutex);
 }
 
-static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp,
+static int cdrom_sysctl_handler(ctl_table *ctl, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret;
        
-       ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write) {
        
index 6a06913b01d30151065471350ec492e7df79a6a3..08a6f50ae791952508ba62fa896a99032788af38 100644 (file)
@@ -1087,6 +1087,14 @@ config MMTIMER
          The mmtimer device allows direct userspace access to the
          Altix system timer.
 
+config UV_MMTIMER
+       tristate "UV_MMTIMER Memory mapped RTC for SGI UV"
+       depends on X86_UV
+       default m
+       help
+         The uv_mmtimer device allows direct userspace access to the
+         UV system timer.
+
 source "drivers/char/tpm/Kconfig"
 
 config TELCLOCK
index 66f779ad4f4c05297409c5eefadcf92d0a3903f1..19a79dd79eee0008503552ac41a14881205d87cd 100644 (file)
@@ -58,6 +58,7 @@ obj-$(CONFIG_RAW_DRIVER)      += raw.o
 obj-$(CONFIG_SGI_SNSC)         += snsc.o snsc_event.o
 obj-$(CONFIG_MSPEC)            += mspec.o
 obj-$(CONFIG_MMTIMER)          += mmtimer.o
+obj-$(CONFIG_UV_MMTIMER)       += uv_mmtimer.o
 obj-$(CONFIG_VIOTAPE)          += viotape.o
 obj-$(CONFIG_HVCS)             += hvcs.o
 obj-$(CONFIG_IBM_BSR)          += bsr.o
index 1540e693d91ebf3f886ad650eb067e6a297b4e3e..4068467ce7b93bcd8e82e1174603b089b868c6b9 100644 (file)
@@ -46,6 +46,8 @@
 #define PCI_DEVICE_ID_INTEL_Q35_IG          0x29B2
 #define PCI_DEVICE_ID_INTEL_Q33_HB          0x29D0
 #define PCI_DEVICE_ID_INTEL_Q33_IG          0x29D2
+#define PCI_DEVICE_ID_INTEL_B43_HB          0x2E40
+#define PCI_DEVICE_ID_INTEL_B43_IG          0x2E42
 #define PCI_DEVICE_ID_INTEL_GM45_HB         0x2A40
 #define PCI_DEVICE_ID_INTEL_GM45_IG         0x2A42
 #define PCI_DEVICE_ID_INTEL_IGD_E_HB        0x2E00
@@ -91,6 +93,7 @@
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \
+               agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB)
@@ -804,23 +807,39 @@ static void intel_i830_setup_flush(void)
        if (!intel_private.i8xx_page)
                return;
 
-       /* make page uncached */
-       map_page_into_agp(intel_private.i8xx_page);
-
        intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
        if (!intel_private.i8xx_flush_page)
                intel_i830_fini_flush();
 }
 
+static void
+do_wbinvd(void *null)
+{
+       wbinvd();
+}
+
+/* The chipset_flush interface needs to get data that has already been
+ * flushed out of the CPU all the way out to main memory, because the GPU
+ * doesn't snoop those buffers.
+ *
+ * The 8xx series doesn't have the same lovely interface for flushing the
+ * chipset write buffers that the later chips do. According to the 865
+ * specs, it's 64 octwords, or 1KB.  So, to get those previous things in
+ * that buffer out, we just fill 1KB and clflush it out, on the assumption
+ * that it'll push whatever was in there out.  It appears to work.
+ */
 static void intel_i830_chipset_flush(struct agp_bridge_data *bridge)
 {
        unsigned int *pg = intel_private.i8xx_flush_page;
-       int i;
 
-       for (i = 0; i < 256; i += 2)
-               *(pg + i) = i;
+       memset(pg, 0, 1024);
 
-       wmb();
+       if (cpu_has_clflush) {
+               clflush_cache_range(pg, 1024);
+       } else {
+               if (on_each_cpu(do_wbinvd, NULL, 1) != 0)
+                       printk(KERN_ERR "Timed out waiting for cache flush.\n");
+       }
 }
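
The comment block above describes the whole technique; as a stand-alone illustration of the cache-line-granular flush that clflush_cache_range() performs here, consider this user-space sketch (the 64-byte line size is an assumption for the sketch; real code should query it):

#include <emmintrin.h>	/* _mm_clflush, _mm_mfence */
#include <stdint.h>

/* Flush a 1KB scratch buffer one 64-byte cache line at a time. */
static void flush_1k(volatile uint8_t *pg)
{
	int i;

	for (i = 0; i < 1024; i += 64)
		_mm_clflush((const void *)(pg + i));
	_mm_mfence();	/* order the flushes before later device access */
}
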
 
 /* The intel i830 automatically initializes the agp aperture during POST.
@@ -1341,6 +1360,7 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size)
        case PCI_DEVICE_ID_INTEL_Q45_HB:
        case PCI_DEVICE_ID_INTEL_G45_HB:
        case PCI_DEVICE_ID_INTEL_G41_HB:
+       case PCI_DEVICE_ID_INTEL_B43_HB:
        case PCI_DEVICE_ID_INTEL_IGDNG_D_HB:
        case PCI_DEVICE_ID_INTEL_IGDNG_M_HB:
        case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB:
@@ -2335,6 +2355,8 @@ static const struct intel_driver_description {
            "Q45/Q43", NULL, &intel_i965_driver },
        { PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0,
            "G45/G43", NULL, &intel_i965_driver },
+       { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, 0,
+           "B43", NULL, &intel_i965_driver },
        { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0,
            "G41", NULL, &intel_i965_driver },
        { PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0,
@@ -2535,6 +2557,7 @@ static struct pci_device_id agp_intel_pci_table[] = {
        ID(PCI_DEVICE_ID_INTEL_Q45_HB),
        ID(PCI_DEVICE_ID_INTEL_G45_HB),
        ID(PCI_DEVICE_ID_INTEL_G41_HB),
+       ID(PCI_DEVICE_ID_INTEL_B43_HB),
        ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB),
        ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB),
        ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB),
index 0a01329451e4af6d0f1b83624de37099c8c29382..e3dd24bff5143206df8c4345fc1fce56fe492cda 100644 (file)
@@ -1,8 +1,7 @@
 /*
  * Blackfin On-Chip OTP Memory Interface
- *  Supports BF52x/BF54x
  *
- * Copyright 2007-2008 Analog Devices Inc.
+ * Copyright 2007-2009 Analog Devices Inc.
  *
  * Enter bugs at http://blackfin.uclinux.org/
  *
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/types.h>
+#include <mtd/mtd-abi.h>
 
 #include <asm/blackfin.h>
+#include <asm/bfrom.h>
 #include <asm/uaccess.h>
 
 #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args)
 
 static DEFINE_MUTEX(bfin_otp_lock);
 
-/* OTP Boot ROM functions */
-#define _BOOTROM_OTP_COMMAND           0xEF000018
-#define _BOOTROM_OTP_READ              0xEF00001A
-#define _BOOTROM_OTP_WRITE             0xEF00001C
-
-static u32 (* const otp_command)(u32 command, u32 value) = (void *)_BOOTROM_OTP_COMMAND;
-static u32 (* const otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_READ;
-static u32 (* const otp_write)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_WRITE;
-
-/* otp_command(): defines for "command" */
-#define OTP_INIT             0x00000001
-#define OTP_CLOSE            0x00000002
-
-/* otp_{read,write}(): defines for "flags" */
-#define OTP_LOWER_HALF       0x00000000 /* select upper/lower 64-bit half (bit 0) */
-#define OTP_UPPER_HALF       0x00000001
-#define OTP_NO_ECC           0x00000010 /* do not use ECC */
-#define OTP_LOCK             0x00000020 /* sets page protection bit for page */
-#define OTP_ACCESS_READ      0x00001000
-#define OTP_ACCESS_READWRITE 0x00002000
-
-/* Return values for all functions */
-#define OTP_SUCCESS          0x00000000
-#define OTP_MASTER_ERROR     0x001
-#define OTP_WRITE_ERROR      0x003
-#define OTP_READ_ERROR       0x005
-#define OTP_ACC_VIO_ERROR    0x009
-#define OTP_DATA_MULT_ERROR  0x011
-#define OTP_ECC_MULT_ERROR   0x021
-#define OTP_PREV_WR_ERROR    0x041
-#define OTP_DATA_SB_WARN     0x100
-#define OTP_ECC_SB_WARN      0x200
-
 /**
  *     bfin_otp_read - Read OTP pages
  *
@@ -86,9 +54,11 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
        page = *pos / (sizeof(u64) * 2);
        while (bytes_done < count) {
                flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
-               stamp("processing page %i (%s)", page, (flags == OTP_UPPER_HALF ? "upper" : "lower"));
-               ret = otp_read(page, flags, &content);
+               stamp("processing page %i (0x%x:%s)", page, flags,
+                       (flags & OTP_UPPER_HALF ? "upper" : "lower"));
+               ret = bfrom_OtpRead(page, flags, &content);
                if (ret & OTP_MASTER_ERROR) {
+                       stamp("error from otp: 0x%x", ret);
                        bytes_done = -EIO;
                        break;
                }
@@ -96,7 +66,7 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
                        bytes_done = -EFAULT;
                        break;
                }
-               if (flags == OTP_UPPER_HALF)
+               if (flags & OTP_UPPER_HALF)
                        ++page;
                bytes_done += sizeof(content);
                *pos += sizeof(content);
@@ -108,14 +78,53 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
 }
 
 #ifdef CONFIG_BFIN_OTP_WRITE_ENABLE
+static bool allow_writes;
+
+/**
+ *     bfin_otp_init_timing - setup OTP timing parameters
+ *
+ *     Required before doing any write operation.  Algorithms from HRM.
+ */
+static u32 bfin_otp_init_timing(void)
+{
+       u32 tp1, tp2, tp3, timing;
+
+       tp1 = get_sclk() / 1000000;
+       tp2 = (2 * get_sclk() / 10000000) << 8;
+       tp3 = (0x1401) << 15;
+       timing = tp1 | tp2 | tp3;
+       if (bfrom_OtpCommand(OTP_INIT, timing))
+               return 0;
+
+       return timing;
+}
+
+/**
+ *     bfin_otp_deinit_timing - set timings to only allow reads
+ *
+ *     Should be called after all writes are done.
+ */
+static void bfin_otp_deinit_timing(u32 timing)
+{
+       /* mask bits [31:15] so that any attempts to write fail */
+       bfrom_OtpCommand(OTP_CLOSE, 0);
+       bfrom_OtpCommand(OTP_INIT, timing & ~(-1 << 15));
+       bfrom_OtpCommand(OTP_CLOSE, 0);
+}
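
For a rough feel of the timing word built by bfin_otp_init_timing(), assume get_sclk() returns 100 MHz (an illustrative value only): tp1 = 100000000 / 1000000 = 0x64, tp2 = (2 * 100000000 / 10000000) << 8 = 20 << 8 = 0x1400, and tp3 = 0x1401 << 15 = 0x0a008000, giving timing = 0x0a009464.
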
+
 /**
- *     bfin_otp_write - Write OTP pages
+ *     bfin_otp_write - write OTP pages
  *
  *     All writes must be in half page chunks (half page == 64 bits).
  */
 static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos)
 {
-       stampit();
+       ssize_t bytes_done;
+       u32 timing, page, base_flags, flags, ret;
+       u64 content;
+
+       if (!allow_writes)
+               return -EACCES;
 
        if (count % sizeof(u64))
                return -EMSGSIZE;
@@ -123,20 +132,96 @@ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t
        if (mutex_lock_interruptible(&bfin_otp_lock))
                return -ERESTARTSYS;
 
-       /* need otp_init() documentation before this can be implemented */
+       stampit();
+
+       timing = bfin_otp_init_timing();
+       if (timing == 0) {
+               mutex_unlock(&bfin_otp_lock);
+               return -EIO;
+       }
+
+       base_flags = OTP_CHECK_FOR_PREV_WRITE;
+
+       bytes_done = 0;
+       page = *pos / (sizeof(u64) * 2);
+       while (bytes_done < count) {
+               flags = base_flags | (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
+               stamp("processing page %i (0x%x:%s) from %p", page, flags,
+                       (flags & OTP_UPPER_HALF ? "upper" : "lower"), buff + bytes_done);
+               if (copy_from_user(&content, buff + bytes_done, sizeof(content))) {
+                       bytes_done = -EFAULT;
+                       break;
+               }
+               ret = bfrom_OtpWrite(page, flags, &content);
+               if (ret & OTP_MASTER_ERROR) {
+                       stamp("error from otp: 0x%x", ret);
+                       bytes_done = -EIO;
+                       break;
+               }
+               if (flags & OTP_UPPER_HALF)
+                       ++page;
+               bytes_done += sizeof(content);
+               *pos += sizeof(content);
+       }
+
+       bfin_otp_deinit_timing(timing);
 
        mutex_unlock(&bfin_otp_lock);
 
+       return bytes_done;
+}
+
+static long bfin_otp_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
+{
+       stampit();
+
+       switch (cmd) {
+       case OTPLOCK: {
+               u32 timing;
+               int ret = -EIO;
+
+               if (!allow_writes)
+                       return -EACCES;
+
+               if (mutex_lock_interruptible(&bfin_otp_lock))
+                       return -ERESTARTSYS;
+
+               timing = bfin_otp_init_timing();
+               if (timing) {
+                       u32 otp_result = bfrom_OtpWrite(arg, OTP_LOCK, NULL);
+                       stamp("locking page %lu resulted in 0x%x", arg, otp_result);
+                       if (!(otp_result & OTP_MASTER_ERROR))
+                               ret = 0;
+
+                       bfin_otp_deinit_timing(timing);
+               }
+
+               mutex_unlock(&bfin_otp_lock);
+
+               return ret;
+       }
+
+       case MEMLOCK:
+               allow_writes = false;
+               return 0;
+
+       case MEMUNLOCK:
+               allow_writes = true;
+               return 0;
+       }
+
        return -EINVAL;
 }
 #else
 # define bfin_otp_write NULL
+# define bfin_otp_ioctl NULL
 #endif
 
 static struct file_operations bfin_otp_fops = {
-       .owner    = THIS_MODULE,
-       .read     = bfin_otp_read,
-       .write    = bfin_otp_write,
+       .owner          = THIS_MODULE,
+       .unlocked_ioctl = bfin_otp_ioctl,
+       .read           = bfin_otp_read,
+       .write          = bfin_otp_write,
 };
 
 static struct miscdevice bfin_otp_misc_device = {
index 4a9f3492b9216142333f3fa2013542b4638b9c1e..70a770ac013875a3e4b5200b954a21197623e2e7 100644 (file)
@@ -166,9 +166,8 @@ static irqreturn_t hpet_interrupt(int irq, void *data)
                unsigned long m, t;
 
                t = devp->hd_ireqfreq;
-               m = read_counter(&devp->hd_hpet->hpet_mc);
-               write_counter(t + m + devp->hd_hpets->hp_delta,
-                             &devp->hd_timer->hpet_compare);
+               m = read_counter(&devp->hd_timer->hpet_compare);
+               write_counter(t + m, &devp->hd_timer->hpet_compare);
        }
 
        if (devp->hd_flags & HPET_SHARED_IRQ)
@@ -504,21 +503,25 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp)
        g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK;
 
        if (devp->hd_flags & HPET_PERIODIC) {
-               write_counter(t, &timer->hpet_compare);
                g |= Tn_TYPE_CNF_MASK;
-               v |= Tn_TYPE_CNF_MASK;
-               writeq(v, &timer->hpet_config);
-               v |= Tn_VAL_SET_CNF_MASK;
+               v |= Tn_TYPE_CNF_MASK | Tn_VAL_SET_CNF_MASK;
                writeq(v, &timer->hpet_config);
                local_irq_save(flags);
 
-               /* NOTE:  what we modify here is a hidden accumulator
+               /*
+                * NOTE: First we modify the hidden accumulator
                 * register supported by periodic-capable comparators.
                 * We never want to modify the (single) counter; that
-                * would affect all the comparators.
+                * would affect all the comparators. The value written
+                * is the counter value when the first interrupt is due.
                 */
                m = read_counter(&hpet->hpet_mc);
                write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare);
+               /*
+                * Then we modify the comparator, indicating the period
+                * for subsequent interrupt.
+                */
+               write_counter(t, &timer->hpet_compare);
        } else {
                local_irq_save(flags);
                m = read_counter(&hpet->hpet_mc);
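
Concretely, in the periodic branch above: with VAL_SET armed, the first write_counter() lands in the hidden accumulator and fixes the first expiry at m + t + hp_delta, and the second programs the bare period t into the comparator. For example, with m = 1000, t = 50 and hp_delta = 2, interrupts fire at counter value 1052 and every 50 ticks thereafter.
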
index 0aede1d6a9eaa28a843274793ddc0af54e492962..6c8b65d069e514badcab1b70c6032fea129b2bc7 100644 (file)
@@ -690,7 +690,7 @@ static ssize_t read_zero(struct file * file, char __user * buf,
 
                if (chunk > PAGE_SIZE)
                        chunk = PAGE_SIZE;      /* Just for latency reasons */
-               unwritten = clear_user(buf, chunk);
+               unwritten = __clear_user(buf, chunk);
                written += chunk - unwritten;
                if (unwritten)
                        break;
index 94ad2c3bfc4a290d025bffbbf14481fa1b8dd39d..a4ec50c950722410e9cd6dc99b72b76edf11fb2b 100644 (file)
@@ -281,12 +281,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
                case IOCTL_MW_REGISTER_IPC: {
                        unsigned int ipcnum = (unsigned int) ioarg;
        
-                       PRINTK_3(TRACE_MWAVE,
-                               "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
-                               " ipcnum %x entry usIntCount %x\n",
-                               ipcnum,
-                               pDrvData->IPCs[ipcnum].usIntCount);
-       
                        if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
                                PRINTK_ERROR(KERN_ERR_MWAVE
                                                "mwavedd::mwave_ioctl:"
@@ -295,6 +289,12 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
                                                ipcnum);
                                return -EINVAL;
                        }
+                       PRINTK_3(TRACE_MWAVE,
+                               "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
+                               " ipcnum %x entry usIntCount %x\n",
+                               ipcnum,
+                               pDrvData->IPCs[ipcnum].usIntCount);
+
                        lock_kernel();
                        pDrvData->IPCs[ipcnum].bIsHere = FALSE;
                        pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
@@ -310,11 +310,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
                case IOCTL_MW_GET_IPC: {
                        unsigned int ipcnum = (unsigned int) ioarg;
        
-                       PRINTK_3(TRACE_MWAVE,
-                               "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
-                               " ipcnum %x, usIntCount %x\n",
-                               ipcnum,
-                               pDrvData->IPCs[ipcnum].usIntCount);
                        if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
                                PRINTK_ERROR(KERN_ERR_MWAVE
                                                "mwavedd::mwave_ioctl:"
@@ -322,6 +317,11 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
                                                " Invalid ipcnum %x\n", ipcnum);
                                return -EINVAL;
                        }
+                       PRINTK_3(TRACE_MWAVE,
+                               "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
+                               " ipcnum %x, usIntCount %x\n",
+                               ipcnum,
+                               pDrvData->IPCs[ipcnum].usIntCount);
        
                        lock_kernel();
                        if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
index d8a9255e1a3f1ed757d2066a528108cf033e1760..04b505e5a5e25da1b477218b787f6bd9f2470709 100644 (file)
@@ -1231,7 +1231,7 @@ static char sysctl_bootid[16];
  * as an ASCII string in the standard UUID format.  If accessed via the
  * sysctl system call, it is returned as 16 bytes of binary data.
  */
-static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
+static int proc_do_uuid(ctl_table *table, int write,
                        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        ctl_table fake_table;
@@ -1254,7 +1254,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
        fake_table.data = buf;
        fake_table.maxlen = sizeof(buf);
 
-       return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos);
+       return proc_dostring(&fake_table, write, buffer, lenp, ppos);
 }
 
 static int uuid_strategy(ctl_table *table,
index eecee0f576d2d7eea772b574ebac97ec33db04f4..74339559f0b9a3be4a768c5ef1f65d4cc8b3ca58 100644 (file)
@@ -873,7 +873,7 @@ int riocontrol(struct rio_info *p, dev_t dev, int cmd, unsigned long arg, int su
                /*
                 ** It is important that the product code is an unsigned object!
                 */
-               if (DownLoad.ProductCode > MAX_PRODUCT) {
+               if (DownLoad.ProductCode >= MAX_PRODUCT) {
                        rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode);
                        p->RIOError.Error = NO_SUCH_PRODUCT;
                        return -ENXIO;
index 32b957efa42047af3065b6e30c31827031e94159..45d58002b06c0087ad5ece5a2bbb14a844f574c0 100644 (file)
@@ -742,7 +742,7 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read);
  * the module usage count.
  */
 #define TPM_ORD_PCR_EXTEND cpu_to_be32(20)
-#define EXTEND_PCR_SIZE 34
+#define EXTEND_PCR_RESULT_SIZE 34
 static struct tpm_input_header pcrextend_header = {
        .tag = TPM_TAG_RQU_COMMAND,
        .length = cpu_to_be32(34),
@@ -760,10 +760,9 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
                return -ENODEV;
 
        cmd.header.in = pcrextend_header;
-       BUG_ON(be32_to_cpu(cmd.header.in.length) > EXTEND_PCR_SIZE);
        cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
        memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
-       rc = transmit_cmd(chip, &cmd, cmd.header.in.length,
+       rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
                          "attempting extend a PCR value");
 
        module_put(chip->dev->driver->owner);
diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c
new file mode 100644 (file)
index 0000000..867b67b
--- /dev/null
@@ -0,0 +1,216 @@
+/*
+ * Timer device implementation for SGI UV platform.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2009 Silicon Graphics, Inc.  All rights reserved.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mmtimer.h>
+#include <linux/miscdevice.h>
+#include <linux/posix-timers.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/smp_lock.h>
+
+#include <asm/genapic.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+
+MODULE_AUTHOR("Dimitri Sivanich <sivanich@sgi.com>");
+MODULE_DESCRIPTION("SGI UV Memory Mapped RTC Timer");
+MODULE_LICENSE("GPL");
+
+/* name of the device, usually in /dev */
+#define UV_MMTIMER_NAME "mmtimer"
+#define UV_MMTIMER_DESC "SGI UV Memory Mapped RTC Timer"
+#define UV_MMTIMER_VERSION "1.0"
+
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+                                               unsigned long arg);
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma);
+
+/*
+ * Period in femtoseconds (10^-15 s)
+ */
+static unsigned long uv_mmtimer_femtoperiod;
+
+static const struct file_operations uv_mmtimer_fops = {
+       .owner = THIS_MODULE,
+       .mmap = uv_mmtimer_mmap,
+       .unlocked_ioctl = uv_mmtimer_ioctl,
+};
+
+/**
+ * uv_mmtimer_ioctl - ioctl interface for /dev/uv_mmtimer
+ * @file: file structure for the device
+ * @cmd: command to execute
+ * @arg: optional argument to command
+ *
+ * Executes the command specified by @cmd.  Returns 0 for success, < 0 for
+ * failure.
+ *
+ * Valid commands:
+ *
+ * %MMTIMER_GETOFFSET - Should return the offset (relative to the start
+ * of the page where the registers are mapped) for the counter in question.
+ *
+ * %MMTIMER_GETRES - Returns the resolution of the clock in femto (10^-15)
+ * seconds
+ *
+ * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address
+ * specified by @arg
+ *
+ * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter
+ *
+ * %MMTIMER_MMAPAVAIL - Returns 1 if registers can be mmap'd into userspace
+ *
+ * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it
+ * in the address specified by @arg.
+ */
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+                                               unsigned long arg)
+{
+       int ret = 0;
+
+       switch (cmd) {
+       case MMTIMER_GETOFFSET: /* offset of the counter */
+               /*
+                * UV RTC register is on its own page
+                */
+               if (PAGE_SIZE <= (1 << 16))
+                       ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1))
+                               / 8;
+               else
+                       ret = -ENOSYS;
+               break;
+
+       case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */
+               if (copy_to_user((unsigned long __user *)arg,
+                               &uv_mmtimer_femtoperiod, sizeof(unsigned long)))
+                       ret = -EFAULT;
+               break;
+
+       case MMTIMER_GETFREQ: /* frequency in Hz */
+               if (copy_to_user((unsigned long __user *)arg,
+                               &sn_rtc_cycles_per_second,
+                               sizeof(unsigned long)))
+                       ret = -EFAULT;
+               break;
+
+       case MMTIMER_GETBITS: /* number of bits in the clock */
+               ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK);
+               break;
+
+       case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */
+               ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0;
+               break;
+
+       case MMTIMER_GETCOUNTER:
+               if (copy_to_user((unsigned long __user *)arg,
+                               (unsigned long *)uv_local_mmr_address(UVH_RTC),
+                               sizeof(unsigned long)))
+                       ret = -EFAULT;
+               break;
+       default:
+               ret = -ENOTTY;
+               break;
+       }
+       return ret;
+}
+
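
A hypothetical user-space consumer of the ioctls documented above (the /dev/mmtimer node name follows UV_MMTIMER_NAME; treat this as an illustrative sketch, not a tested client):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/mmtimer.h>

int main(void)
{
	unsigned long freq = 0;
	int fd = open("/dev/mmtimer", O_RDONLY);

	if (fd < 0)
		return 1;
	/* MMTIMER_GETFREQ copies the RTC frequency in Hz to &freq */
	if (ioctl(fd, MMTIMER_GETFREQ, &freq) == 0)
		printf("RTC runs at %lu Hz\n", freq);
	close(fd);
	return 0;
}
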
+/**
+ * uv_mmtimer_mmap - maps the clock's registers into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ *
+ * Calls remap_pfn_range() to map the clock's registers into
+ * the calling process' address space.
+ */
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       unsigned long uv_mmtimer_addr;
+
+       if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+               return -EINVAL;
+
+       if (vma->vm_flags & VM_WRITE)
+               return -EPERM;
+
+       if (PAGE_SIZE > (1 << 16))
+               return -ENOSYS;
+
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       uv_mmtimer_addr = UV_LOCAL_MMR_BASE | UVH_RTC;
+       uv_mmtimer_addr &= ~(PAGE_SIZE - 1);
+       uv_mmtimer_addr &= 0xfffffffffffffffUL;
+
+       if (remap_pfn_range(vma, vma->vm_start, uv_mmtimer_addr >> PAGE_SHIFT,
+                                       PAGE_SIZE, vma->vm_page_prot)) {
+               printk(KERN_ERR "remap_pfn_range failed in uv_mmtimer_mmap\n");
+               return -EAGAIN;
+       }
+
+       return 0;
+}
+
+static struct miscdevice uv_mmtimer_miscdev = {
+       MISC_DYNAMIC_MINOR,
+       UV_MMTIMER_NAME,
+       &uv_mmtimer_fops
+};
+
+
+/**
+ * uv_mmtimer_init - device initialization routine
+ *
+ * Does initial setup for the uv_mmtimer device.
+ */
+static int __init uv_mmtimer_init(void)
+{
+       if (!is_uv_system()) {
+               printk(KERN_ERR "%s: Hardware unsupported\n", UV_MMTIMER_NAME);
+               return -1;
+       }
+
+       /*
+        * Sanity check the cycles/sec variable
+        */
+       if (sn_rtc_cycles_per_second < 100000) {
+               printk(KERN_ERR "%s: unable to determine clock frequency\n",
+                      UV_MMTIMER_NAME);
+               return -1;
+       }
+
+       uv_mmtimer_femtoperiod = ((unsigned long)1E15 +
+                               sn_rtc_cycles_per_second / 2) /
+                               sn_rtc_cycles_per_second;
+
+       if (misc_register(&uv_mmtimer_miscdev)) {
+               printk(KERN_ERR "%s: failed to register device\n",
+                      UV_MMTIMER_NAME);
+               return -1;
+       }
+
+       printk(KERN_INFO "%s: v%s, %ld MHz\n", UV_MMTIMER_DESC,
+               UV_MMTIMER_VERSION,
+               sn_rtc_cycles_per_second/(unsigned long)1E6);
+
+       return 0;
+}
+
+module_init(uv_mmtimer_init);
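
The femtoperiod computation in uv_mmtimer_init() rounds to nearest: with sn_rtc_cycles_per_second = 100000000 (100 MHz, chosen for illustration), (10^15 + 5 * 10^7) / 10^8 = 10^7 femtoseconds, i.e. 10 ns; the added half-divisor term is what turns truncating division into round-to-nearest.
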
index 25b743abfb59442b52ccec0057573716249bdbbf..52e6bb70a490d8bebf26745191bcf80ce5338ca2 100644 (file)
@@ -28,7 +28,7 @@
 #include <linux/device.h>
 #include <linux/dca.h>
 
-#define DCA_VERSION "1.8"
+#define DCA_VERSION "1.12.1"
 
 MODULE_VERSION(DCA_VERSION);
 MODULE_LICENSE("GPL");
@@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation");
 
 static DEFINE_SPINLOCK(dca_lock);
 
-static LIST_HEAD(dca_providers);
+static LIST_HEAD(dca_domains);
 
-static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
 {
-       struct dca_provider *dca, *ret = NULL;
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct pci_bus *bus = pdev->bus;
 
-       list_for_each_entry(dca, &dca_providers, node) {
-               if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
-                       ret = dca;
-                       break;
-               }
+       while (bus->parent)
+               bus = bus->parent;
+
+       return bus;
+}
+
+static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
+{
+       struct dca_domain *domain;
+
+       domain = kzalloc(sizeof(*domain), GFP_NOWAIT);
+       if (!domain)
+               return NULL;
+
+       INIT_LIST_HEAD(&domain->dca_providers);
+       domain->pci_rc = rc;
+
+       return domain;
+}
+
+static void dca_free_domain(struct dca_domain *domain)
+{
+       list_del(&domain->node);
+       kfree(domain);
+}
+
+static struct dca_domain *dca_find_domain(struct pci_bus *rc)
+{
+       struct dca_domain *domain;
+
+       list_for_each_entry(domain, &dca_domains, node)
+               if (domain->pci_rc == rc)
+                       return domain;
+
+       return NULL;
+}
+
+static struct dca_domain *dca_get_domain(struct device *dev)
+{
+       struct pci_bus *rc;
+       struct dca_domain *domain;
+
+       rc = dca_pci_rc_from_dev(dev);
+       domain = dca_find_domain(rc);
+
+       if (!domain) {
+               domain = dca_allocate_domain(rc);
+               if (domain)
+                       list_add(&domain->node, &dca_domains);
+       }
+
+       return domain;
+}
+
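
dca_get_domain() above is the classic find-or-create lookup keyed on the PCI root complex. A stand-alone sketch of the same pattern with a plain pointer key (illustrative only, not the kernel list API):

#include <stdlib.h>

struct domain {
	void *key;
	struct domain *next;
};

static struct domain *domains;

/* Return the domain for key, allocating one on first use. */
static struct domain *get_domain(void *key)
{
	struct domain *d;

	for (d = domains; d; d = d->next)
		if (d->key == key)
			return d;

	d = calloc(1, sizeof(*d));	/* may fail, like GFP_NOWAIT */
	if (d) {
		d->key = key;
		d->next = domains;
		domains = d;
	}
	return d;
}
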
+static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+{
+       struct dca_provider *dca;
+       struct pci_bus *rc;
+       struct dca_domain *domain;
+
+       if (dev) {
+               rc = dca_pci_rc_from_dev(dev);
+               domain = dca_find_domain(rc);
+               if (!domain)
+                       return NULL;
+       } else {
+               if (!list_empty(&dca_domains))
+                       domain = list_first_entry(&dca_domains,
+                                                 struct dca_domain,
+                                                 node);
+               else
+                       return NULL;
        }
 
-       return ret;
+       list_for_each_entry(dca, &domain->dca_providers, node)
+               if ((!dev) || (dca->ops->dev_managed(dca, dev)))
+                       return dca;
+
+       return NULL;
 }
 
 /**
@@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev)
        struct dca_provider *dca;
        int err, slot = -ENODEV;
        unsigned long flags;
+       struct pci_bus *pci_rc;
+       struct dca_domain *domain;
 
        if (!dev)
                return -EFAULT;
@@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev)
                return -EEXIST;
        }
 
-       list_for_each_entry(dca, &dca_providers, node) {
+       pci_rc = dca_pci_rc_from_dev(dev);
+       domain = dca_find_domain(pci_rc);
+       if (!domain) {
+               spin_unlock_irqrestore(&dca_lock, flags);
+               return -ENODEV;
+       }
+
+       list_for_each_entry(dca, &domain->dca_providers, node) {
                slot = dca->ops->add_requester(dca, dev);
                if (slot >= 0)
                        break;
@@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
 {
        int err;
        unsigned long flags;
+       struct dca_domain *domain;
 
        err = dca_sysfs_add_provider(dca, dev);
        if (err)
                return err;
 
        spin_lock_irqsave(&dca_lock, flags);
-       list_add(&dca->node, &dca_providers);
+       domain = dca_get_domain(dev);
+       if (!domain) {
+               spin_unlock_irqrestore(&dca_lock, flags);
+               return -ENODEV;
+       }
+       list_add(&dca->node, &domain->dca_providers);
        spin_unlock_irqrestore(&dca_lock, flags);
 
        blocking_notifier_call_chain(&dca_provider_chain,
@@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
  * unregister_dca_provider - remove a dca provider
  * @dca - struct created by alloc_dca_provider()
  */
-void unregister_dca_provider(struct dca_provider *dca)
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
 {
        unsigned long flags;
+       struct pci_bus *pci_rc;
+       struct dca_domain *domain;
 
        blocking_notifier_call_chain(&dca_provider_chain,
                                     DCA_PROVIDER_REMOVE, NULL);
 
        spin_lock_irqsave(&dca_lock, flags);
+
        list_del(&dca->node);
+
+       pci_rc = dca_pci_rc_from_dev(dev);
+       domain = dca_find_domain(pci_rc);
+       if (domain && list_empty(&domain->dca_providers))
+               dca_free_domain(domain);
+
        spin_unlock_irqrestore(&dca_lock, flags);
 
        dca_sysfs_remove_provider(dca);
@@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify);
 
 static int __init dca_init(void)
 {
-       printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
+       pr_info("dca service started, version %s\n", DCA_VERSION);
        return dca_sysfs_init();
 }
 
index 81e1020fb5148a75677aec191e974c554031d03f..5903a88351bfdf5b844a45c616aa68419318a3fb 100644 (file)
@@ -17,11 +17,15 @@ if DMADEVICES
 
 comment "DMA Devices"
 
+config ASYNC_TX_DISABLE_CHANNEL_SWITCH
+       bool
+
 config INTEL_IOATDMA
        tristate "Intel I/OAT DMA support"
        depends on PCI && X86
        select DMA_ENGINE
        select DCA
+       select ASYNC_TX_DISABLE_CHANNEL_SWITCH
        help
          Enable support for the Intel(R) I/OAT DMA engine present
          in recent Intel Xeon chipsets.
@@ -97,6 +101,14 @@ config TXX9_DMAC
          Support the TXx9 SoC internal DMA controller.  This can be
          integrated in chips such as the Toshiba TX4927/38/39.
 
+config SH_DMAE
+       tristate "Renesas SuperH DMAC support"
+       depends on SUPERH && SH_DMA
+       depends on !SH_DMA_API
+       select DMA_ENGINE
+       help
+         Enable support for the Renesas SuperH DMA controllers.
+
 config DMA_ENGINE
        bool
 
@@ -116,7 +128,7 @@ config NET_DMA
 
 config ASYNC_TX_DMA
        bool "Async_tx: Offload support for the async_tx api"
-       depends on DMA_ENGINE && !HIGHMEM64G
+       depends on DMA_ENGINE
        help
          This allows the async_tx api to take advantage of offload engines for
          memcpy, memset, xor, and raid6 p+q operations.  If your platform has
index 40e1e008357192b1aced8b1abab5a86460c78352..eca71ba78ae9716234b46c30788f350ded01a692 100644 (file)
@@ -1,8 +1,7 @@
 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_NET_DMA) += iovlock.o
 obj-$(CONFIG_DMATEST) += dmatest.o
-obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
-ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
 obj-$(CONFIG_MV_XOR) += mv_xor.o
@@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
+obj-$(CONFIG_SH_DMAE) += shdma.o
index c8522e6f1ad2d0aef5722e740b3bbce22ca3986b..7585c4164bd5f1d28d300e923969b797c5d9bd99 100644 (file)
@@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
        desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
        if (desc) {
                memset(desc, 0, sizeof(struct at_desc));
+               INIT_LIST_HEAD(&desc->tx_list);
                dma_async_tx_descriptor_init(&desc->txd, chan);
                /* txd.flags will be overwritten in prep functions */
                desc->txd.flags = DMA_CTRL_ACK;
@@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
                struct at_desc *child;
 
                spin_lock_bh(&atchan->lock);
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        dev_vdbg(chan2dev(&atchan->chan_common),
                                        "moving child desc %p to freelist\n",
                                        child);
-               list_splice_init(&desc->txd.tx_list, &atchan->free_list);
+               list_splice_init(&desc->tx_list, &atchan->free_list);
                dev_vdbg(chan2dev(&atchan->chan_common),
                         "moving desc %p to freelist\n", desc);
                list_add(&desc->desc_node, &atchan->free_list);
@@ -247,30 +248,33 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
        param = txd->callback_param;
 
        /* move children to free_list */
-       list_splice_init(&txd->tx_list, &atchan->free_list);
+       list_splice_init(&desc->tx_list, &atchan->free_list);
        /* move myself to free_list */
        list_move(&desc->desc_node, &atchan->free_list);
 
        /* unmap dma addresses */
-       if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       dma_unmap_single(chan2parent(&atchan->chan_common),
-                                       desc->lli.daddr,
-                                       desc->len, DMA_FROM_DEVICE);
-               else
-                       dma_unmap_page(chan2parent(&atchan->chan_common),
-                                       desc->lli.daddr,
-                                       desc->len, DMA_FROM_DEVICE);
-       }
-       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       dma_unmap_single(chan2parent(&atchan->chan_common),
-                                       desc->lli.saddr,
-                                       desc->len, DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(chan2parent(&atchan->chan_common),
-                                       desc->lli.saddr,
-                                       desc->len, DMA_TO_DEVICE);
+       if (!atchan->chan_common.private) {
+               struct device *parent = chan2parent(&atchan->chan_common);
+               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+                               dma_unmap_single(parent,
+                                               desc->lli.daddr,
+                                               desc->len, DMA_FROM_DEVICE);
+                       else
+                               dma_unmap_page(parent,
+                                               desc->lli.daddr,
+                                               desc->len, DMA_FROM_DEVICE);
+               }
+               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+                               dma_unmap_single(parent,
+                                               desc->lli.saddr,
+                                               desc->len, DMA_TO_DEVICE);
+                       else
+                               dma_unmap_page(parent,
+                                               desc->lli.saddr,
+                                               desc->len, DMA_TO_DEVICE);
+               }
        }
 
        /*
@@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan)
                        /* This one is currently in progress */
                        return;
 
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        if (!(child->lli.ctrla & ATC_DONE))
                                /* Currently in progress */
                                return;
@@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan)
        dev_crit(chan2dev(&atchan->chan_common),
                        "  cookie: %d\n", bad_desc->txd.cookie);
        atc_dump_lli(atchan, &bad_desc->lli);
-       list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &bad_desc->tx_list, desc_node)
                atc_dump_lli(atchan, &child->lli);
 
        /* Pretend the descriptor completed successfully */
@@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                        prev->lli.dscr = desc->txd.phys;
                        /* insert the link descriptor to the LD ring */
                        list_add_tail(&desc->desc_node,
-                                       &first->txd.tx_list);
+                                       &first->tx_list);
                }
                prev = desc;
        }
@@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
        reg_width = atslave->reg_width;
 
-       sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
        ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
        ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
 
@@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                prev->lli.dscr = desc->txd.phys;
                                /* insert the link descriptor to the LD ring */
                                list_add_tail(&desc->desc_node,
-                                               &first->txd.tx_list);
+                                               &first->tx_list);
                        }
                        prev = desc;
                        total_len += len;
@@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                prev->lli.dscr = desc->txd.phys;
                                /* insert the link descriptor to the LD ring */
                                list_add_tail(&desc->desc_node,
-                                               &first->txd.tx_list);
+                                               &first->tx_list);
                        }
                        prev = desc;
                        total_len += len;
index 4c972afc49ec32db97ddd52059c9e27961f20f98..495457e3dc4b6133dc6c846392961db3a461aaf9 100644 (file)
@@ -165,6 +165,7 @@ struct at_desc {
        struct at_lli                   lli;
 
        /* THEN values for driver housekeeping */
+       struct list_head                tx_list;
        struct dma_async_tx_descriptor  txd;
        struct list_head                desc_node;
        size_t                          len;
index 5a87384ea4ff5cf58c77b64200a621b8fe616b76..bd0b248de2cfabc28f1fecd63e5d64ed61236844 100644 (file)
@@ -608,6 +608,40 @@ void dmaengine_put(void)
 }
 EXPORT_SYMBOL(dmaengine_put);
 
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+       /* A device that satisfies this test has channels that will never cause
+        * an async_tx channel switch event as all possible operation types can
+        * be handled.
+        */
+       #ifdef CONFIG_ASYNC_TX_DMA
+       if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+               return false;
+       #endif
+
+       #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+       if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+               return false;
+       #endif
+
+       #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+       if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+               return false;
+       #endif
+
+       #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+       if (!dma_has_cap(DMA_XOR, device->cap_mask))
+               return false;
+       #endif
+
+       #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+       if (!dma_has_cap(DMA_PQ, device->cap_mask))
+               return false;
+       #endif
+
+       return true;
+}
+
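
The DMA_ASYNC_TX capability set from this test is what consumers can filter on. A kernel-context fragment (not a standalone program) sketching the request side with the stock dmaengine channel-request API:

	dma_cap_mask_t mask;
	struct dma_chan *chan;

	dma_cap_zero(mask);
	dma_cap_set(DMA_ASYNC_TX, mask);
	/* NULL filter: accept the first channel whose device has the cap */
	chan = dma_request_channel(mask, NULL, NULL);
	if (chan)
		dma_release_channel(chan);
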
 static int get_dma_id(struct dma_device *device)
 {
        int rc;
@@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device)
                !device->device_prep_dma_memcpy);
        BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
                !device->device_prep_dma_xor);
-       BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
-               !device->device_prep_dma_zero_sum);
+       BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
+               !device->device_prep_dma_xor_val);
+       BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+               !device->device_prep_dma_pq);
+       BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+               !device->device_prep_dma_pq_val);
        BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
                !device->device_prep_dma_memset);
        BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device)
        BUG_ON(!device->device_issue_pending);
        BUG_ON(!device->dev);
 
+       /* note: this only matters in the
+        * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
+        */
+       if (device_has_all_tx_types(device))
+               dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
        idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
        if (!idr_ref)
                return -ENOMEM;
@@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 {
        tx->chan = chan;
        spin_lock_init(&tx->lock);
-       INIT_LIST_HEAD(&tx->tx_list);
 }
 EXPORT_SYMBOL(dma_async_tx_descriptor_init);
 
 /* dma_wait_for_async_tx - spin wait for a transaction to complete
  * @tx: in-flight transaction to wait on
- *
- * This routine assumes that tx was obtained from a call to async_memcpy,
- * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
- * and submitted).  Walking the parent chain is only meant to cover for DMA
- * drivers that do not implement the DMA_INTERRUPT capability and may race with
- * the driver's descriptor cleanup routine.
  */
 enum dma_status
 dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
-       enum dma_status status;
-       struct dma_async_tx_descriptor *iter;
-       struct dma_async_tx_descriptor *parent;
+       unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
 
        if (!tx)
                return DMA_SUCCESS;
 
-       WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
-                 " %s\n", __func__, dma_chan_name(tx->chan));
-
-       /* poll through the dependency chain, return when tx is complete */
-       do {
-               iter = tx;
-
-               /* find the root of the unsubmitted dependency chain */
-               do {
-                       parent = iter->parent;
-                       if (!parent)
-                               break;
-                       else
-                               iter = parent;
-               } while (parent);
-
-               /* there is a small window for ->parent == NULL and
-                * ->cookie == -EBUSY
-                */
-               while (iter->cookie == -EBUSY)
-                       cpu_relax();
-
-               status = dma_sync_wait(iter->chan, iter->cookie);
-       } while (status == DMA_IN_PROGRESS || (iter != tx));
-
-       return status;
+       while (tx->cookie == -EBUSY) {
+               if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+                       pr_err("%s timeout waiting for descriptor submission\n",
+                               __func__);
+                       return DMA_ERROR;
+               }
+               cpu_relax();
+       }
+       return dma_sync_wait(tx->chan, tx->cookie);
 }
 EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
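
With the speculative dependency-chain walk removed, callers now get a bounded wait; a hedged usage sketch, where tx is assumed to be an in-flight descriptor returned by async_memcpy() or a device_prep_* call:

	/* illustrative only: DMA_ERROR is returned if submission stalls for 5s */
	if (dma_wait_for_async_tx(tx) != DMA_SUCCESS)
		pr_err("transfer failed or timed out\n");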
 
index d93017fc7872d1f0fe3904ff85870ff8aee1156a..a32a4cf7b1e049ab537db5c065f4f6a474dca2d2 100644 (file)
@@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO);
 MODULE_PARM_DESC(xor_sources,
                "Number of xor source buffers (default: 3)");
 
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(pq_sources,
+               "Number of p+q source buffers (default: 3)");
+
 /*
  * Initialization patterns. All bytes in the source buffer have bit 7
  * set, all bytes in the destination buffer have bit 7 cleared.
@@ -232,6 +237,7 @@ static int dmatest_func(void *data)
        dma_cookie_t            cookie;
        enum dma_status         status;
        enum dma_ctrl_flags     flags;
+       u8                      pq_coefs[pq_sources];
        int                     ret;
        int                     src_cnt;
        int                     dst_cnt;
@@ -248,6 +254,11 @@ static int dmatest_func(void *data)
        else if (thread->type == DMA_XOR) {
                src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
                dst_cnt = 1;
+       } else if (thread->type == DMA_PQ) {
+               src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
+               dst_cnt = 2;
+               for (i = 0; i < pq_sources; i++)
+                       pq_coefs[i] = 1;
        } else
                goto err_srcs;
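
Since every coefficient is set to 1, and GF(256) multiplication by 1 is the identity while GF(256) addition is XOR, the engine's Q result degenerates to the plain XOR of the sources, i.e. the same value as P; stated as an identity (illustrative, not part of the patch):

	/* coefs all 1:  Q = sum_i(1 * D_i) over GF(256) = D_0 ^ D_1 ^ ... = P */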
 
@@ -283,6 +294,7 @@ static int dmatest_func(void *data)
                dma_addr_t dma_dsts[dst_cnt];
                struct completion cmp;
                unsigned long tmo = msecs_to_jiffies(3000);
+               u8 align = 0;
 
                total_tests++;
 
@@ -290,6 +302,18 @@ static int dmatest_func(void *data)
                src_off = dmatest_random() % (test_buf_size - len + 1);
                dst_off = dmatest_random() % (test_buf_size - len + 1);
 
+               /* honor alignment restrictions */
+               if (thread->type == DMA_MEMCPY)
+                       align = dev->copy_align;
+               else if (thread->type == DMA_XOR)
+                       align = dev->xor_align;
+               else if (thread->type == DMA_PQ)
+                       align = dev->pq_align;
+
+               len = (len >> align) << align;
+               src_off = (src_off >> align) << align;
+               dst_off = (dst_off >> align) << align;
+
                dmatest_init_srcs(thread->srcs, src_off, len);
                dmatest_init_dsts(thread->dsts, dst_off, len);
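
The shift pair above rounds each value down to a multiple of 1 << align; a standalone sketch with assumed sample values:

	#include <stdio.h>

	int main(void)
	{
		unsigned int len = 4097, align = 3; /* align is log2 of the boundary */

		/* clearing the low 'align' bits rounds down: 4097 -> 4096 */
		printf("%u -> %u\n", len, (len >> align) << align);
		return 0;
	}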
 
@@ -306,6 +330,7 @@ static int dmatest_func(void *data)
                                                     DMA_BIDIRECTIONAL);
                }
 
+
                if (thread->type == DMA_MEMCPY)
                        tx = dev->device_prep_dma_memcpy(chan,
                                                         dma_dsts[0] + dst_off,
@@ -316,6 +341,15 @@ static int dmatest_func(void *data)
                                                      dma_dsts[0] + dst_off,
                                                      dma_srcs, xor_sources,
                                                      len, flags);
+               else if (thread->type == DMA_PQ) {
+                       dma_addr_t dma_pq[dst_cnt];
+
+                       for (i = 0; i < dst_cnt; i++)
+                               dma_pq[i] = dma_dsts[i] + dst_off;
+                       tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+                                                    pq_sources, pq_coefs,
+                                                    len, flags);
+               }
 
                if (!tx) {
                        for (i = 0; i < src_cnt; i++)
@@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
                op = "copy";
        else if (type == DMA_XOR)
                op = "xor";
+       else if (type == DMA_PQ)
+               op = "pq";
        else
                return -EINVAL;
 
@@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
                cnt = dmatest_add_threads(dtc, DMA_XOR);
                thread_count += cnt > 0 ? cnt : 0;
        }
+       if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+               cnt = dmatest_add_threads(dtc, DMA_PQ);
+               thread_count += cnt > 0 ? cnt : 0;
+       }
 
        pr_info("dmatest: Started %u threads using %s\n",
                thread_count, dma_chan_name(chan));
index 933c143b6a740d8030d0d5318de4232e3d71d1ee..2eea823516a7aa9797f7908074c4ec68bd564b81 100644 (file)
@@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
 {
        struct dw_desc  *child;
 
-       list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &desc->tx_list, desc_node)
                dma_sync_single_for_cpu(chan2parent(&dwc->chan),
                                child->txd.phys, sizeof(child->lli),
                                DMA_TO_DEVICE);
@@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
                dwc_sync_desc_for_cpu(dwc, desc);
 
                spin_lock_bh(&dwc->lock);
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        dev_vdbg(chan2dev(&dwc->chan),
                                        "moving child desc %p to freelist\n",
                                        child);
-               list_splice_init(&desc->txd.tx_list, &dwc->free_list);
+               list_splice_init(&desc->tx_list, &dwc->free_list);
                dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
                list_add(&desc->desc_node, &dwc->free_list);
                spin_unlock_bh(&dwc->lock);
@@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
        param = txd->callback_param;
 
        dwc_sync_desc_for_cpu(dwc, desc);
-       list_splice_init(&txd->tx_list, &dwc->free_list);
+       list_splice_init(&desc->tx_list, &dwc->free_list);
        list_move(&desc->desc_node, &dwc->free_list);
 
-       /*
-        * We use dma_unmap_page() regardless of how the buffers were
-        * mapped before they were submitted...
-        */
-       if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
-               dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar,
-                              desc->len, DMA_FROM_DEVICE);
-       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
-               dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar,
-                              desc->len, DMA_TO_DEVICE);
+       if (!dwc->chan.private) {
+               struct device *parent = chan2parent(&dwc->chan);
+               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+                               dma_unmap_single(parent, desc->lli.dar,
+                                               desc->len, DMA_FROM_DEVICE);
+                       else
+                               dma_unmap_page(parent, desc->lli.dar,
+                                               desc->len, DMA_FROM_DEVICE);
+               }
+               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+                               dma_unmap_single(parent, desc->lli.sar,
+                                               desc->len, DMA_TO_DEVICE);
+                       else
+                               dma_unmap_page(parent, desc->lli.sar,
+                                               desc->len, DMA_TO_DEVICE);
+               }
+       }
 
        /*
         * The API requires that no submissions are done from a
@@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
                        /* This one is currently in progress */
                        return;
 
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        if (child->lli.llp == llp)
                                /* Currently in progress */
                                return;
@@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
        dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
                        "  cookie: %d\n", bad_desc->txd.cookie);
        dwc_dump_lli(dwc, &bad_desc->lli);
-       list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &bad_desc->tx_list, desc_node)
                dwc_dump_lli(dwc, &child->lli);
 
        /* Pretend the descriptor completed successfully */
@@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                                        prev->txd.phys, sizeof(prev->lli),
                                        DMA_TO_DEVICE);
                        list_add_tail(&desc->desc_node,
-                                       &first->txd.tx_list);
+                                       &first->tx_list);
                }
                prev = desc;
        }
@@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
        reg_width = dws->reg_width;
        prev = first = NULL;
 
-       sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
        switch (direction) {
        case DMA_TO_DEVICE:
                ctllo = (DWC_DEFAULT_CTLLO
@@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                                sizeof(prev->lli),
                                                DMA_TO_DEVICE);
                                list_add_tail(&desc->desc_node,
-                                               &first->txd.tx_list);
+                                               &first->tx_list);
                        }
                        prev = desc;
                        total_len += len;
@@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                                sizeof(prev->lli),
                                                DMA_TO_DEVICE);
                                list_add_tail(&desc->desc_node,
-                                               &first->txd.tx_list);
+                                               &first->tx_list);
                        }
                        prev = desc;
                        total_len += len;
@@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
                        break;
                }
 
+               INIT_LIST_HEAD(&desc->tx_list);
                dma_async_tx_descriptor_init(&desc->txd, chan);
                desc->txd.tx_submit = dwc_tx_submit;
                desc->txd.flags = DMA_CTRL_ACK;
index 13a580767031a3aac04046ffd563ef1d83f053b0..d9a939f67f461ffa7b5deffd0f21d35bd7678864 100644 (file)
@@ -217,6 +217,7 @@ struct dw_desc {
 
        /* THEN values for driver housekeeping */
        struct list_head                desc_node;
+       struct list_head                tx_list;
        struct dma_async_tx_descriptor  txd;
        size_t                          len;
 };
index ef87a89841450e0f35f1a35f4753b0207abc0300..296f9e747fac3b920cc344e6862f6d20d7c4b6b8 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/dmapool.h>
 #include <linux/of_platform.h>
 
+#include <asm/fsldma.h>
 #include "fsldma.h"
 
 static void dma_init(struct fsl_dma_chan *fsl_chan)
@@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
 }
 
 /**
- * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * fsl_chan_set_request_count - Set DMA Request Count for external control
  * @fsl_chan : Freescale DMA channel
- * @size     : Pause control size, 0 for disable external pause control.
- *             The maximum is 1024.
+ * @size     : Number of bytes to transfer in a single request
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA request count is how many bytes are allowed to transfer before
+ * pausing the channel, after which a new assertion of DREQ# resumes channel
+ * operation.
  *
- * The Freescale DMA channel can be controlled by the external
- * signal DREQ#. The pause control size is how many bytes are allowed
- * to transfer before pausing the channel, after which a new assertion
- * of DREQ# resumes channel operation.
+ * A size of 0 disables external pause control. The maximum size is 1024.
  */
-static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size)
+static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
 {
-       if (size > 1024)
-               return;
+       BUG_ON(size > 1024);
+       DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+               DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+                       | ((__ilog2(size) << 24) & 0x0f000000),
+               32);
+}
 
-       if (size) {
-               DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-                       DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
-                               | ((__ilog2(size) << 24) & 0x0f000000),
-                       32);
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @fsl_chan : Freescale DMA channel
+ * @enable   : 0 is disabled, 1 is enabled.
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA Request Count feature should be used in addition to this feature
+ * to set the number of bytes to transfer before pausing the channel.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
+{
+       if (enable)
                fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
-       } else
+       else
                fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
 }
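
For reference, the request count programmed above lands in bits 27:24 of the mode register as a log2 value; a worked sketch with an assumed size:

	/* illustrative only: size = 1024, ilog2(1024) == 10 */
	unsigned int mr_field = (10 << 24) & 0x0f000000;	/* == 0x0a000000 */
	/* this field is OR'd into the current MR contents read back above */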
 
@@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
 static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
        struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
-       struct fsl_desc_sw *desc;
+       struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+       struct fsl_desc_sw *child;
        unsigned long flags;
        dma_cookie_t cookie;
 
@@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
        spin_lock_irqsave(&fsl_chan->desc_lock, flags);
 
        cookie = fsl_chan->common.cookie;
-       list_for_each_entry(desc, &tx->tx_list, node) {
+       list_for_each_entry(child, &desc->tx_list, node) {
                cookie++;
                if (cookie < 0)
                        cookie = 1;
@@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
        }
 
        fsl_chan->common.cookie = cookie;
-       append_ld_queue(fsl_chan, tx_to_fsl_desc(tx));
-       list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev);
+       append_ld_queue(fsl_chan, desc);
+       list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
 
        spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 
@@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
        desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
        if (desc_sw) {
                memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
+               INIT_LIST_HEAD(&desc_sw->tx_list);
                dma_async_tx_descriptor_init(&desc_sw->async_tx,
                                                &fsl_chan->common);
                desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
@@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
        new->async_tx.flags = flags;
 
        /* Insert the link descriptor to the LD ring */
-       list_add_tail(&new->node, &new->async_tx.tx_list);
+       list_add_tail(&new->node, &new->tx_list);
 
        /* Set End-of-link to the last link descriptor of new list*/
        set_ld_eol(fsl_chan, new);
@@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
                dma_dest += copy;
 
                /* Insert the link descriptor to the LD ring */
-               list_add_tail(&new->node, &first->async_tx.tx_list);
+               list_add_tail(&new->node, &first->tx_list);
        } while (len);
 
        new->async_tx.flags = flags; /* client is in control of this ack */
@@ -528,7 +543,7 @@ fail:
        if (!first)
                return NULL;
 
-       list = &first->async_tx.tx_list;
+       list = &first->tx_list;
        list_for_each_entry_safe_reverse(new, prev, list, node) {
                list_del(&new->node);
                dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
@@ -537,6 +552,229 @@ fail:
        return NULL;
 }
 
+/**
+ * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @sgl
+ * @direction: DMA direction
+ * @flags: DMAEngine flags
+ *
+ * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
+ * DMA_SLAVE API, this gets the device-specific information from the
+ * chan->private variable.
+ */
+static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
+       struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+       enum dma_data_direction direction, unsigned long flags)
+{
+       struct fsl_dma_chan *fsl_chan;
+       struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+       struct fsl_dma_slave *slave;
+       struct list_head *tx_list;
+       size_t copy;
+
+       int i;
+       struct scatterlist *sg;
+       size_t sg_used;
+       size_t hw_used;
+       struct fsl_dma_hw_addr *hw;
+       dma_addr_t dma_dst, dma_src;
+
+       if (!chan)
+               return NULL;
+
+       if (!chan->private)
+               return NULL;
+
+       fsl_chan = to_fsl_chan(chan);
+       slave = chan->private;
+
+       if (list_empty(&slave->addresses))
+               return NULL;
+
+       hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
+       hw_used = 0;
+
+       /*
+        * Build the hardware transaction to copy from the scatterlist to
+        * the hardware, or from the hardware to the scatterlist
+        *
+        * If you are copying from the hardware to the scatterlist and it
+        * takes two hardware entries to fill an entire page, then both
+        * hardware entries will be coalesced into the same page
+        *
+        * If you are copying from the scatterlist to the hardware and a
+        * single page can fill two hardware entries, then the data will
+        * be read out of the page into the first hardware entry, and so on
+        */
+       for_each_sg(sgl, sg, sg_len, i) {
+               sg_used = 0;
+
+               /* Loop until the entire scatterlist entry is used */
+               while (sg_used < sg_dma_len(sg)) {
+
+                       /*
+                        * If we've used up the current hardware address/length
+                        * pair, we need to load a new one
+                        *
+                        * This is done in a while loop so that descriptors with
+                        * length == 0 will be skipped
+                        */
+                       while (hw_used >= hw->length) {
+
+                               /*
+                                * If the current hardware entry is the last
+                                * entry in the list, we're finished
+                                */
+                               if (list_is_last(&hw->entry, &slave->addresses))
+                                       goto finished;
+
+                               /* Get the next hardware address/length pair */
+                               hw = list_entry(hw->entry.next,
+                                               struct fsl_dma_hw_addr, entry);
+                               hw_used = 0;
+                       }
+
+                       /* Allocate the link descriptor from DMA pool */
+                       new = fsl_dma_alloc_descriptor(fsl_chan);
+                       if (!new) {
+                               dev_err(fsl_chan->dev, "No free memory for "
+                                                      "link descriptor\n");
+                               goto fail;
+                       }
+#ifdef FSL_DMA_LD_DEBUG
+                       dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+                       /*
+                        * Calculate the maximum number of bytes to transfer,
+                        * making sure it is less than the DMA controller limit
+                        */
+                       copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+                                            hw->length - hw_used);
+                       copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
+
+                       /*
+                        * DMA_FROM_DEVICE
+                        * from the hardware to the scatterlist
+                        *
+                        * DMA_TO_DEVICE
+                        * from the scatterlist to the hardware
+                        */
+                       if (direction == DMA_FROM_DEVICE) {
+                               dma_src = hw->address + hw_used;
+                               dma_dst = sg_dma_address(sg) + sg_used;
+                       } else {
+                               dma_src = sg_dma_address(sg) + sg_used;
+                               dma_dst = hw->address + hw_used;
+                       }
+
+                       /* Fill in the descriptor */
+                       set_desc_cnt(fsl_chan, &new->hw, copy);
+                       set_desc_src(fsl_chan, &new->hw, dma_src);
+                       set_desc_dest(fsl_chan, &new->hw, dma_dst);
+
+                       /*
+                        * If this is not the first descriptor, chain the
+                        * current descriptor after the previous descriptor
+                        */
+                       if (!first) {
+                               first = new;
+                       } else {
+                               set_desc_next(fsl_chan, &prev->hw,
+                                             new->async_tx.phys);
+                       }
+
+                       new->async_tx.cookie = 0;
+                       async_tx_ack(&new->async_tx);
+
+                       prev = new;
+                       sg_used += copy;
+                       hw_used += copy;
+
+                       /* Insert the link descriptor into the LD ring */
+                       list_add_tail(&new->node, &first->tx_list);
+               }
+       }
+
+finished:
+
+       /* All of the hardware address/length pairs had length == 0 */
+       if (!first || !new)
+               return NULL;
+
+       new->async_tx.flags = flags;
+       new->async_tx.cookie = -EBUSY;
+
+       /* Set End-of-link to the last link descriptor of new list */
+       set_ld_eol(fsl_chan, new);
+
+       /* Enable extra controller features */
+       if (fsl_chan->set_src_loop_size)
+               fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
+
+       if (fsl_chan->set_dest_loop_size)
+               fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
+
+       if (fsl_chan->toggle_ext_start)
+               fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
+
+       if (fsl_chan->toggle_ext_pause)
+               fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
+
+       if (fsl_chan->set_request_count)
+               fsl_chan->set_request_count(fsl_chan, slave->request_count);
+
+       return &first->async_tx;
+
+fail:
+       /* If first was not set, then we failed to allocate the very first
+        * descriptor, and we're done */
+       if (!first)
+               return NULL;
+
+       /*
+        * First is set, so all of the descriptors we allocated have been added
+        * to first->tx_list, INCLUDING "first" itself. Therefore we
+        * must traverse the list backwards freeing each descriptor in turn
+        *
+        * We're re-using variables for the loop, oh well
+        */
+       tx_list = &first->tx_list;
+       list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
+               list_del_init(&new->node);
+               dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
+       }
+
+       return NULL;
+}
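
A hedged sketch of the client side, assuming the fsl_dma_slave and fsl_dma_hw_addr definitions from asm/fsldma.h; fifo_phys, sgl and nents are illustrative names, not part of this patch:

	/* describe one 512-byte device FIFO window, then prep the transaction */
	struct fsl_dma_slave slave = { .request_count = 64 };
	struct fsl_dma_hw_addr hw = { .address = fifo_phys, .length = 512 };
	struct dma_async_tx_descriptor *tx;

	INIT_LIST_HEAD(&slave.addresses);
	list_add_tail(&hw.entry, &slave.addresses);
	chan->private = &slave;
	tx = chan->device->device_prep_slave_sg(chan, sgl, nents,
						DMA_TO_DEVICE, DMA_PREP_INTERRUPT);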
+
+static void fsl_dma_device_terminate_all(struct dma_chan *chan)
+{
+       struct fsl_dma_chan *fsl_chan;
+       struct fsl_desc_sw *desc, *tmp;
+       unsigned long flags;
+
+       if (!chan)
+               return;
+
+       fsl_chan = to_fsl_chan(chan);
+
+       /* Halt the DMA engine */
+       dma_halt(fsl_chan);
+
+       spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+       /* Remove and free all of the descriptors in the LD queue */
+       list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
+               list_del(&desc->node);
+               dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+       }
+
+       spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
 /**
  * fsl_dma_update_completed_cookie - Update the completed cookie.
  * @fsl_chan : Freescale DMA channel
@@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
                new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
                new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
                new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
+               new_fsl_chan->set_request_count = fsl_chan_set_request_count;
        }
 
        spin_lock_init(&new_fsl_chan->desc_lock);
@@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 
        dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
        dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+       dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
        fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
        fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
        fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
        fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
        fdev->common.device_is_tx_complete = fsl_dma_is_complete;
        fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+       fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
+       fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
        fdev->common.dev = &dev->dev;
 
        fdev->irq = irq_of_parse_and_map(dev->node, 0);
index dc7f26865797cc12ef6610296a83919107c5ad84..0df14cbb8ca335d4ab5b2e444e78a3f42c22709b 100644 (file)
@@ -90,6 +90,7 @@ struct fsl_dma_ld_hw {
 struct fsl_desc_sw {
        struct fsl_dma_ld_hw hw;
        struct list_head node;
+       struct list_head tx_list;
        struct dma_async_tx_descriptor async_tx;
        struct list_head *ld;
        void *priv;
@@ -143,10 +144,11 @@ struct fsl_dma_chan {
        struct tasklet_struct tasklet;
        u32 feature;
 
-       void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size);
+       void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
        void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
        void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
        void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+       void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
 };
 
 #define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
deleted file mode 100644 (file)
index 2225bb6..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2007 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-/*
- * This driver supports an Intel I/OAT DMA engine, which does asynchronous
- * copy operations.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dca.h>
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-#include "ioatdma_hw.h"
-
-MODULE_VERSION(IOAT_DMA_VERSION);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Intel Corporation");
-
-static struct pci_device_id ioat_pci_tbl[] = {
-       /* I/OAT v1 platforms */
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
-       { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
-
-       /* I/OAT v2 platforms */
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
-
-       /* I/OAT v3 platforms */
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
-       { 0, }
-};
-
-struct ioat_device {
-       struct pci_dev          *pdev;
-       void __iomem            *iobase;
-       struct ioatdma_device   *dma;
-       struct dca_provider     *dca;
-};
-
-static int __devinit ioat_probe(struct pci_dev *pdev,
-                               const struct pci_device_id *id);
-static void __devexit ioat_remove(struct pci_dev *pdev);
-
-static int ioat_dca_enabled = 1;
-module_param(ioat_dca_enabled, int, 0644);
-MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
-
-static struct pci_driver ioat_pci_driver = {
-       .name           = "ioatdma",
-       .id_table       = ioat_pci_tbl,
-       .probe          = ioat_probe,
-       .remove         = __devexit_p(ioat_remove),
-};
-
-static int __devinit ioat_probe(struct pci_dev *pdev,
-                               const struct pci_device_id *id)
-{
-       void __iomem *iobase;
-       struct ioat_device *device;
-       unsigned long mmio_start, mmio_len;
-       int err;
-
-       err = pci_enable_device(pdev);
-       if (err)
-               goto err_enable_device;
-
-       err = pci_request_regions(pdev, ioat_pci_driver.name);
-       if (err)
-               goto err_request_regions;
-
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (err)
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (err)
-               goto err_set_dma_mask;
-
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (err)
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (err)
-               goto err_set_dma_mask;
-
-       mmio_start = pci_resource_start(pdev, 0);
-       mmio_len = pci_resource_len(pdev, 0);
-       iobase = ioremap(mmio_start, mmio_len);
-       if (!iobase) {
-               err = -ENOMEM;
-               goto err_ioremap;
-       }
-
-       device = kzalloc(sizeof(*device), GFP_KERNEL);
-       if (!device) {
-               err = -ENOMEM;
-               goto err_kzalloc;
-       }
-       device->pdev = pdev;
-       pci_set_drvdata(pdev, device);
-       device->iobase = iobase;
-
-       pci_set_master(pdev);
-
-       switch (readb(iobase + IOAT_VER_OFFSET)) {
-       case IOAT_VER_1_2:
-               device->dma = ioat_dma_probe(pdev, iobase);
-               if (device->dma && ioat_dca_enabled)
-                       device->dca = ioat_dca_init(pdev, iobase);
-               break;
-       case IOAT_VER_2_0:
-               device->dma = ioat_dma_probe(pdev, iobase);
-               if (device->dma && ioat_dca_enabled)
-                       device->dca = ioat2_dca_init(pdev, iobase);
-               break;
-       case IOAT_VER_3_0:
-               device->dma = ioat_dma_probe(pdev, iobase);
-               if (device->dma && ioat_dca_enabled)
-                       device->dca = ioat3_dca_init(pdev, iobase);
-               break;
-       default:
-               err = -ENODEV;
-               break;
-       }
-       if (!device->dma)
-               err = -ENODEV;
-
-       if (err)
-               goto err_version;
-
-       return 0;
-
-err_version:
-       kfree(device);
-err_kzalloc:
-       iounmap(iobase);
-err_ioremap:
-err_set_dma_mask:
-       pci_release_regions(pdev);
-       pci_disable_device(pdev);
-err_request_regions:
-err_enable_device:
-       return err;
-}
-
-static void __devexit ioat_remove(struct pci_dev *pdev)
-{
-       struct ioat_device *device = pci_get_drvdata(pdev);
-
-       dev_err(&pdev->dev, "Removing dma and dca services\n");
-       if (device->dca) {
-               unregister_dca_provider(device->dca);
-               free_dca_provider(device->dca);
-               device->dca = NULL;
-       }
-
-       if (device->dma) {
-               ioat_dma_remove(device->dma);
-               device->dma = NULL;
-       }
-
-       kfree(device);
-}
-
-static int __init ioat_init_module(void)
-{
-       return pci_register_driver(&ioat_pci_driver);
-}
-module_init(ioat_init_module);
-
-static void __exit ioat_exit_module(void)
-{
-       pci_unregister_driver(&ioat_pci_driver);
-}
-module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644 (file)
index 0000000..8997d3f
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
new file mode 100644 (file)
index 0000000..69d0261
--- /dev/null
@@ -0,0 +1,684 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "dma.h"
+#include "registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/* expected tag map bytes for I/OAT ver.2 */
+#define DCA2_TAG_MAP_BYTE0 0x80
+#define DCA2_TAG_MAP_BYTE1 0x0
+#define DCA2_TAG_MAP_BYTE2 0x81
+#define DCA2_TAG_MAP_BYTE3 0x82
+#define DCA2_TAG_MAP_BYTE4 0x82
+
+/* verify if tag map matches expected values */
+static inline int dca2_tag_map_valid(u8 *tag_map)
+{
+       return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
+               (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
+               (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
+               (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
+               (tag_map[4] == DCA2_TAG_MAP_BYTE4));
+}
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device.  Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x)          (DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN       8
+
+static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
+       1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
+       1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
+       1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
+static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+       return (pci->bus->number << 8) | pci->devfn;
+}
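
For example, with assumed values bus 0x05, device 3, function 0: devfn = (3 << 3) | 0 = 0x18, so the packed requester id is (0x05 << 8) | 0x18 = 0x0518.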
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+       /* CPUID level 9 returns DCA configuration */
+       /* Bit 0 indicates DCA enabled by the BIOS */
+       unsigned long cpuid_level_9;
+       int res;
+
+       cpuid_level_9 = cpuid_eax(9);
+       res = test_bit(0, &cpuid_level_9);
+       if (!res)
+               dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
+
+       return res;
+}
+
+static int system_has_dca_enabled(struct pci_dev *pdev)
+{
+       if (boot_cpu_has(X86_FEATURE_DCA))
+               return dca_enabled_in_bios(pdev);
+
+       dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+       return 0;
+}
+
+struct ioat_dca_slot {
+       struct pci_dev *pdev;   /* requester device */
+       u16 rid;                /* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+       void __iomem            *iobase;
+       void __iomem            *dca_base;
+       int                      max_requesters;
+       int                      requester_count;
+       u8                       tag_map[IOAT_TAG_MAP_LEN];
+       struct ioat_dca_slot     req_slots[0];
+};
+
+/* 5000 series chipset DCA Port Requester ID Table Entry Format
+ * [15:8]      PCI-Express Bus Number
+ * [7:3]       PCI-Express Device Number
+ * [2:0]       PCI-Express Function Number
+ *
+ * 5000 series chipset DCA control register format
+ * [7:1]       Reserved (0)
+ * [0]         Ignore Function Number
+ */
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 id;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+       id = dcaid_from_pcidev(pdev);
+
+       if (ioatdca->requester_count == ioatdca->max_requesters)
+               return -ENODEV;
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == NULL) {
+                       /* found an empty slot */
+                       ioatdca->requester_count++;
+                       ioatdca->req_slots[i].pdev = pdev;
+                       ioatdca->req_slots[i].rid = id;
+                       writew(id, ioatdca->dca_base + (i * 4));
+                       /* make sure the ignore function bit is off */
+                       writeb(0, ioatdca->dca_base + (i * 4) + 2);
+                       return i;
+               }
+       }
+       /* Error, ioatdca->requester_count is out of whack */
+       return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+                                    struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev) {
+                       writew(0, ioatdca->dca_base + (i * 4));
+                       ioatdca->req_slots[i].pdev = NULL;
+                       ioatdca->req_slots[i].rid = 0;
+                       ioatdca->requester_count--;
+                       return i;
+               }
+       }
+       return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+                          struct device *dev,
+                          int cpu)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       int i, apic_id, bit, value;
+       u8 entry, tag;
+
+       tag = 0;
+       apic_id = cpu_physical_id(cpu);
+
+       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+               entry = ioatdca->tag_map[i];
+               if (entry & DCA_TAG_MAP_VALID) {
+                       bit = entry & ~DCA_TAG_MAP_VALID;
+                       value = (apic_id & (1 << bit)) ? 1 : 0;
+               } else {
+                       value = entry ? 1 : 0;
+               }
+               tag |= (value << i);
+       }
+       return tag;
+}
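
A standalone sketch of the tag decode above, using an assumed three-entry map and APIC ID (0x80 is the valid flag, the low bits select an APIC ID bit):

	#include <stdio.h>

	int main(void)
	{
		/* assumed map: entry 0 is the literal 1; entries 1-2 select
		 * APIC ID bits 1 and 2 */
		unsigned char map[3] = { 1, 0x81, 0x82 };
		unsigned char tag = 0;
		int apic_id = 0x6, i;	/* binary 110 */

		for (i = 0; i < 3; i++) {
			int v = (map[i] & 0x80) ?
				(apic_id >> (map[i] & 0x7f)) & 1 : (map[i] ? 1 : 0);
			tag |= v << i;
		}
		printf("tag = 0x%x\n", tag);	/* bit0=1, bit1=1, bit2=1 -> 0x7 */
		return 0;
	}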
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+                               struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+
+       pdev = to_pci_dev(dev);
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev)
+                       return 1;
+       }
+       return 0;
+}
+
+static struct dca_ops ioat_dca_ops = {
+       .add_requester          = ioat_dca_add_requester,
+       .remove_requester       = ioat_dca_remove_requester,
+       .get_tag                = ioat_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
+};
+
+
+struct dca_provider * __devinit
+ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct dca_provider *dca;
+       struct ioat_dca_priv *ioatdca;
+       u8 *tag_map = NULL;
+       int i;
+       int err;
+       u8 version;
+       u8 max_requesters;
+
+       if (!system_has_dca_enabled(pdev))
+               return NULL;
+
+       /* I/OAT v1 systems must have a known tag_map to support DCA */
+       switch (pdev->vendor) {
+       case PCI_VENDOR_ID_INTEL:
+               switch (pdev->device) {
+               case PCI_DEVICE_ID_INTEL_IOAT:
+                       tag_map = ioat_tag_map_BNB;
+                       break;
+               case PCI_DEVICE_ID_INTEL_IOAT_CNB:
+                       tag_map = ioat_tag_map_CNB;
+                       break;
+               case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
+                       tag_map = ioat_tag_map_SCNB;
+                       break;
+               }
+               break;
+       case PCI_VENDOR_ID_UNISYS:
+               switch (pdev->device) {
+               case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
+                       tag_map = ioat_tag_map_UNISYS;
+                       break;
+               }
+               break;
+       }
+       if (tag_map == NULL)
+               return NULL;
+
+       version = readb(iobase + IOAT_VER_OFFSET);
+       if (version == IOAT_VER_3_0)
+               max_requesters = IOAT3_DCA_MAX_REQ;
+       else
+               max_requesters = IOAT_DCA_MAX_REQ;
+
+       dca = alloc_dca_provider(&ioat_dca_ops,
+                       sizeof(*ioatdca) +
+                       (sizeof(struct ioat_dca_slot) * max_requesters));
+       if (!dca)
+               return NULL;
+
+       ioatdca = dca_priv(dca);
+       ioatdca->max_requesters = max_requesters;
+       ioatdca->dca_base = iobase + 0x54;
+
+       /* copy over the APIC ID to DCA tag mapping */
+       for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
+               ioatdca->tag_map[i] = tag_map[i];
+
+       err = register_dca_provider(dca, &pdev->dev);
+       if (err) {
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       return dca;
+}
+
+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 id;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+       id = dcaid_from_pcidev(pdev);
+
+       if (ioatdca->requester_count == ioatdca->max_requesters)
+               return -ENODEV;
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == NULL) {
+                       /* found an empty slot */
+                       ioatdca->requester_count++;
+                       ioatdca->req_slots[i].pdev = pdev;
+                       ioatdca->req_slots[i].rid = id;
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+                       writel(id | IOAT_DCA_GREQID_VALID,
+                              ioatdca->iobase + global_req_table + (i * 4));
+                       return i;
+               }
+       }
+       /* Error, ioatdca->requester_count is out of whack */
+       return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+                                     struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev) {
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
+                       ioatdca->req_slots[i].pdev = NULL;
+                       ioatdca->req_slots[i].rid = 0;
+                       ioatdca->requester_count--;
+                       return i;
+               }
+       }
+       return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca,
+                           struct device *dev,
+                           int cpu)
+{
+       u8 tag;
+
+       tag = ioat_dca_get_tag(dca, dev, cpu);
+       tag = (~tag) & 0x1F;
+       return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+       .add_requester          = ioat2_dca_add_requester,
+       .remove_requester       = ioat2_dca_remove_requester,
+       .get_tag                = ioat2_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
+};
+
+static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+       int slots = 0;
+       u32 req;
+       u16 global_req_table;
+
+       global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+       if (global_req_table == 0)
+               return 0;
+       do {
+               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+               slots++;
+       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+       return slots;
+}
+
+struct dca_provider * __devinit
+ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct dca_provider *dca;
+       struct ioat_dca_priv *ioatdca;
+       int slots;
+       int i;
+       int err;
+       u32 tag_map;
+       u16 dca_offset;
+       u16 csi_fsb_control;
+       u16 pcie_control;
+       u8 bit;
+
+       if (!system_has_dca_enabled(pdev))
+               return NULL;
+
+       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+       if (dca_offset == 0)
+               return NULL;
+
+       slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+       if (slots == 0)
+               return NULL;
+
+       dca = alloc_dca_provider(&ioat2_dca_ops,
+                                sizeof(*ioatdca)
+                                     + (sizeof(struct ioat_dca_slot) * slots));
+       if (!dca)
+               return NULL;
+
+       ioatdca = dca_priv(dca);
+       ioatdca->iobase = iobase;
+       ioatdca->dca_base = iobase + dca_offset;
+       ioatdca->max_requesters = slots;
+
+       /* some bios might not know to turn these on */
+       csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+       if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+               csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+               writew(csi_fsb_control,
+                      ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+       }
+       pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+       if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+               pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+               writew(pcie_control,
+                      ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+       }
+
+
+       /* TODO version, compatibility and configuration checks */
+
+       /* copy out the APIC to DCA tag map */
+       tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+       for (i = 0; i < 5; i++) {
+               bit = (tag_map >> (4 * i)) & 0x0f;
+               if (bit < 8)
+                       ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+               else
+                       ioatdca->tag_map[i] = 0;
+       }
+
+       if (!dca2_tag_map_valid(ioatdca->tag_map)) {
+               dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
+                       "disabling DCA\n");
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       err = register_dca_provider(dca, &pdev->dev);
+       if (err) {
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       return dca;
+}
+
+static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 id;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+       id = dcaid_from_pcidev(pdev);
+
+       if (ioatdca->requester_count == ioatdca->max_requesters)
+               return -ENODEV;
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == NULL) {
+                       /* found an empty slot */
+                       ioatdca->requester_count++;
+                       ioatdca->req_slots[i].pdev = pdev;
+                       ioatdca->req_slots[i].rid = id;
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+                       writel(id | IOAT_DCA_GREQID_VALID,
+                              ioatdca->iobase + global_req_table + (i * 4));
+                       return i;
+               }
+       }
+       /* Error, ioatdca->requester_count is out of whack */
+       return -EFAULT;
+}
+
+static int ioat3_dca_remove_requester(struct dca_provider *dca,
+                                     struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev) {
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
+                       ioatdca->req_slots[i].pdev = NULL;
+                       ioatdca->req_slots[i].rid = 0;
+                       ioatdca->requester_count--;
+                       return i;
+               }
+       }
+       return -ENODEV;
+}
+
+static u8 ioat3_dca_get_tag(struct dca_provider *dca,
+                           struct device *dev,
+                           int cpu)
+{
+       u8 tag;
+
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       int i, apic_id, bit, value;
+       u8 entry;
+
+       tag = 0;
+       apic_id = cpu_physical_id(cpu);
+
+       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+               entry = ioatdca->tag_map[i];
+               if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+                       bit = entry &
+                               ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+                       value = (apic_id & (1 << bit)) ? 1 : 0;
+               } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+                       bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+                       value = (apic_id & (1 << bit)) ? 0 : 1;
+               } else {
+                       value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+               }
+               tag |= (value << i);
+       }
+
+       return tag;
+}
+
+static struct dca_ops ioat3_dca_ops = {
+       .add_requester          = ioat3_dca_add_requester,
+       .remove_requester       = ioat3_dca_remove_requester,
+       .get_tag                = ioat3_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
+};
+
+static int ioat3_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+       int slots = 0;
+       u32 req;
+       u16 global_req_table;
+
+       global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+       if (global_req_table == 0)
+               return 0;
+
+       do {
+               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+               slots++;
+       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+       return slots;
+}
+
+struct dca_provider * __devinit
+ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct dca_provider *dca;
+       struct ioat_dca_priv *ioatdca;
+       int slots;
+       int i;
+       int err;
+       u16 dca_offset;
+       u16 csi_fsb_control;
+       u16 pcie_control;
+       u8 bit;
+
+       union {
+               u64 full;
+               struct {
+                       u32 low;
+                       u32 high;
+               };
+       } tag_map;
+
+       if (!system_has_dca_enabled(pdev))
+               return NULL;
+
+       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+       if (dca_offset == 0)
+               return NULL;
+
+       slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
+       if (slots == 0)
+               return NULL;
+
+       dca = alloc_dca_provider(&ioat3_dca_ops,
+                                sizeof(*ioatdca)
+                                     + (sizeof(struct ioat_dca_slot) * slots));
+       if (!dca)
+               return NULL;
+
+       ioatdca = dca_priv(dca);
+       ioatdca->iobase = iobase;
+       ioatdca->dca_base = iobase + dca_offset;
+       ioatdca->max_requesters = slots;
+
+       /* some BIOSes neglect to turn these on, so enable them here */
+       csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+       if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+               csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+               writew(csi_fsb_control,
+                      ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+       }
+       pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+       if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+               pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+               writew(pcie_control,
+                      ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+       }
+
+       /* TODO version, compatibility and configuration checks */
+
+       /* copy out the APIC to DCA tag map */
+       tag_map.low =
+               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+       tag_map.high =
+               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+       for (i = 0; i < 8; i++) {
+               bit = tag_map.full >> (8 * i);
+               ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+       }
+
+       err = register_dca_provider(dca, &pdev->dev);
+       if (err) {
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       return dca;
+}
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644 (file)
index 0000000..c524d36
--- /dev/null
@@ -0,0 +1,1238 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+                "high-water mark for pushing ioat descriptors (default: 4)");
+
+/* internal functions */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat);
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+       struct ioatdma_device *instance = data;
+       struct ioat_chan_common *chan;
+       unsigned long attnstatus;
+       int bit;
+       u8 intrctrl;
+
+       intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+       if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+               return IRQ_NONE;
+
+       if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+               writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+               return IRQ_NONE;
+       }
+
+       attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+       for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+               chan = ioat_chan_by_index(instance, bit);
+               tasklet_schedule(&chan->cleanup_task);
+       }
+
+       writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+       return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+       struct ioat_chan_common *chan = data;
+
+       tasklet_schedule(&chan->cleanup_task);
+
+       return IRQ_HANDLED;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data);
+
+/* common channel initialization */
+void ioat_init_channel(struct ioatdma_device *device,
+                      struct ioat_chan_common *chan, int idx,
+                      void (*timer_fn)(unsigned long),
+                      void (*tasklet)(unsigned long),
+                      unsigned long ioat)
+{
+       struct dma_device *dma = &device->common;
+
+       chan->device = device;
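+       /* bank 0 holds the device-global registers; each channel then
+        * gets its own 0x80-byte register bank */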
+       chan->reg_base = device->reg_base + (0x80 * (idx + 1));
+       spin_lock_init(&chan->cleanup_lock);
+       chan->common.device = dma;
+       list_add_tail(&chan->common.device_node, &dma->channels);
+       device->idx[idx] = chan;
+       init_timer(&chan->timer);
+       chan->timer.function = timer_fn;
+       chan->timer.data = ioat;
+       tasklet_init(&chan->cleanup_task, tasklet, ioat);
+       tasklet_disable(&chan->cleanup_task);
+}
+
+static void ioat1_timer_event(unsigned long data);
+
+/**
+ * ioat1_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat1_enumerate_channels(struct ioatdma_device *device)
+{
+       u8 xfercap_scale;
+       u32 xfercap;
+       int i;
+       struct ioat_dma_chan *ioat;
+       struct device *dev = &device->pdev->dev;
+       struct dma_device *dma = &device->common;
+
+       INIT_LIST_HEAD(&dma->channels);
+       dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+       dma->chancnt &= 0x1f; /* bits [4:0] valid */
+       if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+               dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+                        dma->chancnt, ARRAY_SIZE(device->idx));
+               dma->chancnt = ARRAY_SIZE(device->idx);
+       }
+       xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+       xfercap_scale &= 0x1f; /* bits [4:0] valid */
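+       /* e.g. a scale of 20 advertises a 1MB (1 << 20) maximum transfer;
+        * a scale of 0 means "no limit", hence the all-ones value */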
+       xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+       dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
+
+#ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
+       if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+               dma->chancnt--;
+#endif
+       for (i = 0; i < dma->chancnt; i++) {
+               ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+               if (!ioat)
+                       break;
+
+               ioat_init_channel(device, &ioat->base, i,
+                                 ioat1_timer_event,
+                                 ioat1_cleanup_tasklet,
+                                 (unsigned long) ioat);
+               ioat->xfercap = xfercap;
+               spin_lock_init(&ioat->desc_lock);
+               INIT_LIST_HEAD(&ioat->free_desc);
+               INIT_LIST_HEAD(&ioat->used_desc);
+       }
+       dma->chancnt = i;
+       return i;
+}
+
+/**
+ * __ioat1_dma_memcpy_issue_pending - push appended descriptors to hw
+ * @ioat: IOAT DMA channel handle
+ */
+static inline void
+__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
+{
+       void __iomem *reg_base = ioat->base.reg_base;
+
+       dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
+               __func__, ioat->pending);
+       ioat->pending = 0;
+       writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(chan);
+
+       if (ioat->pending > 0) {
+               spin_lock_bh(&ioat->desc_lock);
+               __ioat1_dma_memcpy_issue_pending(ioat);
+               spin_unlock_bh(&ioat->desc_lock);
+       }
+}
+
+/**
+ * ioat1_reset_channel - restart a channel
+ * @ioat: IOAT DMA channel handle
+ */
+static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       void __iomem *reg_base = chan->reg_base;
+       u32 chansts, chanerr;
+
+       dev_warn(to_dev(chan), "reset\n");
+       chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
+       chansts = *chan->completion & IOAT_CHANSTS_STATUS;
+       if (chanerr) {
+               dev_err(to_dev(chan),
+                       "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+                       chan_num(chan), chansts, chanerr);
+               writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
+       }
+
+       /*
+        * Whack it upside the head with a reset and wait for things to
+        * settle out.  Force the pending count to a really big negative
+        * value to make sure no one forces an issue_pending while we're
+        * waiting.
+        */
+
+       ioat->pending = INT_MIN;
+       writeb(IOAT_CHANCMD_RESET,
+              reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+       set_bit(IOAT_RESET_PENDING, &chan->state);
+       mod_timer(&chan->timer, jiffies + RESET_DELAY);
+}
+
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct dma_chan *c = tx->chan;
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+       struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_desc_sw *first;
+       struct ioat_desc_sw *chain_tail;
+       dma_cookie_t cookie;
+
+       spin_lock_bh(&ioat->desc_lock);
+       /* cookie incr and addition to used_list must be atomic */
+       cookie = c->cookie;
+       cookie++;
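+       /* dma_cookie_t is signed: on overflow wrap back to 1, as zero and
+        * negative values are reserved for "no cookie" and errors */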
+       if (cookie < 0)
+               cookie = 1;
+       c->cookie = cookie;
+       tx->cookie = cookie;
+       dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+       /* write address into NextDescriptor field of last desc in chain */
+       first = to_ioat_desc(desc->tx_list.next);
+       chain_tail = to_ioat_desc(ioat->used_desc.prev);
+       /* make descriptor updates globally visible before chaining */
+       wmb();
+       chain_tail->hw->next = first->txd.phys;
+       list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
+       dump_desc_dbg(ioat, chain_tail);
+       dump_desc_dbg(ioat, first);
+
+       if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+               mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       ioat->active += desc->hw->tx_cnt;
+       ioat->pending += desc->hw->tx_cnt;
+       if (ioat->pending >= ioat_pending_level)
+               __ioat1_dma_memcpy_issue_pending(ioat);
+       spin_unlock_bh(&ioat->desc_lock);
+
+       return cookie;
+}
+
+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
+static struct ioat_desc_sw *
+ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
+{
+       struct ioat_dma_descriptor *desc;
+       struct ioat_desc_sw *desc_sw;
+       struct ioatdma_device *ioatdma_device;
+       dma_addr_t phys;
+
+       ioatdma_device = ioat->base.device;
+       desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
+       if (unlikely(!desc))
+               return NULL;
+
+       desc_sw = kzalloc(sizeof(*desc_sw), flags);
+       if (unlikely(!desc_sw)) {
+               pci_pool_free(ioatdma_device->dma_pool, desc, phys);
+               return NULL;
+       }
+
+       memset(desc, 0, sizeof(*desc));
+
+       INIT_LIST_HEAD(&desc_sw->tx_list);
+       dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
+       desc_sw->txd.tx_submit = ioat1_tx_submit;
+       desc_sw->hw = desc;
+       desc_sw->txd.phys = phys;
+       set_desc_id(desc_sw, -1);
+
+       return desc_sw;
+}
+
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+                "ioat1: initial descriptors per channel (default: 256)");
+/**
+ * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @c: the channel to be filled out
+ */
+static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_desc_sw *desc;
+       u32 chanerr;
+       int i;
+       LIST_HEAD(tmp_list);
+
+       /* have we already been set up? */
+       if (!list_empty(&ioat->free_desc))
+               return ioat->desccount;
+
+       /* Set up the channel to interrupt and write completion status on error */
+       writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+       chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+       if (chanerr) {
+               dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+               writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+       }
+
+       /* Allocate descriptors */
+       for (i = 0; i < ioat_initial_desc_count; i++) {
+               desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
+               if (!desc) {
+                       dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
+                       break;
+               }
+               set_desc_id(desc, i);
+               list_add_tail(&desc->node, &tmp_list);
+       }
+       spin_lock_bh(&ioat->desc_lock);
+       ioat->desccount = i;
+       list_splice(&tmp_list, &ioat->free_desc);
+       spin_unlock_bh(&ioat->desc_lock);
+
+       /* allocate a completion writeback area */
+       /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+       chan->completion = pci_pool_alloc(chan->device->completion_pool,
+                                         GFP_KERNEL, &chan->completion_dma);
+       if (!chan->completion)
+               return -ENOMEM;
+       memset(chan->completion, 0, sizeof(*chan->completion));
+       writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+              chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+       writel(((u64) chan->completion_dma) >> 32,
+              chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+       tasklet_enable(&chan->cleanup_task);
+       ioat1_dma_start_null_desc(ioat);  /* give chain to dma device */
+       dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+               __func__, ioat->desccount);
+       return ioat->desccount;
+}
+
+/**
+ * ioat1_dma_free_chan_resources - release all the descriptors
+ * @c: the channel to be cleaned
+ */
+static void ioat1_dma_free_chan_resources(struct dma_chan *c)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioatdma_device *ioatdma_device = chan->device;
+       struct ioat_desc_sw *desc, *_desc;
+       int in_use_descs = 0;
+
+       /* Before freeing channel resources, first check whether they
+        * were ever allocated for this channel.
+        */
+       if (ioat->desccount == 0)
+               return;
+
+       tasklet_disable(&chan->cleanup_task);
+       del_timer_sync(&chan->timer);
+       ioat1_cleanup(ioat);
+
+       /* Delay 100ms after reset to allow internal DMA logic to quiesce
+        * before removing DMA descriptor resources.
+        */
+       writeb(IOAT_CHANCMD_RESET,
+              chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+       mdelay(100);
+
+       spin_lock_bh(&ioat->desc_lock);
+       list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
+               dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
+                       __func__, desc_id(desc));
+               dump_desc_dbg(ioat, desc);
+               in_use_descs++;
+               list_del(&desc->node);
+               pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+                             desc->txd.phys);
+               kfree(desc);
+       }
+       list_for_each_entry_safe(desc, _desc,
+                                &ioat->free_desc, node) {
+               list_del(&desc->node);
+               pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+                             desc->txd.phys);
+               kfree(desc);
+       }
+       spin_unlock_bh(&ioat->desc_lock);
+
+       pci_pool_free(ioatdma_device->completion_pool,
+                     chan->completion,
+                     chan->completion_dma);
+
+       /* one is ok since we left it on there on purpose */
+       if (in_use_descs > 1)
+               dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+                       in_use_descs - 1);
+
+       chan->last_completion = 0;
+       chan->completion_dma = 0;
+       ioat->pending = 0;
+       ioat->desccount = 0;
+}
+
+/**
+ * ioat1_dma_get_next_descriptor - return the next available descriptor
+ * @ioat: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the free list and must be called with
+ * the channel's desc_lock held.  Allocates more descriptors if the
+ * channel has run out.
+ */
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
+{
+       struct ioat_desc_sw *new;
+
+       if (!list_empty(&ioat->free_desc)) {
+               new = to_ioat_desc(ioat->free_desc.next);
+               list_del(&new->node);
+       } else {
+               /* try to get another desc */
+               new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
+               if (!new) {
+                       dev_err(to_dev(&ioat->base), "alloc failed\n");
+                       return NULL;
+               }
+       }
+       dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
+               __func__, desc_id(new));
+       prefetch(new->hw);
+       return new;
+}
+
+static struct dma_async_tx_descriptor *
+ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+                     dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+       struct ioat_desc_sw *desc;
+       size_t copy;
+       LIST_HEAD(chain);
+       dma_addr_t src = dma_src;
+       dma_addr_t dest = dma_dest;
+       size_t total_len = len;
+       struct ioat_dma_descriptor *hw = NULL;
+       int tx_cnt = 0;
+
+       spin_lock_bh(&ioat->desc_lock);
+       desc = ioat1_dma_get_next_descriptor(ioat);
+       do {
+               if (!desc)
+                       break;
+
+               tx_cnt++;
+               copy = min_t(size_t, len, ioat->xfercap);
+
+               hw = desc->hw;
+               hw->size = copy;
+               hw->ctl = 0;
+               hw->src_addr = src;
+               hw->dst_addr = dest;
+
+               list_add_tail(&desc->node, &chain);
+
+               len -= copy;
+               dest += copy;
+               src += copy;
+               if (len) {
+                       struct ioat_desc_sw *next;
+
+                       async_tx_ack(&desc->txd);
+                       next = ioat1_dma_get_next_descriptor(ioat);
+                       hw->next = next ? next->txd.phys : 0;
+                       dump_desc_dbg(ioat, desc);
+                       desc = next;
+               } else
+                       hw->next = 0;
+       } while (len);
+
+       if (!desc) {
+               struct ioat_chan_common *chan = &ioat->base;
+
+               dev_err(to_dev(chan),
+                       "chan%d - get_next_desc failed\n", chan_num(chan));
+               list_splice(&chain, &ioat->free_desc);
+               spin_unlock_bh(&ioat->desc_lock);
+               return NULL;
+       }
+       spin_unlock_bh(&ioat->desc_lock);
+
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       list_splice(&chain, &desc->tx_list);
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.compl_write = 1;
+       hw->tx_cnt = tx_cnt;
+       dump_desc_dbg(ioat, desc);
+
+       return &desc->txd;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data)
+{
+       struct ioat_dma_chan *chan = (void *)data;
+
+       ioat1_cleanup(chan);
+       writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+                   size_t len, struct ioat_dma_descriptor *hw)
+{
+       struct pci_dev *pdev = chan->device->pdev;
+       size_t offset = len - hw->size;
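+       /* hw is the chain's last descriptor, whose addresses point at the
+        * final chunk; back up by the length of the preceding chunks to
+        * recover the start of the whole buffer */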
+
+       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+               ioat_unmap(pdev, hw->dst_addr - offset, len,
+                          PCI_DMA_FROMDEVICE, flags, 1);
+
+       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
+               ioat_unmap(pdev, hw->src_addr - offset, len,
+                          PCI_DMA_TODEVICE, flags, 0);
+}
+
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
+{
+       unsigned long phys_complete;
+       u64 completion;
+
+       completion = *chan->completion;
+       phys_complete = ioat_chansts_to_addr(completion);
+
+       dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
+               (unsigned long long) phys_complete);
+
+       if (is_ioat_halted(completion)) {
+               u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+               dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
+                       chanerr);
+
+               /* TODO do something to salvage the situation */
+       }
+
+       return phys_complete;
+}
+
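+/* returns true when new completions have landed since the last pass,
+ * recording the new completion address and re-arming the timer */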
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+                          unsigned long *phys_complete)
+{
+       *phys_complete = ioat_get_current_completion(chan);
+       if (*phys_complete == chan->last_completion)
+               return false;
+       clear_bit(IOAT_COMPLETION_ACK, &chan->state);
+       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       return true;
+}
+
+static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct list_head *_desc, *n;
+       struct dma_async_tx_descriptor *tx;
+
+       dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
+                __func__, phys_complete);
+       list_for_each_safe(_desc, n, &ioat->used_desc) {
+               struct ioat_desc_sw *desc;
+
+               prefetch(n);
+               desc = list_entry(_desc, typeof(*desc), node);
+               tx = &desc->txd;
+               /*
+                * Incoming DMA requests may use multiple descriptors,
+                * due to exceeding xfercap, perhaps. If so, only the
+                * last one will have a cookie, and require unmapping.
+                */
+               dump_desc_dbg(ioat, desc);
+               if (tx->cookie) {
+                       chan->completed_cookie = tx->cookie;
+                       tx->cookie = 0;
+                       ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+                       ioat->active -= desc->hw->tx_cnt;
+                       if (tx->callback) {
+                               tx->callback(tx->callback_param);
+                               tx->callback = NULL;
+                       }
+               }
+
+               if (tx->phys != phys_complete) {
+                       /*
+                        * a completed entry, but not the last, so clean
+                        * up if the client is done with the descriptor
+                        */
+                       if (async_tx_test_ack(tx))
+                               list_move_tail(&desc->node, &ioat->free_desc);
+               } else {
+                       /*
+                        * last used desc. Do not remove, so we can
+                        * append from it.
+                        */
+
+                       /* if nothing else is pending, cancel the
+                        * completion timeout
+                        */
+                       if (n == &ioat->used_desc) {
+                               dev_dbg(to_dev(chan),
+                                       "%s cancel completion timeout\n",
+                                       __func__);
+                               clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+                       }
+
+                       /* TODO check status bits? */
+                       break;
+               }
+       }
+
+       chan->last_completion = phys_complete;
+}
+
+/**
+ * ioat1_cleanup - clean up finished descriptors
+ * @ioat: ioat channel to be cleaned up
+ *
+ * To avoid lock contention, cleanup is deferred while the locks are
+ * busy; a terminal timeout then forces cleanup and catches completion
+ * notification errors.
+ */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+
+       prefetch(chan->completion);
+
+       if (!spin_trylock_bh(&chan->cleanup_lock))
+               return;
+
+       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       if (!spin_trylock_bh(&ioat->desc_lock)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       __cleanup(ioat, phys_complete);
+
+       spin_unlock_bh(&ioat->desc_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat1_timer_event(unsigned long data)
+{
+       struct ioat_dma_chan *ioat = (void *) data;
+       struct ioat_chan_common *chan = &ioat->base;
+
+       dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
+
+       spin_lock_bh(&chan->cleanup_lock);
+       if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
+               struct ioat_desc_sw *desc;
+
+               spin_lock_bh(&ioat->desc_lock);
+
+               /* restart active descriptors */
+               desc = to_ioat_desc(ioat->used_desc.prev);
+               ioat_set_chainaddr(ioat, desc->txd.phys);
+               ioat_start(chan);
+
+               ioat->pending = 0;
+               set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+               mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               spin_unlock_bh(&ioat->desc_lock);
+       } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+               unsigned long phys_complete;
+
+               spin_lock_bh(&ioat->desc_lock);
+               /* if we haven't made progress and we have already
+                * acknowledged a pending completion once, then be more
+                * forceful with a restart
+                */
+               if (ioat_cleanup_preamble(chan, &phys_complete))
+                       __cleanup(ioat, phys_complete);
+               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+                       ioat1_reset_channel(ioat);
+               else {
+                       u64 status = ioat_chansts(chan);
+
+                       /* manually update the last completion address */
+                       if (ioat_chansts_to_addr(status) != 0)
+                               *chan->completion = status;
+
+                       set_bit(IOAT_COMPLETION_ACK, &chan->state);
+                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               }
+               spin_unlock_bh(&ioat->desc_lock);
+       }
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                     dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+       if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+               return DMA_SUCCESS;
+
+       ioat1_cleanup(ioat);
+
+       return ioat_is_complete(c, cookie, done, used);
+}
+
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_desc_sw *desc;
+       struct ioat_dma_descriptor *hw;
+
+       spin_lock_bh(&ioat->desc_lock);
+
+       desc = ioat1_dma_get_next_descriptor(ioat);
+
+       if (!desc) {
+               dev_err(to_dev(chan),
+                       "Unable to start null desc - get next desc failed\n");
+               spin_unlock_bh(&ioat->desc_lock);
+               return;
+       }
+
+       hw = desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = 1;
+       hw->ctl_f.compl_write = 1;
+       /* set size to non-zero value (channel returns error when size is 0) */
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       hw->src_addr = 0;
+       hw->dst_addr = 0;
+       async_tx_ack(&desc->txd);
+       hw->next = 0;
+       list_add_tail(&desc->node, &ioat->used_desc);
+       dump_desc_dbg(ioat, desc);
+
+       ioat_set_chainaddr(ioat, desc->txd.phys);
+       ioat_start(chan);
+       spin_unlock_bh(&ioat->desc_lock);
+}
+
+/*
+ * Perform an IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void __devinit ioat_dma_test_callback(void *dma_async_param)
+{
+       struct completion *cmp = dma_async_param;
+
+       complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+int __devinit ioat_dma_self_test(struct ioatdma_device *device)
+{
+       int i;
+       u8 *src;
+       u8 *dest;
+       struct dma_device *dma = &device->common;
+       struct device *dev = &device->pdev->dev;
+       struct dma_chan *dma_chan;
+       struct dma_async_tx_descriptor *tx;
+       dma_addr_t dma_dest, dma_src;
+       dma_cookie_t cookie;
+       int err = 0;
+       struct completion cmp;
+       unsigned long tmo;
+       unsigned long flags;
+
+       src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+       if (!src)
+               return -ENOMEM;
+       dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+       if (!dest) {
+               kfree(src);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffer */
+       for (i = 0; i < IOAT_TEST_SIZE; i++)
+               src[i] = (u8)i;
+
+       /* Start copy, using first DMA channel */
+       dma_chan = container_of(dma->channels.next, struct dma_chan,
+                               device_node);
+       if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+               dev_err(dev, "selftest cannot allocate chan resource\n");
+               err = -ENODEV;
+               goto out;
+       }
+
+       dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+       dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+       flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
+               DMA_PREP_INTERRUPT;
+       tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+                                                  IOAT_TEST_SIZE, flags);
+       if (!tx) {
+               dev_err(dev, "Self-test prep failed, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test setup failed, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (tmo == 0 ||
+           dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
+                                       != DMA_SUCCESS) {
+               dev_err(dev, "Self-test copy timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+               dev_err(dev, "Self-test copy failed compare, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+free_resources:
+       dma->device_free_chan_resources(dma_chan);
+out:
+       kfree(src);
+       kfree(dest);
+       return err;
+}
+
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+                   sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+                "set ioat interrupt style: msix (default), "
+                "msix-single-vector, msi, intx)");
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+{
+       struct ioat_chan_common *chan;
+       struct pci_dev *pdev = device->pdev;
+       struct device *dev = &pdev->dev;
+       struct msix_entry *msix;
+       int i, j, msixcnt;
+       int err = -EINVAL;
+       u8 intrctrl = 0;
+
+       if (!strcmp(ioat_interrupt_style, "msix"))
+               goto msix;
+       if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+               goto msix_single_vector;
+       if (!strcmp(ioat_interrupt_style, "msi"))
+               goto msi;
+       if (!strcmp(ioat_interrupt_style, "intx"))
+               goto intx;
+       dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+       goto err_no_irq;
+
+msix:
+       /* The number of MSI-X vectors should equal the number of channels */
+       msixcnt = device->common.chancnt;
+       for (i = 0; i < msixcnt; i++)
+               device->msix_entries[i].entry = i;
+
+       err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
+       if (err < 0)
+               goto msi;
+       if (err > 0)
+               goto msix_single_vector;
+
+       for (i = 0; i < msixcnt; i++) {
+               msix = &device->msix_entries[i];
+               chan = ioat_chan_by_index(device, i);
+               err = devm_request_irq(dev, msix->vector,
+                                      ioat_dma_do_interrupt_msix, 0,
+                                      "ioat-msix", chan);
+               if (err) {
+                       for (j = 0; j < i; j++) {
+                               msix = &device->msix_entries[j];
+                               chan = ioat_chan_by_index(device, j);
+                               devm_free_irq(dev, msix->vector, chan);
+                       }
+                       goto msix_single_vector;
+               }
+       }
+       intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+       goto done;
+
+msix_single_vector:
+       msix = &device->msix_entries[0];
+       msix->entry = 0;
+       err = pci_enable_msix(pdev, device->msix_entries, 1);
+       if (err)
+               goto msi;
+
+       err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
+                              "ioat-msix", device);
+       if (err) {
+               pci_disable_msix(pdev);
+               goto msi;
+       }
+       goto done;
+
+msi:
+       err = pci_enable_msi(pdev);
+       if (err)
+               goto intx;
+
+       err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+                              "ioat-msi", device);
+       if (err) {
+               pci_disable_msi(pdev);
+               goto intx;
+       }
+       goto done;
+
+intx:
+       err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+                              IRQF_SHARED, "ioat-intx", device);
+       if (err)
+               goto err_no_irq;
+
+done:
+       if (device->intr_quirk)
+               device->intr_quirk(device);
+       intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+       writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+       return 0;
+
+err_no_irq:
+       /* Disable all interrupt generation */
+       writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+       dev_err(dev, "no usable interrupts\n");
+       return err;
+}
+
+static void ioat_disable_interrupts(struct ioatdma_device *device)
+{
+       /* Disable all interrupt generation */
+       writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device)
+{
+       int err = -ENODEV;
+       struct dma_device *dma = &device->common;
+       struct pci_dev *pdev = device->pdev;
+       struct device *dev = &pdev->dev;
+
+       /* DMA coherent memory pool for DMA descriptor allocations */
+       device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+                                          sizeof(struct ioat_dma_descriptor),
+                                          64, 0);
+       if (!device->dma_pool) {
+               err = -ENOMEM;
+               goto err_dma_pool;
+       }
+
+       device->completion_pool = pci_pool_create("completion_pool", pdev,
+                                                 sizeof(u64), SMP_CACHE_BYTES,
+                                                 SMP_CACHE_BYTES);
+
+       if (!device->completion_pool) {
+               err = -ENOMEM;
+               goto err_completion_pool;
+       }
+
+       device->enumerate_channels(device);
+
+       dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+       dma->dev = &pdev->dev;
+
+       if (!dma->chancnt) {
+               dev_err(dev, "zero channels detected\n");
+               goto err_setup_interrupts;
+       }
+
+       err = ioat_dma_setup_interrupts(device);
+       if (err)
+               goto err_setup_interrupts;
+
+       err = device->self_test(device);
+       if (err)
+               goto err_self_test;
+
+       return 0;
+
+err_self_test:
+       ioat_disable_interrupts(device);
+err_setup_interrupts:
+       pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+       pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+       return err;
+}
+
+int __devinit ioat_register(struct ioatdma_device *device)
+{
+       int err = dma_async_device_register(&device->common);
+
+       if (err) {
+               ioat_disable_interrupts(device);
+               pci_pool_destroy(device->completion_pool);
+               pci_pool_destroy(device->dma_pool);
+       }
+
+       return err;
+}
+
+/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
+static void ioat1_intr_quirk(struct ioatdma_device *device)
+{
+       struct pci_dev *pdev = device->pdev;
+       u32 dmactrl;
+
+       pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
+       if (pdev->msi_enabled)
+               dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
+       else
+               dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
+       pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+       return sprintf(page, "%d\n", ioat->desccount);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+       return sprintf(page, "%d\n", ioat->active);
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static ssize_t cap_show(struct dma_chan *c, char *page)
+{
+       struct dma_device *dma = c->device;
+
+       return sprintf(page, "copy%s%s%s%s%s%s\n",
+                      dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+                      dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+                      dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+                      dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+                      dma_has_cap(DMA_MEMSET, dma->cap_mask)  ? " fill" : "",
+                      dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
+}
+struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+
+static ssize_t version_show(struct dma_chan *c, char *page)
+{
+       struct dma_device *dma = c->device;
+       struct ioatdma_device *device = to_ioatdma_device(dma);
+
+       return sprintf(page, "%d.%d\n",
+                      device->version >> 4, device->version & 0xf);
+}
+struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+
+static struct attribute *ioat1_attrs[] = {
+       &ring_size_attr.attr,
+       &ring_active_attr.attr,
+       &ioat_cap_attr.attr,
+       &ioat_version_attr.attr,
+       NULL,
+};
+
+static ssize_t
+ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+       struct ioat_sysfs_entry *entry;
+       struct ioat_chan_common *chan;
+
+       entry = container_of(attr, struct ioat_sysfs_entry, attr);
+       chan = container_of(kobj, struct ioat_chan_common, kobj);
+
+       if (!entry->show)
+               return -EIO;
+       return entry->show(&chan->common, page);
+}
+
+struct sysfs_ops ioat_sysfs_ops = {
+       .show   = ioat_attr_show,
+};
+
+static struct kobj_type ioat1_ktype = {
+       .sysfs_ops = &ioat_sysfs_ops,
+       .default_attrs = ioat1_attrs,
+};
+
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
+{
+       struct dma_device *dma = &device->common;
+       struct dma_chan *c;
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               struct ioat_chan_common *chan = to_chan_common(c);
+               struct kobject *parent = &c->dev->device.kobj;
+               int err;
+
+               err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
+               if (err) {
+                       dev_warn(to_dev(chan),
+                                "sysfs init error (%d), continuing...\n", err);
+                       kobject_put(&chan->kobj);
+                       set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
+               }
+       }
+}
+
+void ioat_kobject_del(struct ioatdma_device *device)
+{
+       struct dma_device *dma = &device->common;
+       struct dma_chan *c;
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               struct ioat_chan_common *chan = to_chan_common(c);
+
+               if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
+                       kobject_del(&chan->kobj);
+                       kobject_put(&chan->kobj);
+               }
+       }
+}
+
+int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
+{
+       struct pci_dev *pdev = device->pdev;
+       struct dma_device *dma;
+       int err;
+
+       device->intr_quirk = ioat1_intr_quirk;
+       device->enumerate_channels = ioat1_enumerate_channels;
+       device->self_test = ioat_dma_self_test;
+       dma = &device->common;
+       dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+       dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
+       dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
+       dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
+       dma->device_is_tx_complete = ioat1_dma_is_complete;
+
+       err = ioat_probe(device);
+       if (err)
+               return err;
+       ioat_set_tcp_copy_break(4096);
+       err = ioat_register(device);
+       if (err)
+               return err;
+       ioat_kobject_add(device, &ioat1_ktype);
+
+       if (dca)
+               device->dca = ioat_dca_init(pdev, device->reg_base);
+
+       return err;
+}
+
+void __devexit ioat_dma_remove(struct ioatdma_device *device)
+{
+       struct dma_device *dma = &device->common;
+
+       ioat_disable_interrupts(device);
+
+       ioat_kobject_del(device);
+
+       dma_async_device_unregister(dma);
+
+       pci_pool_destroy(device->dma_pool);
+       pci_pool_destroy(device->completion_pool);
+
+       INIT_LIST_HEAD(&dma->channels);
+}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644 (file)
index 0000000..c14fdfe
--- /dev/null
@@ -0,0 +1,337 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "hw.h"
+#include "registers.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <net/tcp.h>
+
+#define IOAT_DMA_VERSION  "4.00"
+
+#define IOAT_LOW_COMPLETION_MASK       0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU           ~0
+
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
+#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
+
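+/* channel register banks are 0x80 bytes apart and bank 0 belongs to the
+ * device-global registers, so chan_num() is effectively 1-based */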
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+/**
+ * struct ioatdma_device - internal representation of an IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @completion_pool: for allocating completion writeback blocks
+ * @common: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: MSI-X vector table for per-channel interrupts
+ * @idx: per channel data
+ * @dca: direct cache access context
+ * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
+ * @enumerate_channels: hw version specific channel enumeration
+ * @cleanup_tasklet: select between the v2 and v3 cleanup routines
+ * @timer_fn: select between the v2 and v3 timer watchdog routines
+ * @self_test: hardware version specific self test for each supported op type
+ *
+ * Note: the v3 cleanup routine supports raid operations
+ */
+struct ioatdma_device {
+       struct pci_dev *pdev;
+       void __iomem *reg_base;
+       struct pci_pool *dma_pool;
+       struct pci_pool *completion_pool;
+       struct dma_device common;
+       u8 version;
+       struct msix_entry msix_entries[4];
+       struct ioat_chan_common *idx[4];
+       struct dca_provider *dca;
+       void (*intr_quirk)(struct ioatdma_device *device);
+       int (*enumerate_channels)(struct ioatdma_device *device);
+       void (*cleanup_tasklet)(unsigned long data);
+       void (*timer_fn)(unsigned long data);
+       int (*self_test)(struct ioatdma_device *device);
+};
+
+struct ioat_chan_common {
+       struct dma_chan common;
+       void __iomem *reg_base;
+       unsigned long last_completion;
+       spinlock_t cleanup_lock;
+       dma_cookie_t completed_cookie;
+       unsigned long state;
+       #define IOAT_COMPLETION_PENDING 0
+       #define IOAT_COMPLETION_ACK 1
+       #define IOAT_RESET_PENDING 2
+       #define IOAT_KOBJ_INIT_FAIL 3
+       struct timer_list timer;
+       #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
+       #define IDLE_TIMEOUT msecs_to_jiffies(2000)
+       #define RESET_DELAY msecs_to_jiffies(100)
+       struct ioatdma_device *device;
+       dma_addr_t completion_dma;
+       u64 *completion;
+       struct tasklet_struct cleanup_task;
+       struct kobject kobj;
+};
+
+struct ioat_sysfs_entry {
+       struct attribute attr;
+       ssize_t (*show)(struct dma_chan *, char *);
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ */
+struct ioat_dma_chan {
+       struct ioat_chan_common base;
+
+       size_t xfercap; /* XFERCAP register value expanded out */
+
+       spinlock_t desc_lock;
+       struct list_head free_desc;
+       struct list_head used_desc;
+
+       int pending;
+       u16 desccount;
+       u16 active;
+};
+
+static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
+{
+       return container_of(c, struct ioat_chan_common, common);
+}
+
+static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
+{
+       struct ioat_chan_common *chan = to_chan_common(c);
+
+       return container_of(chan, struct ioat_dma_chan, base);
+}
+
+/**
+ * ioat_is_complete - poll the status of an ioat transaction
+ * @c: channel handle
+ * @cookie: transaction identifier
+ * @done: if set, updated with last completed transaction
+ * @used: if set, updated with last used transaction
+ */
+static inline enum dma_status
+ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct ioat_chan_common *chan = to_chan_common(c);
+       dma_cookie_t last_used;
+       dma_cookie_t last_complete;
+
+       last_used = c->cookie;
+       last_complete = chan->completed_cookie;
+
+       if (done)
+               *done = last_complete;
+       if (used)
+               *used = last_used;
+
+       return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @node: this descriptor will either be on the free list,
+ *     or attached to a transaction list (tx_list)
+ * @len: total length of the transaction this chain describes
+ * @tx_list: the other descriptors that make up this transaction
+ * @txd: the generic software descriptor for all engines
+ * @id: identifier for debug
+ */
+struct ioat_desc_sw {
+       struct ioat_dma_descriptor *hw;
+       struct list_head node;
+       size_t len;
+       struct list_head tx_list;
+       struct dma_async_tx_descriptor txd;
+       #ifdef DEBUG
+       int id;
+       #endif
+};
+
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+static inline void
+__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
+               struct dma_async_tx_descriptor *tx, int id)
+{
+       struct device *dev = to_dev(chan);
+
+       dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+               " ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
+               (unsigned long long) tx->phys,
+               (unsigned long long) hw->next, tx->cookie, tx->flags,
+               hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
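+/* the ({ ...; 0; }) statement expression tolerates a NULL descriptor and
+ * keeps the macro usable as a single expression */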
+#define dump_desc_dbg(c, d) \
+       ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; })
+
+static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
+{
+       #ifdef CONFIG_NET_DMA
+       sysctl_tcp_dma_copybreak = copybreak;
+       #endif
+}
+
+static inline struct ioat_chan_common *
+ioat_chan_by_index(struct ioatdma_device *device, int index)
+{
+       return device->idx[index];
+}
+
+static inline u64 ioat_chansts(struct ioat_chan_common *chan)
+{
+       u8 ver = chan->device->version;
+       u64 status;
+       u32 status_lo;
+
+       /* We need to read the low address first as this causes the
+        * chipset to latch the upper bits for the subsequent read
+        */
+       status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
+       status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
+       status <<= 32;
+       status |= status_lo;
+
+       return status;
+}
+
+static inline void ioat_start(struct ioat_chan_common *chan)
+{
+       u8 ver = chan->device->version;
+
+       writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+       return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
+{
+       return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+static inline void ioat_suspend(struct ioat_chan_common *chan)
+{
+       u8 ver = chan->device->version;
+
+       writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+
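+       /* like the completion address in dma.c, the 64-bit chain address
+        * is written as two 32-bit MMIO accesses */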
+       writel(addr & 0x00000000FFFFFFFF,
+              chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+       writel(addr >> 32,
+              chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+}
+
+static inline bool is_ioat_active(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline bool is_ioat_idle(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
+}
+
+static inline bool is_ioat_halted(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline bool is_ioat_suspended(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+/* true if the error indicates the channel was fatally mis-programmed */
+static inline bool is_ioat_bug(unsigned long err)
+{
+       return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR|
+                        IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR|
+                        IOAT_CHANERR_LENGTH_ERR));
+}
+
+static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
+                             int direction, enum dma_ctrl_flags flags, bool dst)
+{
+       if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
+           (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
+               pci_unmap_single(pdev, addr, len, direction);
+       else
+               pci_unmap_page(pdev, addr, len, direction);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device);
+int __devinit ioat_register(struct ioatdma_device *device);
+int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat_dma_self_test(struct ioatdma_device *device);
+void __devexit ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
+                                             void __iomem *iobase);
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
+void ioat_init_channel(struct ioatdma_device *device,
+                      struct ioat_chan_common *chan, int idx,
+                      void (*timer_fn)(unsigned long),
+                      void (*tasklet)(unsigned long),
+                      unsigned long ioat);
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+                   size_t len, struct ioat_dma_descriptor *hw);
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+                          unsigned long *phys_complete);
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *device);
+extern struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
new file mode 100644 (file)
index 0000000..96ffab7
--- /dev/null
@@ -0,0 +1,871 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
+ * does asynchronous data movement and checksumming operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_ring_alloc_order = 8;
+module_param(ioat_ring_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_alloc_order,
+                "ioat2+: allocate 2^n descriptors per channel"
+                " (default: 8, max: 16)");
+static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
+module_param(ioat_ring_max_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_max_alloc_order,
+                "ioat2+: upper limit for ring size (default: 16)");
+
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
+{
+       void __iomem *reg_base = ioat->base.reg_base;
+
+       ioat->pending = 0;
+       ioat->dmacount += ioat2_ring_pending(ioat);
+       ioat->issued = ioat->head;
+       /* make descriptor updates globally visible before notifying channel */
+       wmb();
+       writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+       dev_dbg(to_dev(&ioat->base),
+               "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+}
+
+void ioat2_issue_pending(struct dma_chan *chan)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
+
+       spin_lock_bh(&ioat->ring_lock);
+       if (ioat->pending == 1)
+               __ioat2_issue_pending(ioat);
+       spin_unlock_bh(&ioat->ring_lock);
+}
+
+/**
+ * ioat2_update_pending - log pending descriptors
+ * @ioat: ioat2+ channel
+ *
+ * set pending to '1' unless pending is already set to '2'; pending == 2
+ * indicates that submission is temporarily blocked due to an in-flight
+ * reset.  If we are already above the ioat_pending_level threshold then
+ * just issue pending.
+ *
+ * called with ring_lock held
+ */
+static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
+{
+       if (unlikely(ioat->pending == 2))
+               return;
+       else if (ioat2_ring_pending(ioat) > ioat_pending_level)
+               __ioat2_issue_pending(ioat);
+       else
+               ioat->pending = 1;
+}
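
Concretely, pending is a tiny three-state flag: 0 (nothing staged), 1 (descriptors staged for a later issue_pending), 2 (submission blocked by an in-flight reset). A userspace sketch of the update rule above, with illustrative state names and an arbitrary threshold standing in for ioat_pending_level:

    #include <assert.h>

    enum { NONE = 0, STAGED = 1, BLOCKED = 2 };

    static int update_pending(int pending, int ring_pending, int threshold)
    {
            if (pending == BLOCKED)
                    return BLOCKED;       /* sticky until the reset completes */
            if (ring_pending > threshold)
                    return NONE;          /* __ioat2_issue_pending() flushes now */
            return STAGED;
    }

    int main(void)
    {
            assert(update_pending(BLOCKED, 10, 4) == BLOCKED);
            assert(update_pending(NONE, 10, 4) == NONE);   /* deep backlog: issued */
            assert(update_pending(NONE, 2, 4) == STAGED);  /* shallow: deferred */
            return 0;
    }
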
+
+static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_ring_ent *desc;
+       struct ioat_dma_descriptor *hw;
+       int idx;
+
+       if (ioat2_ring_space(ioat) < 1) {
+               dev_err(to_dev(&ioat->base),
+                       "Unable to start null desc - ring full\n");
+               return;
+       }
+
+       dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued);
+       idx = ioat2_desc_alloc(ioat, 1);
+       desc = ioat2_get_ring_ent(ioat, idx);
+
+       hw = desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = 1;
+       hw->ctl_f.compl_write = 1;
+       /* set size to non-zero value (channel returns error when size is 0) */
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       hw->src_addr = 0;
+       hw->dst_addr = 0;
+       async_tx_ack(&desc->txd);
+       ioat2_set_chainaddr(ioat, desc->txd.phys);
+       dump_desc_dbg(ioat, desc);
+       __ioat2_issue_pending(ioat);
+}
+
+static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+       spin_lock_bh(&ioat->ring_lock);
+       __ioat2_start_null_desc(ioat);
+       spin_unlock_bh(&ioat->ring_lock);
+}
+
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct dma_async_tx_descriptor *tx;
+       struct ioat_ring_ent *desc;
+       bool seen_current = false;
+       u16 active;
+       int i;
+
+       dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued);
+
+       active = ioat2_ring_active(ioat);
+       for (i = 0; i < active && !seen_current; i++) {
+               prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+               desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+               tx = &desc->txd;
+               dump_desc_dbg(ioat, desc);
+               if (tx->cookie) {
+                       ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+                       chan->completed_cookie = tx->cookie;
+                       tx->cookie = 0;
+                       if (tx->callback) {
+                               tx->callback(tx->callback_param);
+                               tx->callback = NULL;
+                       }
+               }
+
+               if (tx->phys == phys_complete)
+                       seen_current = true;
+       }
+       ioat->tail += i;
+       BUG_ON(!seen_current); /* no active descs have written a completion? */
+
+       chan->last_completion = phys_complete;
+       if (ioat->head == ioat->tail) {
+               dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+                       __func__);
+               clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+               mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+}
+
+/**
+ * ioat2_cleanup - clean finished descriptors (advance tail pointer)
+ * @chan: ioat channel to be cleaned up
+ */
+static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+
+       prefetch(chan->completion);
+
+       if (!spin_trylock_bh(&chan->cleanup_lock))
+               return;
+
+       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       if (!spin_trylock_bh(&ioat->ring_lock)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       __cleanup(ioat, phys_complete);
+
+       spin_unlock_bh(&ioat->ring_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+void ioat2_cleanup_tasklet(unsigned long data)
+{
+       struct ioat2_dma_chan *ioat = (void *) data;
+
+       ioat2_cleanup(ioat);
+       writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+
+       /* set the tail to be re-issued */
+       ioat->issued = ioat->tail;
+       ioat->dmacount = 0;
+       set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       dev_dbg(to_dev(chan),
+               "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+
+       if (ioat2_ring_pending(ioat)) {
+               struct ioat_ring_ent *desc;
+
+               desc = ioat2_get_ring_ent(ioat, ioat->tail);
+               ioat2_set_chainaddr(ioat, desc->txd.phys);
+               __ioat2_issue_pending(ioat);
+       } else
+               __ioat2_start_null_desc(ioat);
+}
+
+static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+       u32 status;
+
+       status = ioat_chansts(chan);
+       if (is_ioat_active(status) || is_ioat_idle(status))
+               ioat_suspend(chan);
+       while (is_ioat_active(status) || is_ioat_idle(status)) {
+               status = ioat_chansts(chan);
+               cpu_relax();
+       }
+
+       if (ioat_cleanup_preamble(chan, &phys_complete))
+               __cleanup(ioat, phys_complete);
+
+       __ioat2_restart_chan(ioat);
+}
+
+void ioat2_timer_event(unsigned long data)
+{
+       struct ioat2_dma_chan *ioat = (void *) data;
+       struct ioat_chan_common *chan = &ioat->base;
+
+       spin_lock_bh(&chan->cleanup_lock);
+       if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+               unsigned long phys_complete;
+               u64 status;
+
+               spin_lock_bh(&ioat->ring_lock);
+               status = ioat_chansts(chan);
+
+               /* when halted due to errors check for channel
+                * programming errors before advancing the completion state
+                */
+               if (is_ioat_halted(status)) {
+                       u32 chanerr;
+
+                       chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+                       BUG_ON(is_ioat_bug(chanerr));
+               }
+
+               /* if we haven't made progress and we have already
+                * acknowledged a pending completion once, then be more
+                * forceful with a restart
+                */
+               if (ioat_cleanup_preamble(chan, &phys_complete))
+                       __cleanup(ioat, phys_complete);
+               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+                       ioat2_restart_channel(ioat);
+               else {
+                       set_bit(IOAT_COMPLETION_ACK, &chan->state);
+                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               }
+               spin_unlock_bh(&ioat->ring_lock);
+       } else {
+               u16 active;
+
+               /* if the ring is idle, empty, and oversized, try to step
+                * down the size
+                */
+               spin_lock_bh(&ioat->ring_lock);
+               active = ioat2_ring_active(ioat);
+               if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+                       reshape_ring(ioat, ioat->alloc_order-1);
+               spin_unlock_bh(&ioat->ring_lock);
+
+               /* keep shrinking until we get back to our minimum
+                * default size
+                */
+               if (ioat->alloc_order > ioat_get_alloc_order())
+                       mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+/**
+ * ioat2_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+int ioat2_enumerate_channels(struct ioatdma_device *device)
+{
+       struct ioat2_dma_chan *ioat;
+       struct device *dev = &device->pdev->dev;
+       struct dma_device *dma = &device->common;
+       u8 xfercap_log;
+       int i;
+
+       INIT_LIST_HEAD(&dma->channels);
+       dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+       dma->chancnt &= 0x1f; /* bits [4:0] valid */
+       if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+               dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+                        dma->chancnt, ARRAY_SIZE(device->idx));
+               dma->chancnt = ARRAY_SIZE(device->idx);
+       }
+       xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+       xfercap_log &= 0x1f; /* bits [4:0] valid */
+       if (xfercap_log == 0)
+               return 0;
+       dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+       /* FIXME which i/oat version is i7300? */
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+       if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+               dma->chancnt--;
+#endif
+       for (i = 0; i < dma->chancnt; i++) {
+               ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+               if (!ioat)
+                       break;
+
+               ioat_init_channel(device, &ioat->base, i,
+                                 device->timer_fn,
+                                 device->cleanup_tasklet,
+                                 (unsigned long) ioat);
+               ioat->xfercap_log = xfercap_log;
+               spin_lock_init(&ioat->ring_lock);
+       }
+       dma->chancnt = i;
+       return i;
+}
+
+static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+{
+       struct dma_chan *c = tx->chan;
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       dma_cookie_t cookie = c->cookie;
+
+       cookie++;
+       if (cookie < 0)
+               cookie = 1;
+       tx->cookie = cookie;
+       c->cookie = cookie;
+       dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+       if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+               mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+       ioat2_update_pending(ioat);
+       spin_unlock_bh(&ioat->ring_lock);
+
+       return cookie;
+}
+
+static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+{
+       struct ioat_dma_descriptor *hw;
+       struct ioat_ring_ent *desc;
+       struct ioatdma_device *dma;
+       dma_addr_t phys;
+
+       dma = to_ioatdma_device(chan->device);
+       hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
+       if (!hw)
+               return NULL;
+       memset(hw, 0, sizeof(*hw));
+
+       desc = kmem_cache_alloc(ioat2_cache, flags);
+       if (!desc) {
+               pci_pool_free(dma->dma_pool, hw, phys);
+               return NULL;
+       }
+       memset(desc, 0, sizeof(*desc));
+
+       dma_async_tx_descriptor_init(&desc->txd, chan);
+       desc->txd.tx_submit = ioat2_tx_submit_unlock;
+       desc->hw = hw;
+       desc->txd.phys = phys;
+       return desc;
+}
+
+static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+       struct ioatdma_device *dma;
+
+       dma = to_ioatdma_device(chan->device);
+       pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
+       kmem_cache_free(ioat2_cache, desc);
+}
+
+static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+       struct ioat_ring_ent **ring;
+       int descs = 1 << order;
+       int i;
+
+       if (order > ioat_get_max_alloc_order())
+               return NULL;
+
+       /* allocate the array to hold the software ring */
+       ring = kcalloc(descs, sizeof(*ring), flags);
+       if (!ring)
+               return NULL;
+       for (i = 0; i < descs; i++) {
+               ring[i] = ioat2_alloc_ring_ent(c, flags);
+               if (!ring[i]) {
+                       while (i--)
+                               ioat2_free_ring_ent(ring[i], c);
+                       kfree(ring);
+                       return NULL;
+               }
+               set_desc_id(ring[i], i);
+       }
+
+       /* link descs */
+       for (i = 0; i < descs-1; i++) {
+               struct ioat_ring_ent *next = ring[i+1];
+               struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+               hw->next = next->txd.phys;
+       }
+       ring[i]->hw->next = ring[0]->txd.phys;
+
+       return ring;
+}
+
+/**
+ * ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
+ * @chan: channel to be initialized
+ */
+int ioat2_alloc_chan_resources(struct dma_chan *c)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_ring_ent **ring;
+       u32 chanerr;
+       int order;
+
+       /* have we already been set up? */
+       if (ioat->ring)
+               return 1 << ioat->alloc_order;
+
+       /* Setup register to interrupt and write completion status on error */
+       writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+       chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+       if (chanerr) {
+               dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+               writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+       }
+
+       /* allocate a completion writeback area */
+       /* do two 32-bit MMIO writes since one 64-bit write doesn't work */
+       chan->completion = pci_pool_alloc(chan->device->completion_pool,
+                                         GFP_KERNEL, &chan->completion_dma);
+       if (!chan->completion)
+               return -ENOMEM;
+
+       memset(chan->completion, 0, sizeof(*chan->completion));
+       writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+              chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+       writel(((u64) chan->completion_dma) >> 32,
+              chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+       order = ioat_get_alloc_order();
+       ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
+       if (!ring)
+               return -ENOMEM;
+
+       spin_lock_bh(&ioat->ring_lock);
+       ioat->ring = ring;
+       ioat->head = 0;
+       ioat->issued = 0;
+       ioat->tail = 0;
+       ioat->pending = 0;
+       ioat->alloc_order = order;
+       spin_unlock_bh(&ioat->ring_lock);
+
+       tasklet_enable(&chan->cleanup_task);
+       ioat2_start_null_desc(ioat);
+
+       return 1 << ioat->alloc_order;
+}
+
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
+{
+       /* reshape differs from normal ring allocation in that we want
+        * to allocate a new software ring while only
+        * extending/truncating the hardware ring
+        */
+       struct ioat_chan_common *chan = &ioat->base;
+       struct dma_chan *c = &chan->common;
+       const u16 curr_size = ioat2_ring_mask(ioat) + 1;
+       const u16 active = ioat2_ring_active(ioat);
+       const u16 new_size = 1 << order;
+       struct ioat_ring_ent **ring;
+       u16 i;
+
+       if (order > ioat_get_max_alloc_order())
+               return false;
+
+       /* double check that we have at least 1 free descriptor */
+       if (active == curr_size)
+               return false;
+
+       /* when shrinking, verify that we can hold the current active
+        * set in the new ring
+        */
+       if (active >= new_size)
+               return false;
+
+       /* allocate the array to hold the software ring */
+       ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
+       if (!ring)
+               return false;
+
+       /* allocate/trim descriptors as needed */
+       if (new_size > curr_size) {
+               /* copy current descriptors to the new ring */
+               for (i = 0; i < curr_size; i++) {
+                       u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+                       ring[new_idx] = ioat->ring[curr_idx];
+                       set_desc_id(ring[new_idx], new_idx);
+               }
+
+               /* add new descriptors to the ring */
+               for (i = curr_size; i < new_size; i++) {
+                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+                       ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
+                       if (!ring[new_idx]) {
+                               while (i--) {
+                                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+                                       ioat2_free_ring_ent(ring[new_idx], c);
+                               }
+                               kfree(ring);
+                               return false;
+                       }
+                       set_desc_id(ring[new_idx], new_idx);
+               }
+
+               /* hw link new descriptors */
+               for (i = curr_size-1; i < new_size; i++) {
+                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+                       struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
+                       struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
+
+                       hw->next = next->txd.phys;
+               }
+       } else {
+               struct ioat_dma_descriptor *hw;
+               struct ioat_ring_ent *next;
+
+               /* copy current descriptors to the new ring, dropping the
+                * removed descriptors
+                */
+               for (i = 0; i < new_size; i++) {
+                       u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+                       ring[new_idx] = ioat->ring[curr_idx];
+                       set_desc_id(ring[new_idx], new_idx);
+               }
+
+               /* free deleted descriptors */
+               for (i = new_size; i < curr_size; i++) {
+                       struct ioat_ring_ent *ent;
+
+                       ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
+                       ioat2_free_ring_ent(ent, c);
+               }
+
+               /* fix up hardware ring */
+               hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
+               next = ring[(ioat->tail+new_size) & (new_size-1)];
+               hw->next = next->txd.phys;
+       }
+
+       dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+               __func__, new_size);
+
+       kfree(ioat->ring);
+       ioat->ring = ring;
+       ioat->alloc_order = order;
+
+       return true;
+}
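
The copy loops above preserve each descriptor's offset from tail while re-slotting it under the new mask; for example, growing a 4-entry ring with tail == 3 to 8 entries moves old slots 3, 0, 1, 2 to new slots 3, 4, 5, 6. A standalone check of that index arithmetic:

    #include <stdio.h>

    int main(void)
    {
            unsigned short tail = 3, curr_size = 4, new_size = 8, i;

            for (i = 0; i < curr_size; i++) {
                    unsigned short curr_idx = (tail + i) & (curr_size - 1);
                    unsigned short new_idx = (tail + i) & (new_size - 1);

                    printf("offset %u: old slot %u -> new slot %u\n",
                           i, curr_idx, new_idx);
            }
            /* prints 3->3, 0->4, 1->5, 2->6: order from tail is preserved */
            return 0;
    }
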
+
+/**
+ * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
+ * @idx: gets starting descriptor index on successful allocation
+ * @ioat: ioat2,3 channel (ring) to operate on
+ * @num_descs: allocation length
+ */
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+
+       spin_lock_bh(&ioat->ring_lock);
+       /* never allow the last descriptor to be consumed, we need at
+        * least one free at all times to allow for on-the-fly ring
+        * resizing.
+        */
+       while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
+               if (reshape_ring(ioat, ioat->alloc_order + 1) &&
+                   ioat2_ring_space(ioat) > num_descs)
+                       break;
+
+               if (printk_ratelimit())
+                       dev_dbg(to_dev(chan),
+                               "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+                               __func__, num_descs, ioat->head, ioat->tail,
+                               ioat->issued);
+               spin_unlock_bh(&ioat->ring_lock);
+
+               /* progress reclaim in the allocation failure case; we
+                * may be called under bh_disabled so we need to trigger
+                * the timer event directly
+                */
+               spin_lock_bh(&chan->cleanup_lock);
+               if (time_after(jiffies, chan->timer.expires) &&
+                   timer_pending(&chan->timer)) {
+                       struct ioatdma_device *device = chan->device;
+
+                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+                       spin_unlock_bh(&chan->cleanup_lock);
+                       device->timer_fn((unsigned long) ioat);
+               } else
+                       spin_unlock_bh(&chan->cleanup_lock);
+               return -ENOMEM;
+       }
+
+       dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
+               __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+
+       *idx = ioat2_desc_alloc(ioat, num_descs);
+       return 0;  /* with ioat->ring_lock held */
+}
+
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+                          dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_dma_descriptor *hw;
+       struct ioat_ring_ent *desc;
+       dma_addr_t dst = dma_dest;
+       dma_addr_t src = dma_src;
+       size_t total_len = len;
+       int num_descs;
+       u16 idx;
+       int i;
+
+       num_descs = ioat2_xferlen_to_descs(ioat, len);
+       if (likely(num_descs) &&
+           ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+               /* pass */;
+       else
+               return NULL;
+       i = 0;
+       do {
+               size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+               desc = ioat2_get_ring_ent(ioat, idx + i);
+               hw = desc->hw;
+
+               hw->size = copy;
+               hw->ctl = 0;
+               hw->src_addr = src;
+               hw->dst_addr = dst;
+
+               len -= copy;
+               dst += copy;
+               src += copy;
+               dump_desc_dbg(ioat, desc);
+       } while (++i < num_descs);
+
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       hw->ctl_f.compl_write = 1;
+       dump_desc_dbg(ioat, desc);
+       /* we leave the channel locked to ensure in order submission */
+
+       return &desc->txd;
+}
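
The "channel locked" comment describes an asymmetric contract: a successful ioat2_alloc_and_lock() returns with ring_lock held, and the matching unlock only happens in ioat2_tx_submit_unlock() when the client submits the descriptor; that hand-off is what guarantees in-order submission. A userspace model of the contract (a sketch; names are illustrative, not driver API):

    #include <assert.h>
    #include <stdbool.h>

    /* model of the prep/submit lock hand-off: prep returns with the
     * lock held; submit releases it */
    static bool ring_locked;

    static int prep(void)
    {
            assert(!ring_locked);
            ring_locked = true;       /* ioat2_alloc_and_lock() succeeded */
            return 0;
    }

    static void submit(void)
    {
            assert(ring_locked);      /* only legal after a successful prep */
            ring_locked = false;      /* ioat2_tx_submit_unlock() */
    }

    int main(void)
    {
            prep();
            submit();                 /* descriptors now issued in order */
            return 0;
    }
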
+
+/**
+ * ioat2_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+void ioat2_free_chan_resources(struct dma_chan *c)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioatdma_device *device = chan->device;
+       struct ioat_ring_ent *desc;
+       const u16 total_descs = 1 << ioat->alloc_order;
+       int descs;
+       int i;
+
+       /* Before freeing channel resources, first check that
+        * they have actually been allocated for this channel.
+        */
+       if (!ioat->ring)
+               return;
+
+       tasklet_disable(&chan->cleanup_task);
+       del_timer_sync(&chan->timer);
+       device->cleanup_tasklet((unsigned long) ioat);
+
+       /* Delay 100ms after reset to allow internal DMA logic to quiesce
+        * before removing DMA descriptor resources.
+        */
+       writeb(IOAT_CHANCMD_RESET,
+              chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+       mdelay(100);
+
+       spin_lock_bh(&ioat->ring_lock);
+       descs = ioat2_ring_space(ioat);
+       dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
+       for (i = 0; i < descs; i++) {
+               desc = ioat2_get_ring_ent(ioat, ioat->head + i);
+               ioat2_free_ring_ent(desc, c);
+       }
+
+       if (descs < total_descs)
+               dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+                       total_descs - descs);
+
+       for (i = 0; i < total_descs - descs; i++) {
+               desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+               dump_desc_dbg(ioat, desc);
+               ioat2_free_ring_ent(desc, c);
+       }
+
+       kfree(ioat->ring);
+       ioat->ring = NULL;
+       ioat->alloc_order = 0;
+       pci_pool_free(device->completion_pool, chan->completion,
+                     chan->completion_dma);
+       spin_unlock_bh(&ioat->ring_lock);
+
+       chan->last_completion = 0;
+       chan->completion_dma = 0;
+       ioat->pending = 0;
+       ioat->dmacount = 0;
+}
+
+enum dma_status
+ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                    dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioatdma_device *device = ioat->base.device;
+
+       if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+               return DMA_SUCCESS;
+
+       device->cleanup_tasklet((unsigned long) ioat);
+
+       return ioat_is_complete(c, cookie, done, used);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+       return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+       /* ...taken outside the lock, no need to be precise */
+       return sprintf(page, "%d\n", ioat2_ring_active(ioat));
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static struct attribute *ioat2_attrs[] = {
+       &ring_size_attr.attr,
+       &ring_active_attr.attr,
+       &ioat_cap_attr.attr,
+       &ioat_version_attr.attr,
+       NULL,
+};
+
+struct kobj_type ioat2_ktype = {
+       .sysfs_ops = &ioat_sysfs_ops,
+       .default_attrs = ioat2_attrs,
+};
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
+{
+       struct pci_dev *pdev = device->pdev;
+       struct dma_device *dma;
+       struct dma_chan *c;
+       struct ioat_chan_common *chan;
+       int err;
+
+       device->enumerate_channels = ioat2_enumerate_channels;
+       device->cleanup_tasklet = ioat2_cleanup_tasklet;
+       device->timer_fn = ioat2_timer_event;
+       device->self_test = ioat_dma_self_test;
+       dma = &device->common;
+       dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+       dma->device_issue_pending = ioat2_issue_pending;
+       dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+       dma->device_free_chan_resources = ioat2_free_chan_resources;
+       dma->device_is_tx_complete = ioat2_is_complete;
+
+       err = ioat_probe(device);
+       if (err)
+               return err;
+       ioat_set_tcp_copy_break(2048);
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               chan = to_chan_common(c);
+               writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
+                      chan->reg_base + IOAT_DCACTRL_OFFSET);
+       }
+
+       err = ioat_register(device);
+       if (err)
+               return err;
+
+       ioat_kobject_add(device, &ioat2_ktype);
+
+       if (dca)
+               device->dca = ioat2_dca_init(pdev, device->reg_base);
+
+       return err;
+}
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
new file mode 100644 (file)
index 0000000..1d849ef
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_V2_H
+#define IOATDMA_V2_H
+
+#include <linux/dmaengine.h>
+#include "dma.h"
+#include "hw.h"
+
+extern int ioat_pending_level;
+extern int ioat_ring_alloc_order;
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+#define IOAT_MAX_ORDER 16
+#define ioat_get_alloc_order() \
+       (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
+#define ioat_get_max_alloc_order() \
+       (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+
+/**
+ * struct ioat2_dma_chan - ioat v2 / v3 channel attributes
+ * @base: common ioat channel parameters
+ * @xfercap_log: log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @pending: lock free indicator for issued != head
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @ring: software ring buffer implementation of hardware ring
+ * @ring_lock: protects ring attributes
+ */
+struct ioat2_dma_chan {
+       struct ioat_chan_common base;
+       size_t xfercap_log;
+       u16 head;
+       u16 issued;
+       u16 tail;
+       u16 dmacount;
+       u16 alloc_order;
+       int pending;
+       struct ioat_ring_ent **ring;
+       spinlock_t ring_lock;
+};
+
+static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
+{
+       struct ioat_chan_common *chan = to_chan_common(c);
+
+       return container_of(chan, struct ioat2_dma_chan, base);
+}
+
+static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
+{
+       return (1 << ioat->alloc_order) - 1;
+}
+
+/* count of descriptors in flight with the engine */
+static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
+{
+       return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat);
+}
+
+/* count of descriptors pending submission to hardware */
+static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
+{
+       return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat);
+}
+
+static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
+{
+       u16 num_descs = ioat2_ring_mask(ioat) + 1;
+       u16 active = ioat2_ring_active(ioat);
+
+       BUG_ON(active > num_descs);
+
+       return num_descs - active;
+}
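
Because head and tail are free-running u16 counters, the occupancy math stays correct across counter wrap: unsigned subtraction followed by the mask yields the distance from tail to head. A standalone check:

    #include <assert.h>

    int main(void)
    {
            unsigned short head = 2, tail = 0xfffe;  /* head has wrapped past tail */
            unsigned short mask = (1u << 8) - 1;     /* 256-entry ring */

            assert(((unsigned short)(head - tail) & mask) == 4);
            return 0;
    }
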
+
+/* assumes caller already checked space */
+static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
+{
+       ioat->head += len;
+       return ioat->head - len;
+}
+
+static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
+{
+       u16 num_descs = len >> ioat->xfercap_log;
+
+       num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
+       return num_descs;
+}
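
ioat2_xferlen_to_descs() is a ceiling division by the 2^xfercap_log per-descriptor transfer cap, split into a shift plus a test for a partial trailing chunk. A standalone check of the arithmetic (a sketch, not kernel code):

    #include <assert.h>
    #include <stddef.h>

    static unsigned short xferlen_to_descs(size_t len, unsigned int xfercap_log)
    {
            unsigned short num_descs = len >> xfercap_log;

            num_descs += !!(len & ((1u << xfercap_log) - 1));
            return num_descs;
    }

    int main(void)
    {
            assert(xferlen_to_descs(8192, 12) == 2);  /* two full 4 KiB chunks */
            assert(xferlen_to_descs(8193, 12) == 3);  /* plus a 1-byte remainder */
            assert(xferlen_to_descs(1, 12) == 1);
            return 0;
    }
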
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @fill: hardware fill descriptor
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ */
+struct ioat_ring_ent {
+       union {
+               struct ioat_dma_descriptor *hw;
+               struct ioat_fill_descriptor *fill;
+               struct ioat_xor_descriptor *xor;
+               struct ioat_xor_ext_descriptor *xor_ex;
+               struct ioat_pq_descriptor *pq;
+               struct ioat_pq_ext_descriptor *pq_ex;
+               struct ioat_pq_update_descriptor *pqu;
+               struct ioat_raw_descriptor *raw;
+       };
+       size_t len;
+       struct dma_async_tx_descriptor txd;
+       enum sum_check_flags *result;
+       #ifdef DEBUG
+       int id;
+       #endif
+};
+
+static inline struct ioat_ring_ent *
+ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
+{
+       return ioat->ring[idx & ioat2_ring_mask(ioat)];
+}
+
+static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+
+       writel(addr & 0x00000000FFFFFFFF,
+              chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+       writel(addr >> 32,
+              chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
+struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
+int ioat2_enumerate_channels(struct ioatdma_device *device);
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+                          dma_addr_t dma_src, size_t len, unsigned long flags);
+void ioat2_issue_pending(struct dma_chan *chan);
+int ioat2_alloc_chan_resources(struct dma_chan *c);
+void ioat2_free_chan_resources(struct dma_chan *c);
+enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                                 dma_cookie_t *done, dma_cookie_t *used);
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
+void ioat2_cleanup_tasklet(unsigned long data);
+void ioat2_timer_event(unsigned long data);
+extern struct kobj_type ioat2_ktype;
+extern struct kmem_cache *ioat2_cache;
+#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644 (file)
index 0000000..35d1e33
--- /dev/null
@@ -0,0 +1,1223 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Support routines for v3+ hardware
+ */
+
+#include <linux/pci.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+#include "dma_v2.h"
+
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc = 0xe0;
+static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+
+static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+       struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+       return raw->field[xor_idx_to_field[idx]];
+}
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+                       dma_addr_t addr, u32 offset, int idx)
+{
+       struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+       raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
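
The xor_idx_to_desc bitmap selects, per source index, whether an address lives in the base xor descriptor (bit clear) or the extended one (bit set), and xor_idx_to_field picks the u64 slot within that descriptor: sources 0-4 occupy the base descriptor's address fields, and sources 5-7 spill into the extension. A standalone check of the decode:

    #include <assert.h>

    static const unsigned char xor_idx_to_desc = 0xe0;
    static const unsigned char xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };

    int main(void)
    {
            /* source 4 is the last slot in the base descriptor... */
            assert(((xor_idx_to_desc >> 4) & 1) == 0);
            assert(xor_idx_to_field[4] == 7);

            /* ...and source 5 is the first slot in the extension */
            assert(((xor_idx_to_desc >> 5) & 1) == 1);
            assert(xor_idx_to_field[5] == 0);
            return 0;
    }
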
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+       struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+       return raw->field[pq_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+                      dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+       struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+       struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+       raw->field[pq_idx_to_field[idx]] = addr + offset;
+       pq->coef[idx] = coef;
+}
+
+static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
+                           struct ioat_ring_ent *desc, int idx)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct pci_dev *pdev = chan->device->pdev;
+       size_t len = desc->len;
+       size_t offset = len - desc->hw->size;
+       struct dma_async_tx_descriptor *tx = &desc->txd;
+       enum dma_ctrl_flags flags = tx->flags;
+
+       switch (desc->hw->ctl_f.op) {
+       case IOAT_OP_COPY:
+               if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
+                       ioat_dma_unmap(chan, flags, len, desc->hw);
+               break;
+       case IOAT_OP_FILL: {
+               struct ioat_fill_descriptor *hw = desc->fill;
+
+               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+                       ioat_unmap(pdev, hw->dst_addr - offset, len,
+                                  PCI_DMA_FROMDEVICE, flags, 1);
+               break;
+       }
+       case IOAT_OP_XOR_VAL:
+       case IOAT_OP_XOR: {
+               struct ioat_xor_descriptor *xor = desc->xor;
+               struct ioat_ring_ent *ext;
+               struct ioat_xor_ext_descriptor *xor_ex = NULL;
+               int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
+               struct ioat_raw_descriptor *descs[2];
+               int i;
+
+               if (src_cnt > 5) {
+                       ext = ioat2_get_ring_ent(ioat, idx + 1);
+                       xor_ex = ext->xor_ex;
+               }
+
+               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                       descs[0] = (struct ioat_raw_descriptor *) xor;
+                       descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+                       for (i = 0; i < src_cnt; i++) {
+                               dma_addr_t src = xor_get_src(descs, i);
+
+                               ioat_unmap(pdev, src - offset, len,
+                                          PCI_DMA_TODEVICE, flags, 0);
+                       }
+
+                       /* dest is a source in xor validate operations */
+                       if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
+                               ioat_unmap(pdev, xor->dst_addr - offset, len,
+                                          PCI_DMA_TODEVICE, flags, 1);
+                               break;
+                       }
+               }
+
+               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+                       ioat_unmap(pdev, xor->dst_addr - offset, len,
+                                  PCI_DMA_FROMDEVICE, flags, 1);
+               break;
+       }
+       case IOAT_OP_PQ_VAL:
+       case IOAT_OP_PQ: {
+               struct ioat_pq_descriptor *pq = desc->pq;
+               struct ioat_ring_ent *ext;
+               struct ioat_pq_ext_descriptor *pq_ex = NULL;
+               int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+               struct ioat_raw_descriptor *descs[2];
+               int i;
+
+               if (src_cnt > 3) {
+                       ext = ioat2_get_ring_ent(ioat, idx + 1);
+                       pq_ex = ext->pq_ex;
+               }
+
+               /* in the 'continue' case don't unmap the dests as sources */
+               if (dmaf_p_disabled_continue(flags))
+                       src_cnt--;
+               else if (dmaf_continue(flags))
+                       src_cnt -= 3;
+
+               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                       descs[0] = (struct ioat_raw_descriptor *) pq;
+                       descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+                       for (i = 0; i < src_cnt; i++) {
+                               dma_addr_t src = pq_get_src(descs, i);
+
+                               ioat_unmap(pdev, src - offset, len,
+                                          PCI_DMA_TODEVICE, flags, 0);
+                       }
+
+                       /* the dests are sources in pq validate operations */
+                       if (pq->ctl_f.op == IOAT_OP_PQ_VAL) {
+                               if (!(flags & DMA_PREP_PQ_DISABLE_P))
+                                       ioat_unmap(pdev, pq->p_addr - offset,
+                                                  len, PCI_DMA_TODEVICE, flags, 0);
+                               if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+                                       ioat_unmap(pdev, pq->q_addr - offset,
+                                                  len, PCI_DMA_TODEVICE, flags, 0);
+                               break;
+                       }
+               }
+
+               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+                       if (!(flags & DMA_PREP_PQ_DISABLE_P))
+                               ioat_unmap(pdev, pq->p_addr - offset, len,
+                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
+                       if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+                               ioat_unmap(pdev, pq->q_addr - offset, len,
+                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
+               }
+               break;
+       }
+       default:
+               dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
+                       __func__, desc->hw->ctl_f.op);
+       }
+}
+
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+       struct ioat_dma_descriptor *hw = desc->hw;
+
+       if (hw->ctl_f.op == IOAT_OP_XOR ||
+           hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+               struct ioat_xor_descriptor *xor = desc->xor;
+
+               if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+                       return true;
+       } else if (hw->ctl_f.op == IOAT_OP_PQ ||
+                  hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+               struct ioat_pq_descriptor *pq = desc->pq;
+
+               if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+                       return true;
+       }
+
+       return false;
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat: channel (ring) to clean
+ *
+ * The difference from the dma_v2.c __cleanup() is that this routine
+ * handles extended descriptors and dma-unmapping raid operations.
+ */
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_ring_ent *desc;
+       bool seen_current = false;
+       u16 active;
+       int i;
+
+       dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued);
+
+       active = ioat2_ring_active(ioat);
+       for (i = 0; i < active && !seen_current; i++) {
+               struct dma_async_tx_descriptor *tx;
+
+               prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+               desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+               dump_desc_dbg(ioat, desc);
+               tx = &desc->txd;
+               if (tx->cookie) {
+                       chan->completed_cookie = tx->cookie;
+                       ioat3_dma_unmap(ioat, desc, ioat->tail + i);
+                       tx->cookie = 0;
+                       if (tx->callback) {
+                               tx->callback(tx->callback_param);
+                               tx->callback = NULL;
+                       }
+               }
+
+               if (tx->phys == phys_complete)
+                       seen_current = true;
+
+               /* skip extended descriptors */
+               if (desc_has_ext(desc)) {
+                       BUG_ON(i + 1 >= active);
+                       i++;
+               }
+       }
+       ioat->tail += i;
+       BUG_ON(!seen_current); /* no active descs have written a completion? */
+       chan->last_completion = phys_complete;
+       if (ioat->head == ioat->tail) {
+               dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+                       __func__);
+               clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+               mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+}
+
+static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+
+       prefetch(chan->completion);
+
+       if (!spin_trylock_bh(&chan->cleanup_lock))
+               return;
+
+       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       if (!spin_trylock_bh(&ioat->ring_lock)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       __cleanup(ioat, phys_complete);
+
+       spin_unlock_bh(&ioat->ring_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat3_cleanup_tasklet(unsigned long data)
+{
+       struct ioat2_dma_chan *ioat = (void *) data;
+
+       ioat3_cleanup(ioat);
+       writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
+              ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+       u32 status;
+
+       status = ioat_chansts(chan);
+       if (is_ioat_active(status) || is_ioat_idle(status))
+               ioat_suspend(chan);
+       while (is_ioat_active(status) || is_ioat_idle(status)) {
+               status = ioat_chansts(chan);
+               cpu_relax();
+       }
+
+       if (ioat_cleanup_preamble(chan, &phys_complete))
+               __cleanup(ioat, phys_complete);
+
+       __ioat2_restart_chan(ioat);
+}
+
+static void ioat3_timer_event(unsigned long data)
+{
+       struct ioat2_dma_chan *ioat = (void *) data;
+       struct ioat_chan_common *chan = &ioat->base;
+
+       spin_lock_bh(&chan->cleanup_lock);
+       if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+               unsigned long phys_complete;
+               u64 status;
+
+               spin_lock_bh(&ioat->ring_lock);
+               status = ioat_chansts(chan);
+
+               /* when halted due to errors check for channel
+                * programming errors before advancing the completion state
+                */
+               if (is_ioat_halted(status)) {
+                       u32 chanerr;
+
+                       chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+                       BUG_ON(is_ioat_bug(chanerr));
+               }
+
+               /* if we haven't made progress and we have already
+                * acknowledged a pending completion once, then be more
+                * forceful with a restart
+                */
+               if (ioat_cleanup_preamble(chan, &phys_complete))
+                       __cleanup(ioat, phys_complete);
+               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+                       ioat3_restart_channel(ioat);
+               else {
+                       set_bit(IOAT_COMPLETION_ACK, &chan->state);
+                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               }
+               spin_unlock_bh(&ioat->ring_lock);
+       } else {
+               u16 active;
+
+               /* if the ring is idle, empty, and oversized, try to step
+                * down the size
+                */
+               spin_lock_bh(&ioat->ring_lock);
+               active = ioat2_ring_active(ioat);
+               if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+                       reshape_ring(ioat, ioat->alloc_order-1);
+               spin_unlock_bh(&ioat->ring_lock);
+
+               /* keep shrinking until we get back to our minimum
+                * default size
+                */
+               if (ioat->alloc_order > ioat_get_alloc_order())
+                       mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                 dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+       if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+               return DMA_SUCCESS;
+
+       ioat3_cleanup(ioat);
+
+       return ioat_is_complete(c, cookie, done, used);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
+                      size_t len, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_ring_ent *desc;
+       size_t total_len = len;
+       struct ioat_fill_descriptor *fill;
+       int num_descs;
+       u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
+       u16 idx;
+       int i;
+
+       num_descs = ioat2_xferlen_to_descs(ioat, len);
+       if (likely(num_descs) &&
+           ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+               /* pass */;
+       else
+               return NULL;
+       i = 0;
+       do {
+               size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+               desc = ioat2_get_ring_ent(ioat, idx + i);
+               fill = desc->fill;
+
+               fill->size = xfer_size;
+               fill->src_data = src_data;
+               fill->dst_addr = dest;
+               fill->ctl = 0;
+               fill->ctl_f.op = IOAT_OP_FILL;
+
+               len -= xfer_size;
+               dest += xfer_size;
+               dump_desc_dbg(ioat, desc);
+       } while (++i < num_descs);
+
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       fill->ctl_f.compl_write = 1;
+       dump_desc_dbg(ioat, desc);
+
+       /* we leave the channel locked to ensure in order submission */
+       return &desc->txd;
+}
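
The 0x0101010101010101ULL multiply replicates the low byte of value into every byte lane of the 64-bit fill pattern the hardware expects. A quick standalone check:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            int value = 0x1ab;  /* only the low byte is used */
            uint64_t pattern = 0x0101010101010101ULL * (value & 0xff);

            assert(pattern == 0xababababababababULL);
            return 0;
    }
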
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+                     dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+                     size_t len, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_ring_ent *compl_desc;
+       struct ioat_ring_ent *desc;
+       struct ioat_ring_ent *ext;
+       size_t total_len = len;
+       struct ioat_xor_descriptor *xor;
+       struct ioat_xor_ext_descriptor *xor_ex = NULL;
+       struct ioat_dma_descriptor *hw;
+       u32 offset = 0;
+       int num_descs;
+       int with_ext;
+       int i;
+       u16 idx;
+       u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
+       BUG_ON(src_cnt < 2);
+
+       num_descs = ioat2_xferlen_to_descs(ioat, len);
+       /* we need 2x the number of descriptors to cover greater than 5
+        * sources
+        */
+       if (src_cnt > 5) {
+               with_ext = 1;
+               num_descs *= 2;
+       } else
+               with_ext = 0;
+
+       /* completion writes from the raid engine may pass completion
+        * writes from the legacy engine, so we need one extra null
+        * (legacy) descriptor to ensure all completion writes arrive in
+        * order.
+        */
+       if (likely(num_descs) &&
+           ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+               /* pass */;
+       else
+               return NULL;
+       i = 0;
+       do {
+               struct ioat_raw_descriptor *descs[2];
+               size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+               int s;
+
+               desc = ioat2_get_ring_ent(ioat, idx + i);
+               xor = desc->xor;
+
+               /* save a branch by unconditionally retrieving the
+                * extended descriptor; xor_set_src() knows not to
+                * write to it in the single descriptor case
+                */
+               ext = ioat2_get_ring_ent(ioat, idx + i + 1);
+               xor_ex = ext->xor_ex;
+
+               descs[0] = (struct ioat_raw_descriptor *) xor;
+               descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+               for (s = 0; s < src_cnt; s++)
+                       xor_set_src(descs, src[s], offset, s);
+               xor->size = xfer_size;
+               xor->dst_addr = dest + offset;
+               xor->ctl = 0;
+               xor->ctl_f.op = op;
+               xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+               len -= xfer_size;
+               offset += xfer_size;
+               dump_desc_dbg(ioat, desc);
+       } while ((i += 1 + with_ext) < num_descs);
+
+       /* last xor descriptor carries the unmap parameters and fence bit */
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       if (result)
+               desc->result = result;
+       xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+       /* completion descriptor carries interrupt bit */
+       compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+       compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+       hw = compl_desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.compl_write = 1;
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       dump_desc_dbg(ioat, compl_desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+              unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+                   unsigned int src_cnt, size_t len,
+                   enum sum_check_flags *result, unsigned long flags)
+{
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success, so clear it here
+        */
+       *result = 0;
+
+       return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
+                                    src_cnt - 1, len, flags);
+}
+
+static void
+dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc,
+                struct ioat_ring_ent *ext)
+{
+       struct device *dev = to_dev(&ioat->base);
+       struct ioat_pq_descriptor *pq = desc->pq;
+       struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+       struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+       int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+       int i;
+
+       dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+               " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+               desc_id(desc), (unsigned long long) desc->txd.phys,
+               (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+               desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
+               pq->ctl_f.compl_write,
+               pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+               pq->ctl_f.src_cnt);
+       for (i = 0; i < src_cnt; i++)
+               dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+                       (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+       dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+       dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+                    const dma_addr_t *dst, const dma_addr_t *src,
+                    unsigned int src_cnt, const unsigned char *scf,
+                    size_t len, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_ring_ent *compl_desc;
+       struct ioat_ring_ent *desc;
+       struct ioat_ring_ent *ext;
+       size_t total_len = len;
+       struct ioat_pq_descriptor *pq;
+       struct ioat_pq_ext_descriptor *pq_ex = NULL;
+       struct ioat_dma_descriptor *hw;
+       u32 offset = 0;
+       int num_descs;
+       int with_ext;
+       int i, s;
+       u16 idx;
+       u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+
+       dev_dbg(to_dev(chan), "%s\n", __func__);
+       /* the engine requires at least two sources (we provide
+        * at least one implied source in the DMA_PREP_CONTINUE case)
+        */
+       BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+       num_descs = ioat2_xferlen_to_descs(ioat, len);
+       /* we need 2x the number of descriptors to cover more than 3
+        * sources: the base pq descriptor holds three source addresses
+        * and the extended descriptor holds five more
+        */
+       if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
+               with_ext = 1;
+               num_descs *= 2;
+       } else
+               with_ext = 0;
+
+       /* completion writes from the raid engine may pass completion
+        * writes from the legacy engine, so we need one extra null
+        * (legacy) descriptor to ensure all completion writes arrive in
+        * order.
+        */
+       if (likely(num_descs) &&
+           ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+               /* pass */;
+       else
+               return NULL;
+       i = 0;
+       do {
+               struct ioat_raw_descriptor *descs[2];
+               size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+               desc = ioat2_get_ring_ent(ioat, idx + i);
+               pq = desc->pq;
+
+               /* save a branch by unconditionally retrieving the
+                * extended descriptor; pq_set_src() knows not to
+                * write to it in the single descriptor case
+                */
+               ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
+               pq_ex = ext->pq_ex;
+
+               descs[0] = (struct ioat_raw_descriptor *) pq;
+               descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+               for (s = 0; s < src_cnt; s++)
+                       pq_set_src(descs, src[s], offset, scf[s], s);
+
+               /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+               if (dmaf_p_disabled_continue(flags))
+                       pq_set_src(descs, dst[1], offset, 1, s++);
+               else if (dmaf_continue(flags)) {
+                       pq_set_src(descs, dst[0], offset, 0, s++);
+                       pq_set_src(descs, dst[1], offset, 1, s++);
+                       pq_set_src(descs, dst[1], offset, 0, s++);
+               }
+               pq->size = xfer_size;
+               pq->p_addr = dst[0] + offset;
+               pq->q_addr = dst[1] + offset;
+               pq->ctl = 0;
+               pq->ctl_f.op = op;
+               pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+               pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+               pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+               len -= xfer_size;
+               offset += xfer_size;
+       } while ((i += 1 + with_ext) < num_descs);
+
+       /* last pq descriptor carries the unmap parameters and fence bit */
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       if (result)
+               desc->result = result;
+       pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       dump_pq_desc_dbg(ioat, desc, ext);
+
+       /* completion descriptor carries interrupt bit */
+       compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+       compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+       hw = compl_desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.compl_write = 1;
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       dump_desc_dbg(ioat, compl_desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+             unsigned int src_cnt, const unsigned char *scf, size_t len,
+             unsigned long flags)
+{
+       /* handle the single source multiply case from the raid6
+        * recovery path
+        */
+       if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
+               dma_addr_t single_source[2];
+               unsigned char single_source_coef[2];
+
+               BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+               single_source[0] = src[0];
+               single_source[1] = src[0];
+               single_source_coef[0] = scf[0];
+               single_source_coef[1] = 0;
+
+               return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+                                           single_source_coef, len, flags);
+       } else
+               return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
+                                           len, flags);
+}
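+
+/* Usage sketch (illustrative only, not part of the driver): clients
+ * reach this routine through the generic dmaengine interface, much as
+ * the xor self-test below drives device_prep_dma_xor:
+ *
+ *     tx = dma->device_prep_dma_pq(chan, dst, src, src_cnt, scf, len,
+ *                                  DMA_PREP_INTERRUPT);
+ *     if (tx) {
+ *             async_tx_ack(tx);
+ *             cookie = tx->tx_submit(tx);
+ *             dma->device_issue_pending(chan);
+ *     }
+ */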
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+                 unsigned int src_cnt, const unsigned char *scf, size_t len,
+                 enum sum_check_flags *pqres, unsigned long flags)
+{
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success, so clear it here
+        */
+       *pqres = 0;
+
+       return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+                                   flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+                unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       unsigned char scf[src_cnt];
+       dma_addr_t pq[2];
+
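+       /* with q disabled the coefficients are ignored and p carries
+        * the plain xor of the sources; pq[1] is a dummy q address
+        */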
+       memset(scf, 0, src_cnt);
+       flags |= DMA_PREP_PQ_DISABLE_Q;
+       pq[0] = dst;
+       pq[1] = ~0;
+
+       return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+                                   flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+                    unsigned int src_cnt, size_t len,
+                    enum sum_check_flags *result, unsigned long flags)
+{
+       unsigned char scf[src_cnt];
+       dma_addr_t pq[2];
+
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success, so clear it here
+        */
+       *result = 0;
+
+       memset(scf, 0, src_cnt);
+       flags |= DMA_PREP_PQ_DISABLE_Q;
+       pq[0] = src[0];
+       pq[1] = ~0;
+
+       return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
+                                   len, flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_ring_ent *desc;
+       struct ioat_dma_descriptor *hw;
+       u16 idx;
+
+       if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
+               desc = ioat2_get_ring_ent(ioat, idx);
+       else
+               return NULL;
+
+       hw = desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = 1;
+       hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       hw->ctl_f.compl_write = 1;
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       hw->src_addr = 0;
+       hw->dst_addr = 0;
+
+       desc->txd.flags = flags;
+       desc->len = 1;
+
+       dump_desc_dbg(ioat, desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
+static void __devinit ioat3_dma_test_callback(void *dma_async_param)
+{
+       struct completion *cmp = dma_async_param;
+
+       complete(cmp);
+}
+
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
+{
+       int i, src_idx;
+       struct page *dest;
+       struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+       struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+       dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+       dma_addr_t dma_addr, dest_dma;
+       struct dma_async_tx_descriptor *tx;
+       struct dma_chan *dma_chan;
+       dma_cookie_t cookie;
+       u8 cmp_byte = 0;
+       u32 cmp_word;
+       u32 xor_val_result;
+       int err = 0;
+       struct completion cmp;
+       unsigned long tmo;
+       struct device *dev = &device->pdev->dev;
+       struct dma_device *dma = &device->common;
+
+       dev_dbg(dev, "%s\n", __func__);
+
+       if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+               return 0;
+
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+               xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+               if (!xor_srcs[src_idx]) {
+                       while (src_idx--)
+                               __free_page(xor_srcs[src_idx]);
+                       return -ENOMEM;
+               }
+       }
+
+       dest = alloc_page(GFP_KERNEL);
+       if (!dest) {
+               while (src_idx--)
+                       __free_page(xor_srcs[src_idx]);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffers */
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+               u8 *ptr = page_address(xor_srcs[src_idx]);
+               for (i = 0; i < PAGE_SIZE; i++)
+                       ptr[i] = (1 << src_idx);
+       }
+
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+               cmp_byte ^= (u8) (1 << src_idx);
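+       /* with IOAT_NUM_SRC_TEST == 6 one-hot source bytes this is 0x3f */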
+
+       cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+                       (cmp_byte << 8) | cmp_byte;
+
+       memset(page_address(dest), 0, PAGE_SIZE);
+
+       dma_chan = container_of(dma->channels.next, struct dma_chan,
+                               device_node);
+       if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       /* test xor */
+       dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+               dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+       tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+                                     IOAT_NUM_SRC_TEST, PAGE_SIZE,
+                                     DMA_PREP_INTERRUPT);
+
+       if (!tx) {
+               dev_err(dev, "Self-test xor prep failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat3_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test xor setup failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               dev_err(dev, "Self-test xor timed out\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+       for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+               u32 *ptr = page_address(dest);
+               if (ptr[i] != cmp_word) {
+                       dev_err(dev, "Self-test xor failed compare\n");
+                       err = -ENODEV;
+                       goto free_resources;
+               }
+       }
+       dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
+
+       /* skip the validate test if the capability is not present */
+       if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+               goto free_resources;
+
+       /* validate the sources with the destination page */
+       for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+               xor_val_srcs[i] = xor_srcs[i];
+       xor_val_srcs[i] = dest;
+
+       xor_val_result = 1;
+
+       for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+               dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+       tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+                                         IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
+       if (!tx) {
+               dev_err(dev, "Self-test zero prep failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat3_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test zero setup failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               dev_err(dev, "Self-test validate timed out\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       if (xor_val_result != 0) {
+               dev_err(dev, "Self-test validate failed compare\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       /* skip the memset test if the capability is not present */
+       if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
+               goto free_resources;
+
+       /* test memset */
+       dma_addr = dma_map_page(dev, dest, 0,
+                       PAGE_SIZE, DMA_FROM_DEVICE);
+       tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+                                        DMA_PREP_INTERRUPT);
+       if (!tx) {
+               dev_err(dev, "Self-test memset prep failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat3_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test memset setup failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               dev_err(dev, "Self-test memset timed out\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+               u32 *ptr = page_address(dest);
+               if (ptr[i]) {
+                       dev_err(dev, "Self-test memset failed compare\n");
+                       err = -ENODEV;
+                       goto free_resources;
+               }
+       }
+
+       /* test for non-zero parity sum */
+       xor_val_result = 0;
+       for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+               dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+       tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+                                         IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
+       if (!tx) {
+               dev_err(dev, "Self-test 2nd zero prep failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat3_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test  2nd zero setup failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               dev_err(dev, "Self-test 2nd validate timed out\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       if (xor_val_result != SUM_CHECK_P_RESULT) {
+               dev_err(dev, "Self-test validate failed compare\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+free_resources:
+       dma->device_free_chan_resources(dma_chan);
+out:
+       src_idx = IOAT_NUM_SRC_TEST;
+       while (src_idx--)
+               __free_page(xor_srcs[src_idx]);
+       __free_page(dest);
+       return err;
+}
+
+static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
+{
+       int rc = ioat_dma_self_test(device);
+
+       if (rc)
+               return rc;
+
+       rc = ioat_xor_val_self_test(device);
+       if (rc)
+               return rc;
+
+       return 0;
+}
+
+int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
+{
+       struct pci_dev *pdev = device->pdev;
+       struct dma_device *dma;
+       struct dma_chan *c;
+       struct ioat_chan_common *chan;
+       bool is_raid_device = false;
+       int err;
+       u16 dev_id;
+       u32 cap;
+
+       device->enumerate_channels = ioat2_enumerate_channels;
+       device->self_test = ioat3_dma_self_test;
+       dma = &device->common;
+       dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+       dma->device_issue_pending = ioat2_issue_pending;
+       dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+       dma->device_free_chan_resources = ioat2_free_chan_resources;
+
+       dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+       dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
+
+       cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
+       if (cap & IOAT_CAP_XOR) {
+               is_raid_device = true;
+               dma->max_xor = 8;
+               dma->xor_align = 2;
+
+               dma_cap_set(DMA_XOR, dma->cap_mask);
+               dma->device_prep_dma_xor = ioat3_prep_xor;
+
+               dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+               dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
+       }
+       if (cap & IOAT_CAP_PQ) {
+               is_raid_device = true;
+               dma_set_maxpq(dma, 8, 0);
+               dma->pq_align = 2;
+
+               dma_cap_set(DMA_PQ, dma->cap_mask);
+               dma->device_prep_dma_pq = ioat3_prep_pq;
+
+               dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+               dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+
+               if (!(cap & IOAT_CAP_XOR)) {
+                       dma->max_xor = 8;
+                       dma->xor_align = 2;
+
+                       dma_cap_set(DMA_XOR, dma->cap_mask);
+                       dma->device_prep_dma_xor = ioat3_prep_pqxor;
+
+                       dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+                       dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
+               }
+       }
+       if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
+               dma_cap_set(DMA_MEMSET, dma->cap_mask);
+               dma->device_prep_dma_memset = ioat3_prep_memset_lock;
+       }
+
+       if (is_raid_device) {
+               dma->device_is_tx_complete = ioat3_is_complete;
+               device->cleanup_tasklet = ioat3_cleanup_tasklet;
+               device->timer_fn = ioat3_timer_event;
+       } else {
+               dma->device_is_tx_complete = ioat2_is_complete;
+               device->cleanup_tasklet = ioat2_cleanup_tasklet;
+               device->timer_fn = ioat2_timer_event;
+       }
+
+       /* -= IOAT ver.3 workarounds =- */
+       /* Write CHANERRMSK_INT with 3E07h to mask out the errors
+        * that can cause stability issues for IOAT ver.3
+        */
+       pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
+
+       /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+        * (workaround for spurious config parity error after restart)
+        */
+       pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+       if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
+               pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
+
+       err = ioat_probe(device);
+       if (err)
+               return err;
+       ioat_set_tcp_copy_break(262144);
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               chan = to_chan_common(c);
+               writel(IOAT_DMA_DCA_ANY_CPU,
+                      chan->reg_base + IOAT_DCACTRL_OFFSET);
+       }
+
+       err = ioat_register(device);
+       if (err)
+               return err;
+
+       ioat_kobject_add(device, &ioat2_ktype);
+
+       if (dca)
+               device->dca = ioat3_dca_init(pdev, device->reg_base);
+
+       return 0;
+}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644 (file)
index 0000000..99afb12
--- /dev/null
@@ -0,0 +1,215 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID            0x8086
+#define IOAT_MMIO_BAR          0
+
+/* CB device IDs */
+#define IOAT_PCI_DID_5000       0x1A38
+#define IOAT_PCI_DID_CNB        0x360B
+#define IOAT_PCI_DID_SCNB       0x65FF
+#define IOAT_PCI_DID_SNB        0x402F
+
+#define IOAT_PCI_RID            0x00
+#define IOAT_PCI_SVID           0x8086
+#define IOAT_PCI_SID            0x8086
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+#define IOAT_VER_3_2            0x32    /* Version 3.2 */
+
+struct ioat_dma_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int null:1;
+                       unsigned int src_brk:1;
+                       unsigned int dest_brk:1;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int rsvd2:13;
+                       #define IOAT_OP_COPY 0x00
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        rsv1;
+       uint64_t        rsv2;
+       /* store some driver data in an unused portion of the descriptor */
+       union {
+               uint64_t        user1;
+               uint64_t        tx_cnt;
+       };
+       uint64_t        user2;
+};
+
+struct ioat_fill_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int rsvd:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int rsvd2:2;
+                       unsigned int dest_brk:1;
+                       unsigned int bundle:1;
+                       unsigned int rsvd4:15;
+                       #define IOAT_OP_FILL 0x01
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_data;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        rsv1;
+       uint64_t        next_dst_addr;
+       uint64_t        user1;
+       uint64_t        user2;
+};
+
+struct ioat_xor_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int rsvd:13;
+                       #define IOAT_OP_XOR 0x87
+                       #define IOAT_OP_XOR_VAL 0x88
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       uint64_t        src_addr3;
+       uint64_t        src_addr4;
+       uint64_t        src_addr5;
+};
+
+struct ioat_xor_ext_descriptor {
+       uint64_t        src_addr6;
+       uint64_t        src_addr7;
+       uint64_t        src_addr8;
+       uint64_t        next;
+       uint64_t        rsvd[4];
+};
+
+struct ioat_pq_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int p_disable:1;
+                       unsigned int q_disable:1;
+                       unsigned int rsvd:11;
+                       #define IOAT_OP_PQ 0x89
+                       #define IOAT_OP_PQ_VAL 0x8a
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        p_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       uint64_t        src_addr3;
+       uint8_t         coef[8];
+       uint64_t        q_addr;
+};
+
+struct ioat_pq_ext_descriptor {
+       uint64_t        src_addr4;
+       uint64_t        src_addr5;
+       uint64_t        src_addr6;
+       uint64_t        next;
+       uint64_t        src_addr7;
+       uint64_t        src_addr8;
+       uint64_t        rsvd[2];
+};
+
+struct ioat_pq_update_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int p_disable:1;
+                       unsigned int q_disable:1;
+                       unsigned int rsvd:3;
+                       unsigned int coef:8;
+                       #define IOAT_OP_PQ_UP 0x8b
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        p_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       uint64_t        p_src;
+       uint64_t        q_src;
+       uint64_t        q_addr;
+};
+
+struct ioat_raw_descriptor {
+       uint64_t        field[8];
+};
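+
+/* All of the hardware descriptor formats above overlay the same
+ * 64-byte slot in the descriptor ring; a compile-time sketch of that
+ * invariant (not present in the original header) could read:
+ *
+ *     BUILD_BUG_ON(sizeof(struct ioat_dma_descriptor) != 64);
+ *     BUILD_BUG_ON(sizeof(struct ioat_pq_descriptor) != 64);
+ *     BUILD_BUG_ON(sizeof(struct ioat_raw_descriptor) != 64);
+ */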
+#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
new file mode 100644 (file)
index 0000000..d545fae
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * memory copy operations and, on raid-capable hardware, xor and pq
+ * (raid6) offload.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static struct pci_device_id ioat_pci_tbl[] = {
+       /* I/OAT v1 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+       { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+       /* I/OAT v2 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+       /* I/OAT v3 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+       /* I/OAT v3.2 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev,
+                                   const struct pci_device_id *id);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of the DCA service (default: 1)");
+
+struct kmem_cache *ioat2_cache;
+
+#define DRV_NAME "ioatdma"
+
+static struct pci_driver ioat_pci_driver = {
+       .name           = DRV_NAME,
+       .id_table       = ioat_pci_tbl,
+       .probe          = ioat_pci_probe,
+       .remove         = __devexit_p(ioat_remove),
+};
+
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct device *dev = &pdev->dev;
+       struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+
+       if (!d)
+               return NULL;
+       d->pdev = pdev;
+       d->reg_base = iobase;
+       return d;
+}
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       void __iomem * const *iomap;
+       struct device *dev = &pdev->dev;
+       struct ioatdma_device *device;
+       int err;
+
+       err = pcim_enable_device(pdev);
+       if (err)
+               return err;
+
+       err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+       if (err)
+               return err;
+       iomap = pcim_iomap_table(pdev);
+       if (!iomap)
+               return -ENOMEM;
+
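+       /* prefer 64-bit DMA addressing, falling back to 32-bit */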
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (err)
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+       if (err)
+               return err;
+
+       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (err)
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+       if (err)
+               return err;
+
+       pci_set_master(pdev);
+
+       device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+       if (!device)
+               return -ENOMEM;
+       pci_set_drvdata(pdev, device);
+
+       device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+       if (device->version == IOAT_VER_1_2)
+               err = ioat1_dma_probe(device, ioat_dca_enabled);
+       else if (device->version == IOAT_VER_2_0)
+               err = ioat2_dma_probe(device, ioat_dca_enabled);
+       else if (device->version >= IOAT_VER_3_0)
+               err = ioat3_dma_probe(device, ioat_dca_enabled);
+       else
+               return -ENODEV;
+
+       if (err) {
+               dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+       struct ioatdma_device *device = pci_get_drvdata(pdev);
+
+       if (!device)
+               return;
+
+       dev_err(&pdev->dev, "Removing dma and dca services\n");
+       if (device->dca) {
+               unregister_dca_provider(device->dca, &pdev->dev);
+               free_dca_provider(device->dca);
+               device->dca = NULL;
+       }
+       ioat_dma_remove(device);
+}
+
+static int __init ioat_init_module(void)
+{
+       int err;
+
+       pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+               DRV_NAME, IOAT_DMA_VERSION);
+
+       ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+                                       0, SLAB_HWCACHE_ALIGN, NULL);
+       if (!ioat2_cache)
+               return -ENOMEM;
+
+       err = pci_register_driver(&ioat_pci_driver);
+       if (err)
+               kmem_cache_destroy(ioat2_cache);
+
+       return err;
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+       pci_unregister_driver(&ioat_pci_driver);
+       kmem_cache_destroy(ioat2_cache);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
new file mode 100644 (file)
index 0000000..63038e1
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET                        0x48
+#define IOAT_PCI_DMACTRL_DMA_EN                        0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN                        0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET              0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET           0x148
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET                0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET                    0x00    /*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET                    0x01    /*  8-bit */
+#define IOAT_XFERCAP_4KB                       12
+#define IOAT_XFERCAP_8KB                       13
+#define IOAT_XFERCAP_16KB                      14
+#define IOAT_XFERCAP_32KB                      15
+#define IOAT_XFERCAP_32GB                      0
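+/* the xfercap register encodes log2 of the maximum transfer size
+ * (e.g. 4KB => 12); the value 0 is the 32GB special case
+ */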
+
+#define IOAT_GENCTRL_OFFSET                    0x02    /*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN                  0x01
+
+#define IOAT_INTRCTRL_OFFSET                   0x03    /*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN            0x01    /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS               0x02    /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT                      0x04    /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL      0x08    /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET                 0x04    /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET                                0x08    /*  8-bit */
+#define IOAT_VER_MAJOR_MASK                    0xF0
+#define IOAT_VER_MINOR_MASK                    0x0F
+#define GET_IOAT_VER_MAJOR(x)                  (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x)                  ((x) & IOAT_VER_MINOR_MASK)
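+/* e.g. a version register value of 0x32 decodes as major 3, minor 2 */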
+
+#define IOAT_PERPORTOFFSET_OFFSET              0x0A    /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET                  0x0C    /* 16-bit */
+#define IOAT_INTRDELAY_INT_DELAY_MASK          0x3FFF  /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT         0x8000  /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET              0x0E    /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE       0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED            0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS              0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING          0x0008
+
+#define IOAT_DMA_CAP_OFFSET                    0x10    /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK                    0x00000001
+#define IOAT_CAP_CRC                           0x00000002
+#define IOAT_CAP_SKIP_MARKER                   0x00000004
+#define IOAT_CAP_DCA                           0x00000010
+#define IOAT_CAP_CRC_MOVE                      0x00000020
+#define IOAT_CAP_FILL_BLOCK                    0x00000040
+#define IOAT_CAP_APIC                          0x00000080
+#define IOAT_CAP_XOR                           0x00000100
+#define IOAT_CAP_PQ                            0x00000200
+
+#define IOAT_CHANNEL_MMIO_SIZE                 0x80    /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET                   0x00    /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK    0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN            0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE           0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL    0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN               0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN         0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN                0x0004
+#define IOAT_CHANCTRL_INT_REARM                        0x0001
+#define IOAT_CHANCTRL_RUN                      (IOAT_CHANCTRL_INT_REARM |\
+                                                IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+                                                IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
+                                                IOAT_CHANCTRL_ERR_INT_EN)
+
+#define IOAT_DMA_COMP_OFFSET                   0x02    /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1                       0x0001  /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2                       0x0002  /* Compatibility with DMA version 2 */
+
+#define IOAT1_CHANSTS_OFFSET           0x04    /* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET           0x08    /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW       0x04
+#define IOAT2_CHANSTS_OFFSET_LOW       0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver)           ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH      0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH      0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver)          ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR                  0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR          0x8ULL
+#define IOAT_CHANSTS_STATUS    0x7ULL
+#define IOAT_CHANSTS_ACTIVE    0x0
+#define IOAT_CHANSTS_DONE      0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED    0x3
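+
+/* Illustrative usage (a sketch, not part of the original header): the
+ * driver reads channel status through the version-dependent offset,
+ * roughly:
+ *
+ *     u64 status = readq(reg_base + IOAT_CHANSTS_OFFSET(ver));
+ *
+ *     if ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED)
+ *             the channel has stopped on an error
+ *
+ * where reg_base and ver come from the channel state defined in dma.h.
+ */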
+
+#define IOAT_CHAN_DMACOUNT_OFFSET      0x06    /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET         0x30   /* 32-bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET       0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET         0x00
+#define IOAT_DCA_VER_MAJOR_MASK     0xF0
+#define IOAT_DCA_VER_MINOR_MASK     0x0F
+
+#define IOAT_DCA_COMP_OFFSET        0x02
+#define IOAT_DCA_COMP_V1            0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET  0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET  0x06
+#define IOAT_PCI_CAPABILITY_MEMWR   0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
+#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET      0x10
+#define IOAT_DCA_GREQID_SIZE        0x04
+#define IOAT_DCA_GREQID_MASK        0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
+#define IOAT_DCA_GREQID_VALID       0x20000000
+#define IOAT_DCA_GREQID_LASTID      0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET     0x02
+
+#define IOAT1_CHAINADDR_OFFSET         0x0C    /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET         0x10    /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver)             ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW     0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW     0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver)         ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH    0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH    0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver)                ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET           0x14    /*  8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET           0x04    /*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET                     0x20
+#define IOAT_CHANCMD_RESUME                    0x10
+#define IOAT_CHANCMD_ABORT                     0x08
+#define IOAT_CHANCMD_SUSPEND                   0x04
+#define IOAT_CHANCMD_APPEND                    0x02
+#define IOAT_CHANCMD_START                     0x01
+
+#define IOAT_CHANCMP_OFFSET                    0x18    /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW                        0x18
+#define IOAT_CHANCMP_OFFSET_HIGH               0x1C
+
+#define IOAT_CDAR_OFFSET                       0x20    /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW                   0x20
+#define IOAT_CDAR_OFFSET_HIGH                  0x24
+
+#define IOAT_CHANERR_OFFSET                    0x28    /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR      0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR     0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR     0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR       0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR      0x0010
+#define IOAT_CHANERR_CHANCMD_ERR               0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR  0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR      0x0080
+#define IOAT_CHANERR_READ_DATA_ERR             0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR            0x0200
+#define IOAT_CHANERR_CONTROL_ERR       0x0400
+#define IOAT_CHANERR_LENGTH_ERR        0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR       0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR     0x2000
+#define IOAT_CHANERR_SOFT_ERR                  0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR          0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR          0x10000
+#define IOAT_CHANERR_XOR_Q_ERR                 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR      0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET               0x2C    /* 32-bit Channel Error Mask Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c
deleted file mode 100644 (file)
index c012a1e..0000000
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2007 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/dca.h>
-
-/* either a kernel change is needed, or we need something like this in kernel */
-#ifndef CONFIG_SMP
-#include <asm/smp.h>
-#undef cpu_physical_id
-#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
-#endif
-
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-
-/*
- * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
- * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
- * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
- */
-#define DCA_TAG_MAP_VALID 0x80
-
-#define DCA3_TAG_MAP_BIT_TO_INV 0x80
-#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
-#define DCA3_TAG_MAP_LITERAL_VAL 0x1
-
-#define DCA_TAG_MAP_MASK 0xDF
-
-/* expected tag map bytes for I/OAT ver.2 */
-#define DCA2_TAG_MAP_BYTE0 0x80
-#define DCA2_TAG_MAP_BYTE1 0x0
-#define DCA2_TAG_MAP_BYTE2 0x81
-#define DCA2_TAG_MAP_BYTE3 0x82
-#define DCA2_TAG_MAP_BYTE4 0x82
-
-/* verify if tag map matches expected values */
-static inline int dca2_tag_map_valid(u8 *tag_map)
-{
-       return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
-               (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
-               (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
-               (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
-               (tag_map[4] == DCA2_TAG_MAP_BYTE4));
-}
-
-/*
- * "Legacy" DCA systems do not implement the DCA register set in the
- * I/OAT device.  Software needs direct support for their tag mappings.
- */
-
-#define APICID_BIT(x)          (DCA_TAG_MAP_VALID | (x))
-#define IOAT_TAG_MAP_LEN       8
-
-static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
-       1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
-static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
-       1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
-static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
-       1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
-static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
-
-/* pack PCI B/D/F into a u16 */
-static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
-{
-       return (pci->bus->number << 8) | pci->devfn;
-}
-
-static int dca_enabled_in_bios(struct pci_dev *pdev)
-{
-       /* CPUID level 9 returns DCA configuration */
-       /* Bit 0 indicates DCA enabled by the BIOS */
-       unsigned long cpuid_level_9;
-       int res;
-
-       cpuid_level_9 = cpuid_eax(9);
-       res = test_bit(0, &cpuid_level_9);
-       if (!res)
-               dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
-
-       return res;
-}
-
-static int system_has_dca_enabled(struct pci_dev *pdev)
-{
-       if (boot_cpu_has(X86_FEATURE_DCA))
-               return dca_enabled_in_bios(pdev);
-
-       dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
-       return 0;
-}
-
-struct ioat_dca_slot {
-       struct pci_dev *pdev;   /* requester device */
-       u16 rid;                /* requester id, as used by IOAT */
-};
-
-#define IOAT_DCA_MAX_REQ 6
-#define IOAT3_DCA_MAX_REQ 2
-
-struct ioat_dca_priv {
-       void __iomem            *iobase;
-       void __iomem            *dca_base;
-       int                      max_requesters;
-       int                      requester_count;
-       u8                       tag_map[IOAT_TAG_MAP_LEN];
-       struct ioat_dca_slot     req_slots[0];
-};
-
-/* 5000 series chipset DCA Port Requester ID Table Entry Format
- * [15:8]      PCI-Express Bus Number
- * [7:3]       PCI-Express Device Number
- * [2:0]       PCI-Express Function Number
- *
- * 5000 series chipset DCA control register format
- * [7:1]       Reserved (0)
- * [0]         Ignore Function Number
- */
-
-static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 id;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-       id = dcaid_from_pcidev(pdev);
-
-       if (ioatdca->requester_count == ioatdca->max_requesters)
-               return -ENODEV;
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == NULL) {
-                       /* found an empty slot */
-                       ioatdca->requester_count++;
-                       ioatdca->req_slots[i].pdev = pdev;
-                       ioatdca->req_slots[i].rid = id;
-                       writew(id, ioatdca->dca_base + (i * 4));
-                       /* make sure the ignore function bit is off */
-                       writeb(0, ioatdca->dca_base + (i * 4) + 2);
-                       return i;
-               }
-       }
-       /* Error: ioatdca->requester_count is out of whack */
-       return -EFAULT;
-}
-
-static int ioat_dca_remove_requester(struct dca_provider *dca,
-                                    struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == pdev) {
-                       writew(0, ioatdca->dca_base + (i * 4));
-                       ioatdca->req_slots[i].pdev = NULL;
-                       ioatdca->req_slots[i].rid = 0;
-                       ioatdca->requester_count--;
-                       return i;
-               }
-       }
-       return -ENODEV;
-}
-
-static u8 ioat_dca_get_tag(struct dca_provider *dca,
-                          struct device *dev,
-                          int cpu)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       int i, apic_id, bit, value;
-       u8 entry, tag;
-
-       tag = 0;
-       apic_id = cpu_physical_id(cpu);
-
-       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
-               entry = ioatdca->tag_map[i];
-               if (entry & DCA_TAG_MAP_VALID) {
-                       bit = entry & ~DCA_TAG_MAP_VALID;
-                       value = (apic_id & (1 << bit)) ? 1 : 0;
-               } else {
-                       value = entry ? 1 : 0;
-               }
-               tag |= (value << i);
-       }
-       return tag;
-}
-
-static int ioat_dca_dev_managed(struct dca_provider *dca,
-                               struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-
-       pdev = to_pci_dev(dev);
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == pdev)
-                       return 1;
-       }
-       return 0;
-}
-
-static struct dca_ops ioat_dca_ops = {
-       .add_requester          = ioat_dca_add_requester,
-       .remove_requester       = ioat_dca_remove_requester,
-       .get_tag                = ioat_dca_get_tag,
-       .dev_managed            = ioat_dca_dev_managed,
-};
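
Clients never call these ops directly; the DCA core dispatches to them on
behalf of a requester driver (e.g. a NIC). A rough sketch of the consumer
side, assuming the dca.h API of this era (dca_add_requester() and
dca3_get_tag()); the helper is illustrative only:

	#include <linux/dca.h>

	static u8 example_tag_for_next_completion(struct device *dev, int cpu)
	{
		/* routed to the provider's .add_requester op */
		if (dca_add_requester(dev) == 0)
			/* routed to the provider's .get_tag op */
			return dca3_get_tag(dev, cpu);
		return 0;
	}
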
-
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
-       struct dca_provider *dca;
-       struct ioat_dca_priv *ioatdca;
-       u8 *tag_map = NULL;
-       int i;
-       int err;
-       u8 version;
-       u8 max_requesters;
-
-       if (!system_has_dca_enabled(pdev))
-               return NULL;
-
-       /* I/OAT v1 systems must have a known tag_map to support DCA */
-       switch (pdev->vendor) {
-       case PCI_VENDOR_ID_INTEL:
-               switch (pdev->device) {
-               case PCI_DEVICE_ID_INTEL_IOAT:
-                       tag_map = ioat_tag_map_BNB;
-                       break;
-               case PCI_DEVICE_ID_INTEL_IOAT_CNB:
-                       tag_map = ioat_tag_map_CNB;
-                       break;
-               case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
-                       tag_map = ioat_tag_map_SCNB;
-                       break;
-               }
-               break;
-       case PCI_VENDOR_ID_UNISYS:
-               switch (pdev->device) {
-               case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
-                       tag_map = ioat_tag_map_UNISYS;
-                       break;
-               }
-               break;
-       }
-       if (tag_map == NULL)
-               return NULL;
-
-       version = readb(iobase + IOAT_VER_OFFSET);
-       if (version == IOAT_VER_3_0)
-               max_requesters = IOAT3_DCA_MAX_REQ;
-       else
-               max_requesters = IOAT_DCA_MAX_REQ;
-
-       dca = alloc_dca_provider(&ioat_dca_ops,
-                       sizeof(*ioatdca) +
-                       (sizeof(struct ioat_dca_slot) * max_requesters));
-       if (!dca)
-               return NULL;
-
-       ioatdca = dca_priv(dca);
-       ioatdca->max_requesters = max_requesters;
-       ioatdca->dca_base = iobase + 0x54;
-
-       /* copy over the APIC ID to DCA tag mapping */
-       for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
-               ioatdca->tag_map[i] = tag_map[i];
-
-       err = register_dca_provider(dca, &pdev->dev);
-       if (err) {
-               free_dca_provider(dca);
-               return NULL;
-       }
-
-       return dca;
-}
-
-static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 id;
-       u16 global_req_table;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-       id = dcaid_from_pcidev(pdev);
-
-       if (ioatdca->requester_count == ioatdca->max_requesters)
-               return -ENODEV;
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == NULL) {
-                       /* found an empty slot */
-                       ioatdca->requester_count++;
-                       ioatdca->req_slots[i].pdev = pdev;
-                       ioatdca->req_slots[i].rid = id;
-                       global_req_table =
-                             readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
-                       writel(id | IOAT_DCA_GREQID_VALID,
-                              ioatdca->iobase + global_req_table + (i * 4));
-                       return i;
-               }
-       }
-       /* Error: ioatdca->requester_count is out of whack */
-       return -EFAULT;
-}
-
-static int ioat2_dca_remove_requester(struct dca_provider *dca,
-                                     struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 global_req_table;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == pdev) {
-                       global_req_table =
-                             readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
-                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
-                       ioatdca->req_slots[i].pdev = NULL;
-                       ioatdca->req_slots[i].rid = 0;
-                       ioatdca->requester_count--;
-                       return i;
-               }
-       }
-       return -ENODEV;
-}
-
-static u8 ioat2_dca_get_tag(struct dca_provider *dca,
-                           struct device *dev,
-                           int cpu)
-{
-       u8 tag;
-
-       tag = ioat_dca_get_tag(dca, dev, cpu);
-       tag = (~tag) & 0x1F;
-       return tag;
-}
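
Continuing the worked example from the version-1 decode above: the
version-2 encoding simply inverts the base tag and keeps the low 5 bits.

	/* if ioat_dca_get_tag() yields 0x0f, the v2 tag is (~0x0f) & 0x1f = 0x10 */
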
-
-static struct dca_ops ioat2_dca_ops = {
-       .add_requester          = ioat2_dca_add_requester,
-       .remove_requester       = ioat2_dca_remove_requester,
-       .get_tag                = ioat2_dca_get_tag,
-       .dev_managed            = ioat_dca_dev_managed,
-};
-
-static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
-{
-       int slots = 0;
-       u32 req;
-       u16 global_req_table;
-
-       global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
-       if (global_req_table == 0)
-               return 0;
-       do {
-               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
-               slots++;
-       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
-
-       return slots;
-}
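
A brief illustration of the table walk (the entry values are hypothetical):

	/*
	 * global requester table: [0] 0x0, [1] 0x0, [2] IOAT_DCA_GREQID_LASTID
	 * The loop reads three entries, sees LASTID set on the third, and
	 * returns slots == 3.
	 */
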
-
-struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
-       struct dca_provider *dca;
-       struct ioat_dca_priv *ioatdca;
-       int slots;
-       int i;
-       int err;
-       u32 tag_map;
-       u16 dca_offset;
-       u16 csi_fsb_control;
-       u16 pcie_control;
-       u8 bit;
-
-       if (!system_has_dca_enabled(pdev))
-               return NULL;
-
-       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
-       if (dca_offset == 0)
-               return NULL;
-
-       slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
-       if (slots == 0)
-               return NULL;
-
-       dca = alloc_dca_provider(&ioat2_dca_ops,
-                                sizeof(*ioatdca)
-                                     + (sizeof(struct ioat_dca_slot) * slots));
-       if (!dca)
-               return NULL;
-
-       ioatdca = dca_priv(dca);
-       ioatdca->iobase = iobase;
-       ioatdca->dca_base = iobase + dca_offset;
-       ioatdca->max_requesters = slots;
-
-       /* some BIOSes might not know to turn these on */
-       csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
-       if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
-               csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
-               writew(csi_fsb_control,
-                      ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
-       }
-       pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
-       if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
-               pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
-               writew(pcie_control,
-                      ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
-       }
-
-       /* TODO version, compatibility and configuration checks */
-
-       /* copy out the APIC to DCA tag map */
-       tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
-       for (i = 0; i < 5; i++) {
-               bit = (tag_map >> (4 * i)) & 0x0f;
-               if (bit < 8)
-                       ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
-               else
-                       ioatdca->tag_map[i] = 0;
-       }
-
-       if (!dca2_tag_map_valid(ioatdca->tag_map)) {
-               dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
-                       "disabling DCA\n");
-               free_dca_provider(dca);
-               return NULL;
-       }
-
-       err = register_dca_provider(dca, &pdev->dev);
-       if (err) {
-               free_dca_provider(dca);
-               return NULL;
-       }
-
-       return dca;
-}
-
-static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 id;
-       u16 global_req_table;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-       id = dcaid_from_pcidev(pdev);
-
-       if (ioatdca->requester_count == ioatdca->max_requesters)
-               return -ENODEV;
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == NULL) {
-                       /* found an empty slot */
-                       ioatdca->requester_count++;
-                       ioatdca->req_slots[i].pdev = pdev;
-                       ioatdca->req_slots[i].rid = id;
-                       global_req_table =
-                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
-                       writel(id | IOAT_DCA_GREQID_VALID,
-                              ioatdca->iobase + global_req_table + (i * 4));
-                       return i;
-               }
-       }
-       /* Error: ioatdca->requester_count is out of whack */
-       return -EFAULT;
-}
-
-static int ioat3_dca_remove_requester(struct dca_provider *dca,
-                                     struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 global_req_table;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == pdev) {
-                       global_req_table =
-                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
-                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
-                       ioatdca->req_slots[i].pdev = NULL;
-                       ioatdca->req_slots[i].rid = 0;
-                       ioatdca->requester_count--;
-                       return i;
-               }
-       }
-       return -ENODEV;
-}
-
-static u8 ioat3_dca_get_tag(struct dca_provider *dca,
-                           struct device *dev,
-                           int cpu)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       int i, apic_id, bit, value;
-       u8 entry, tag;
-
-       tag = 0;
-       apic_id = cpu_physical_id(cpu);
-
-       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
-               entry = ioatdca->tag_map[i];
-               if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
-                       bit = entry &
-                               ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
-                       value = (apic_id & (1 << bit)) ? 1 : 0;
-               } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
-                       bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
-                       value = (apic_id & (1 << bit)) ? 0 : 1;
-               } else {
-                       value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
-               }
-               tag |= (value << i);
-       }
-
-       return tag;
-}
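
A worked decode of the three DCA3 entry types (the APIC ID is again
hypothetical):

	/*
	 * apic_id = 0x6 (bits 1 and 2 set):
	 *   entry = 0x42 (BIT_TO_SEL | 2): select APIC ID bit 2 (set) -> 1
	 *   entry = 0x81 (BIT_TO_INV | 1): invert APIC ID bit 1 (set) -> 0
	 *   entry = 0x01 (LITERAL_VAL):    literal value              -> 1
	 */
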
-
-static struct dca_ops ioat3_dca_ops = {
-       .add_requester          = ioat3_dca_add_requester,
-       .remove_requester       = ioat3_dca_remove_requester,
-       .get_tag                = ioat3_dca_get_tag,
-       .dev_managed            = ioat_dca_dev_managed,
-};
-
-static int ioat3_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
-{
-       int slots = 0;
-       u32 req;
-       u16 global_req_table;
-
-       global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
-       if (global_req_table == 0)
-               return 0;
-
-       do {
-               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
-               slots++;
-       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
-
-       return slots;
-}
-
-struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
-       struct dca_provider *dca;
-       struct ioat_dca_priv *ioatdca;
-       int slots;
-       int i;
-       int err;
-       u16 dca_offset;
-       u16 csi_fsb_control;
-       u16 pcie_control;
-       u8 bit;
-
-       union {
-               u64 full;
-               struct {
-                       u32 low;
-                       u32 high;
-               };
-       } tag_map;
-
-       if (!system_has_dca_enabled(pdev))
-               return NULL;
-
-       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
-       if (dca_offset == 0)
-               return NULL;
-
-       slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
-       if (slots == 0)
-               return NULL;
-
-       dca = alloc_dca_provider(&ioat3_dca_ops,
-                                sizeof(*ioatdca)
-                                     + (sizeof(struct ioat_dca_slot) * slots));
-       if (!dca)
-               return NULL;
-
-       ioatdca = dca_priv(dca);
-       ioatdca->iobase = iobase;
-       ioatdca->dca_base = iobase + dca_offset;
-       ioatdca->max_requesters = slots;
-
-       /* some BIOSes might not know to turn these on */
-       csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
-       if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
-               csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
-               writew(csi_fsb_control,
-                      ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
-       }
-       pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
-       if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
-               pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
-               writew(pcie_control,
-                      ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
-       }
-
-       /* TODO version, compatibility and configuration checks */
-
-       /* copy out the APIC to DCA tag map */
-       tag_map.low =
-               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
-       tag_map.high =
-               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
-       for (i = 0; i < 8; i++) {
-               bit = tag_map.full >> (8 * i);
-               ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
-       }
-
-       err = register_dca_provider(dca, &pdev->dev);
-       if (err) {
-               free_dca_provider(dca);
-               return NULL;
-       }
-
-       return dca;
-}
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
deleted file mode 100644 (file)
index a600fc0..0000000
+++ /dev/null
@@ -1,1741 +0,0 @@
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2004 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-/*
- * This driver supports an Intel I/OAT DMA engine, which does asynchronous
- * copy operations.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/i7300_idle.h>
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-#include "ioatdma_hw.h"
-
-#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
-#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
-#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
-#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
-
-#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
-static int ioat_pending_level = 4;
-module_param(ioat_pending_level, int, 0644);
-MODULE_PARM_DESC(ioat_pending_level,
-                "high-water mark for pushing ioat descriptors (default: 4)");
-
-#define RESET_DELAY  msecs_to_jiffies(100)
-#define WATCHDOG_DELAY  round_jiffies(msecs_to_jiffies(2000))
-static void ioat_dma_chan_reset_part2(struct work_struct *work);
-static void ioat_dma_chan_watchdog(struct work_struct *work);
-
-/*
- * workaround for IOAT ver.3.0 null descriptor issue
- * (channel returns error when size is 0)
- */
-#define NULL_DESC_BUFFER_SIZE 1
-
-/* internal functions */
-static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
-
-static struct ioat_desc_sw *
-ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
-static struct ioat_desc_sw *
-ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
-
-static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
-                                               struct ioatdma_device *device,
-                                               int index)
-{
-       return device->idx[index];
-}
-
-/**
- * ioat_dma_do_interrupt - handler used for single vector interrupt mode
- * @irq: interrupt id
- * @data: interrupt data
- */
-static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
-{
-       struct ioatdma_device *instance = data;
-       struct ioat_dma_chan *ioat_chan;
-       unsigned long attnstatus;
-       int bit;
-       u8 intrctrl;
-
-       intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
-
-       if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
-               return IRQ_NONE;
-
-       if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
-               writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
-               return IRQ_NONE;
-       }
-
-       attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
-       for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
-               ioat_chan = ioat_lookup_chan_by_index(instance, bit);
-               tasklet_schedule(&ioat_chan->cleanup_task);
-       }
-
-       writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
-       return IRQ_HANDLED;
-}
-
-/**
- * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
- * @irq: interrupt id
- * @data: interrupt data
- */
-static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
-{
-       struct ioat_dma_chan *ioat_chan = data;
-
-       tasklet_schedule(&ioat_chan->cleanup_task);
-
-       return IRQ_HANDLED;
-}
-
-static void ioat_dma_cleanup_tasklet(unsigned long data);
-
-/**
- * ioat_dma_enumerate_channels - find and initialize the device's channels
- * @device: the device to be enumerated
- */
-static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
-{
-       u8 xfercap_scale;
-       u32 xfercap;
-       int i;
-       struct ioat_dma_chan *ioat_chan;
-
-       /*
-        * IOAT ver.3 workarounds
-        */
-       if (device->version == IOAT_VER_3_0) {
-               u32 chan_err_mask;
-               u16 dev_id;
-               u32 dmauncerrsts;
-
-               /*
-                * Write CHANERRMSK_INT with 3E07h to mask out the errors
-                * that can cause stability issues for IOAT ver.3
-                */
-               chan_err_mask = 0x3E07;
-               pci_write_config_dword(device->pdev,
-                       IOAT_PCI_CHANERRMASK_INT_OFFSET,
-                       chan_err_mask);
-
-               /*
-                * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
-                * (workaround for spurious config parity error after restart)
-                */
-               pci_read_config_word(device->pdev,
-                       IOAT_PCI_DEVICE_ID_OFFSET,
-                       &dev_id);
-               if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
-                       dmauncerrsts = 0x10;
-                       pci_write_config_dword(device->pdev,
-                               IOAT_PCI_DMAUNCERRSTS_OFFSET,
-                               dmauncerrsts);
-               }
-       }
-
-       device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
-       xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
-       xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
-
-#ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
-       if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
-               device->common.chancnt--;
-       }
-#endif
-       for (i = 0; i < device->common.chancnt; i++) {
-               ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
-               if (!ioat_chan) {
-                       device->common.chancnt = i;
-                       break;
-               }
-
-               ioat_chan->device = device;
-               ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
-               ioat_chan->xfercap = xfercap;
-               ioat_chan->desccount = 0;
-               INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
-               if (ioat_chan->device->version == IOAT_VER_2_0)
-                       writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE |
-                              IOAT_DMA_DCA_ANY_CPU,
-                              ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
-               else if (ioat_chan->device->version == IOAT_VER_3_0)
-                       writel(IOAT_DMA_DCA_ANY_CPU,
-                              ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
-               spin_lock_init(&ioat_chan->cleanup_lock);
-               spin_lock_init(&ioat_chan->desc_lock);
-               INIT_LIST_HEAD(&ioat_chan->free_desc);
-               INIT_LIST_HEAD(&ioat_chan->used_desc);
-               /* This should be made common somewhere in dmaengine.c */
-               ioat_chan->common.device = &device->common;
-               list_add_tail(&ioat_chan->common.device_node,
-                             &device->common.channels);
-               device->idx[i] = ioat_chan;
-               tasklet_init(&ioat_chan->cleanup_task,
-                            ioat_dma_cleanup_tasklet,
-                            (unsigned long) ioat_chan);
-               tasklet_disable(&ioat_chan->cleanup_task);
-       }
-       return device->common.chancnt;
-}
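
A worked example of the xfercap computation above (the register value is
hypothetical):

	/*
	 * xfercap_scale == 0x0c -> xfercap = 1 << 12 = 4096 bytes per
	 * descriptor; xfercap_scale == 0 is special-cased to (u32)-1,
	 * i.e. an effectively unlimited per-descriptor transfer size.
	 */
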
-
-/**
- * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
- *                                 descriptors to hw
- * @chan: DMA channel handle
- */
-static inline void __ioat1_dma_memcpy_issue_pending(
-                                               struct ioat_dma_chan *ioat_chan)
-{
-       ioat_chan->pending = 0;
-       writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
-}
-
-static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-
-       if (ioat_chan->pending > 0) {
-               spin_lock_bh(&ioat_chan->desc_lock);
-               __ioat1_dma_memcpy_issue_pending(ioat_chan);
-               spin_unlock_bh(&ioat_chan->desc_lock);
-       }
-}
-
-static inline void __ioat2_dma_memcpy_issue_pending(
-                                               struct ioat_dma_chan *ioat_chan)
-{
-       ioat_chan->pending = 0;
-       writew(ioat_chan->dmacount,
-              ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-}
-
-static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-
-       if (ioat_chan->pending > 0) {
-               spin_lock_bh(&ioat_chan->desc_lock);
-               __ioat2_dma_memcpy_issue_pending(ioat_chan);
-               spin_unlock_bh(&ioat_chan->desc_lock);
-       }
-}
-
-/**
- * ioat_dma_chan_reset_part2 - reinit the channel after a reset
- */
-static void ioat_dma_chan_reset_part2(struct work_struct *work)
-{
-       struct ioat_dma_chan *ioat_chan =
-               container_of(work, struct ioat_dma_chan, work.work);
-       struct ioat_desc_sw *desc;
-
-       spin_lock_bh(&ioat_chan->cleanup_lock);
-       spin_lock_bh(&ioat_chan->desc_lock);
-
-       ioat_chan->completion_virt->low = 0;
-       ioat_chan->completion_virt->high = 0;
-       ioat_chan->pending = 0;
-
-       /*
-        * count the descriptors waiting, and be sure to do it
-        * right for both the CB1 line and the CB2 ring
-        */
-       ioat_chan->dmacount = 0;
-       if (ioat_chan->used_desc.prev) {
-               desc = to_ioat_desc(ioat_chan->used_desc.prev);
-               do {
-                       ioat_chan->dmacount++;
-                       desc = to_ioat_desc(desc->node.next);
-               } while (&desc->node != ioat_chan->used_desc.next);
-       }
-
-       /*
-        * write the new starting descriptor address
-        * this puts the channel engine into the ARMED state
-        */
-       desc = to_ioat_desc(ioat_chan->used_desc.prev);
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
-                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
-               writel(((u64) desc->async_tx.phys) >> 32,
-                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
-
-               writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
-                       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
-               break;
-       case IOAT_VER_2_0:
-               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
-                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
-               writel(((u64) desc->async_tx.phys) >> 32,
-                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
-
-               /* tell the engine to go with what's left to be done */
-               writew(ioat_chan->dmacount,
-                      ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-
-               break;
-       }
-       dev_err(&ioat_chan->device->pdev->dev,
-               "chan%d reset - %d descs waiting, %d total desc\n",
-               chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
-
-       spin_unlock_bh(&ioat_chan->desc_lock);
-       spin_unlock_bh(&ioat_chan->cleanup_lock);
-}
-
-/**
- * ioat_dma_reset_channel - restart a channel
- * @ioat_chan: IOAT DMA channel handle
- */
-static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
-{
-       u32 chansts, chanerr;
-
-       if (!ioat_chan->used_desc.prev)
-               return;
-
-       chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       chansts = (ioat_chan->completion_virt->low
-                                       & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
-       if (chanerr) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
-                       chan_num(ioat_chan), chansts, chanerr);
-               writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       }
-
-       /*
-        * Whack it upside the head with a reset and wait for things to
-        * settle out.  Force the pending count to a large negative value
-        * to make sure no one triggers an issue_pending while we're
-        * waiting.
-        */
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       ioat_chan->pending = INT_MIN;
-       writeb(IOAT_CHANCMD_RESET,
-              ioat_chan->reg_base
-              + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       /* schedule the 2nd half instead of sleeping a long time */
-       schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
-}
-
-/**
- * ioat_dma_chan_watchdog - watch for stuck channels
- */
-static void ioat_dma_chan_watchdog(struct work_struct *work)
-{
-       struct ioatdma_device *device =
-               container_of(work, struct ioatdma_device, work.work);
-       struct ioat_dma_chan *ioat_chan;
-       int i;
-
-       union {
-               u64 full;
-               struct {
-                       u32 low;
-                       u32 high;
-               };
-       } completion_hw;
-       unsigned long compl_desc_addr_hw;
-
-       for (i = 0; i < device->common.chancnt; i++) {
-               ioat_chan = ioat_lookup_chan_by_index(device, i);
-
-               if (ioat_chan->device->version == IOAT_VER_1_2
-                       /* have we started processing anything yet? */
-                   && ioat_chan->last_completion
-                       /* have we completed any since last watchdog cycle? */
-                   && (ioat_chan->last_completion ==
-                               ioat_chan->watchdog_completion)
-                       /* has TCP stuck on one cookie since last watchdog? */
-                   && (ioat_chan->watchdog_tcp_cookie ==
-                               ioat_chan->watchdog_last_tcp_cookie)
-                   && (ioat_chan->watchdog_tcp_cookie !=
-                               ioat_chan->completed_cookie)
-                       /* is there something in the chain to be processed? */
-                       /* CB1 chain always has at least the last one processed */
-                   && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
-                   && ioat_chan->pending == 0) {
-
-                       /*
-                        * Check the CHANSTS register for the completed
-                        * descriptor address: if it differs from the
-                        * completion writeback, is non-zero, and has
-                        * changed since the last watchdog, we can assume
-                        * the channel is still working correctly and the
-                        * problem is in the completion writeback, so
-                        * update the writeback with the actual CHANSTS
-                        * value; otherwise, try resetting the channel.
-                        */
-
-                       completion_hw.low = readl(ioat_chan->reg_base +
-                               IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
-                       completion_hw.high = readl(ioat_chan->reg_base +
-                               IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
-#if (BITS_PER_LONG == 64)
-                       compl_desc_addr_hw =
-                               completion_hw.full
-                               & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-#else
-                       compl_desc_addr_hw =
-                               completion_hw.low & IOAT_LOW_COMPLETION_MASK;
-#endif
-
-                       if ((compl_desc_addr_hw != 0)
-                          && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
-                          && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
-                               ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
-                               ioat_chan->completion_virt->low = completion_hw.low;
-                               ioat_chan->completion_virt->high = completion_hw.high;
-                       } else {
-                               ioat_dma_reset_channel(ioat_chan);
-                               ioat_chan->watchdog_completion = 0;
-                               ioat_chan->last_compl_desc_addr_hw = 0;
-                       }
-
-               /*
-                * for version 2.0 if there are descriptors yet to be processed
-                * and the last completed hasn't changed since the last watchdog
-                *      if they haven't hit the pending level
-                *          issue the pending to push them through
-                *      else
-                *          try resetting the channel
-                */
-               } else if (ioat_chan->device->version == IOAT_VER_2_0
-                   && ioat_chan->used_desc.prev
-                   && ioat_chan->last_completion
-                   && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
-
-                       if (ioat_chan->pending < ioat_pending_level)
-                               ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
-                       else {
-                               ioat_dma_reset_channel(ioat_chan);
-                               ioat_chan->watchdog_completion = 0;
-                       }
-               } else {
-                       ioat_chan->last_compl_desc_addr_hw = 0;
-                       ioat_chan->watchdog_completion
-                                       = ioat_chan->last_completion;
-               }
-
-               ioat_chan->watchdog_last_tcp_cookie =
-                       ioat_chan->watchdog_tcp_cookie;
-       }
-
-       schedule_delayed_work(&device->work, WATCHDOG_DELAY);
-}
-
-static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
-       struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
-       struct ioat_desc_sw *prev, *new;
-       struct ioat_dma_descriptor *hw;
-       dma_cookie_t cookie;
-       LIST_HEAD(new_chain);
-       u32 copy;
-       size_t len;
-       dma_addr_t src, dst;
-       unsigned long orig_flags;
-       unsigned int desc_count = 0;
-
-       /* src and dest and len are stored in the initial descriptor */
-       len = first->len;
-       src = first->src;
-       dst = first->dst;
-       orig_flags = first->async_tx.flags;
-       new = first;
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       prev = to_ioat_desc(ioat_chan->used_desc.prev);
-       prefetch(prev->hw);
-       do {
-               copy = min_t(size_t, len, ioat_chan->xfercap);
-
-               async_tx_ack(&new->async_tx);
-
-               hw = new->hw;
-               hw->size = copy;
-               hw->ctl = 0;
-               hw->src_addr = src;
-               hw->dst_addr = dst;
-               hw->next = 0;
-
-               /* chain together the physical address list for the HW */
-               wmb();
-               prev->hw->next = (u64) new->async_tx.phys;
-
-               len -= copy;
-               dst += copy;
-               src += copy;
-
-               list_add_tail(&new->node, &new_chain);
-               desc_count++;
-               prev = new;
-       } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
-
-       if (!new) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "tx submit failed\n");
-               spin_unlock_bh(&ioat_chan->desc_lock);
-               return -ENOMEM;
-       }
-
-       hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-       if (first->async_tx.callback) {
-               hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
-               if (first != new) {
-                       /* move the callback into the last desc */
-                       new->async_tx.callback = first->async_tx.callback;
-                       new->async_tx.callback_param
-                                       = first->async_tx.callback_param;
-                       first->async_tx.callback = NULL;
-                       first->async_tx.callback_param = NULL;
-               }
-       }
-
-       new->tx_cnt = desc_count;
-       new->async_tx.flags = orig_flags; /* client is in control of this ack */
-
-       /* store the original values for use in later cleanup */
-       if (new != first) {
-               new->src = first->src;
-               new->dst = first->dst;
-               new->len = first->len;
-       }
-
-       /* cookie incr and addition to used_list must be atomic */
-       cookie = ioat_chan->common.cookie;
-       cookie++;
-       if (cookie < 0)
-               cookie = 1;
-       ioat_chan->common.cookie = new->async_tx.cookie = cookie;
-
-       /* write address into NextDescriptor field of last desc in chain */
-       to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
-                                                       first->async_tx.phys;
-       list_splice_tail(&new_chain, &ioat_chan->used_desc);
-
-       ioat_chan->dmacount += desc_count;
-       ioat_chan->pending += desc_count;
-       if (ioat_chan->pending >= ioat_pending_level)
-               __ioat1_dma_memcpy_issue_pending(ioat_chan);
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       return cookie;
-}
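
To make the chaining loop above concrete (the sizes are hypothetical): with
xfercap = 4096, a 10000-byte copy is carved into three descriptors.

	/*
	 * len = 10000, xfercap = 4096:
	 *   desc 1: copy = 4096, 5904 remaining
	 *   desc 2: copy = 4096, 1808 remaining
	 *   desc 3: copy = 1808, done
	 * Only the last descriptor carries IOAT_DMA_DESCRIPTOR_CTL_INT_GN
	 * (set only when a callback is attached), so one interrupt covers
	 * the whole transfer.  The version-2 path below carves identically.
	 */
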
-
-static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
-       struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
-       struct ioat_desc_sw *new;
-       struct ioat_dma_descriptor *hw;
-       dma_cookie_t cookie;
-       u32 copy;
-       size_t len;
-       dma_addr_t src, dst;
-       unsigned long orig_flags;
-       unsigned int desc_count = 0;
-
-       /* src and dest and len are stored in the initial descriptor */
-       len = first->len;
-       src = first->src;
-       dst = first->dst;
-       orig_flags = first->async_tx.flags;
-       new = first;
-
-       /*
-        * ioat_chan->desc_lock is still in force in version 2 path
-        * it gets unlocked at end of this function
-        */
-       do {
-               copy = min_t(size_t, len, ioat_chan->xfercap);
-
-               async_tx_ack(&new->async_tx);
-
-               hw = new->hw;
-               hw->size = copy;
-               hw->ctl = 0;
-               hw->src_addr = src;
-               hw->dst_addr = dst;
-
-               len -= copy;
-               dst += copy;
-               src += copy;
-               desc_count++;
-       } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
-
-       if (!new) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "tx submit failed\n");
-               spin_unlock_bh(&ioat_chan->desc_lock);
-               return -ENOMEM;
-       }
-
-       hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-       if (first->async_tx.callback) {
-               hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
-               if (first != new) {
-                       /* move the callback into the last desc */
-                       new->async_tx.callback = first->async_tx.callback;
-                       new->async_tx.callback_param
-                                       = first->async_tx.callback_param;
-                       first->async_tx.callback = NULL;
-                       first->async_tx.callback_param = NULL;
-               }
-       }
-
-       new->tx_cnt = desc_count;
-       new->async_tx.flags = orig_flags; /* client is in control of this ack */
-
-       /* store the original values for use in later cleanup */
-       if (new != first) {
-               new->src = first->src;
-               new->dst = first->dst;
-               new->len = first->len;
-       }
-
-       /* cookie incr and addition to used_list must be atomic */
-       cookie = ioat_chan->common.cookie;
-       cookie++;
-       if (cookie < 0)
-               cookie = 1;
-       ioat_chan->common.cookie = new->async_tx.cookie = cookie;
-
-       ioat_chan->dmacount += desc_count;
-       ioat_chan->pending += desc_count;
-       if (ioat_chan->pending >= ioat_pending_level)
-               __ioat2_dma_memcpy_issue_pending(ioat_chan);
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       return cookie;
-}
-
-/**
- * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
- * @ioat_chan: the channel supplying the memory pool for the descriptors
- * @flags: allocation flags
- */
-static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
-                                       struct ioat_dma_chan *ioat_chan,
-                                       gfp_t flags)
-{
-       struct ioat_dma_descriptor *desc;
-       struct ioat_desc_sw *desc_sw;
-       struct ioatdma_device *ioatdma_device;
-       dma_addr_t phys;
-
-       ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
-       desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
-       if (unlikely(!desc))
-               return NULL;
-
-       desc_sw = kzalloc(sizeof(*desc_sw), flags);
-       if (unlikely(!desc_sw)) {
-               pci_pool_free(ioatdma_device->dma_pool, desc, phys);
-               return NULL;
-       }
-
-       memset(desc, 0, sizeof(*desc));
-       dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               desc_sw->async_tx.tx_submit = ioat1_tx_submit;
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               desc_sw->async_tx.tx_submit = ioat2_tx_submit;
-               break;
-       }
-
-       desc_sw->hw = desc;
-       desc_sw->async_tx.phys = phys;
-
-       return desc_sw;
-}
-
-static int ioat_initial_desc_count = 256;
-module_param(ioat_initial_desc_count, int, 0644);
-MODULE_PARM_DESC(ioat_initial_desc_count,
-                "initial descriptors per channel (default: 256)");
-
-/**
- * ioat2_dma_massage_chan_desc - link the descriptors into a circle
- * @ioat_chan: the channel to be massaged
- */
-static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
-{
-       struct ioat_desc_sw *desc, *_desc;
-
-       /* setup used_desc */
-       ioat_chan->used_desc.next = ioat_chan->free_desc.next;
-       ioat_chan->used_desc.prev = NULL;
-
-       /* pull free_desc out of the circle so that every node is a hw
-        * descriptor, but leave it pointing to the list
-        */
-       ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
-       ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
-
-       /* circle link the hw descriptors */
-       desc = to_ioat_desc(ioat_chan->free_desc.next);
-       desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
-       list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
-               desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
-       }
-}
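
A sketch of the resulting layout (four descriptors shown for brevity):

	/*
	 * before: free_desc <-> d0 <-> d1 <-> d2 <-> d3 <-> free_desc
	 * after:  d0 <-> d1 <-> d2 <-> d3 <-> d0 <-> ...
	 * free_desc is pulled out of the circle (but still points into it),
	 * and each hw->next holds the next node's async_tx.phys so the
	 * hardware sees the same ring as the software list.
	 */
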
-
-/**
- * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
- * @chan: the channel to be filled out
- */
-static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioat_desc_sw *desc;
-       u16 chanctrl;
-       u32 chanerr;
-       int i;
-       LIST_HEAD(tmp_list);
-
-       /* have we already been set up? */
-       if (!list_empty(&ioat_chan->free_desc))
-               return ioat_chan->desccount;
-
-       /* Setup register to interrupt and write completion status on error */
-       chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
-               IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
-               IOAT_CHANCTRL_ERR_COMPLETION_EN;
-       writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
-
-       chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       if (chanerr) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "CHANERR = %x, clearing\n", chanerr);
-               writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       }
-
-       /* Allocate descriptors */
-       for (i = 0; i < ioat_initial_desc_count; i++) {
-               desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
-               if (!desc) {
-                       dev_err(&ioat_chan->device->pdev->dev,
-                               "Only %d initial descriptors\n", i);
-                       break;
-               }
-               list_add_tail(&desc->node, &tmp_list);
-       }
-       spin_lock_bh(&ioat_chan->desc_lock);
-       ioat_chan->desccount = i;
-       list_splice(&tmp_list, &ioat_chan->free_desc);
-       if (ioat_chan->device->version != IOAT_VER_1_2)
-               ioat2_dma_massage_chan_desc(ioat_chan);
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       /* allocate a completion writeback area */
-       /* do two 32-bit MMIO writes since one 64-bit write doesn't work */
-       ioat_chan->completion_virt =
-               pci_pool_alloc(ioat_chan->device->completion_pool,
-                              GFP_KERNEL,
-                              &ioat_chan->completion_addr);
-       memset(ioat_chan->completion_virt, 0,
-              sizeof(*ioat_chan->completion_virt));
-       writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
-              ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
-       writel(((u64) ioat_chan->completion_addr) >> 32,
-              ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
-
-       tasklet_enable(&ioat_chan->cleanup_task);
-       ioat_dma_start_null_desc(ioat_chan);  /* give chain to dma device */
-       return ioat_chan->desccount;
-}
-
-/**
- * ioat_dma_free_chan_resources - release all the descriptors
- * @chan: the channel to be cleaned
- */
-static void ioat_dma_free_chan_resources(struct dma_chan *chan)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
-       struct ioat_desc_sw *desc, *_desc;
-       int in_use_descs = 0;
-
-       /* Before freeing channel resources first check
-        * if they have been previously allocated for this channel.
-        */
-       if (ioat_chan->desccount == 0)
-               return;
-
-       tasklet_disable(&ioat_chan->cleanup_task);
-       ioat_dma_memcpy_cleanup(ioat_chan);
-
-       /* Delay 100ms after reset to allow internal DMA logic to quiesce
-        * before removing DMA descriptor resources.
-        */
-       writeb(IOAT_CHANCMD_RESET,
-              ioat_chan->reg_base
-                       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
-       mdelay(100);
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               list_for_each_entry_safe(desc, _desc,
-                                        &ioat_chan->used_desc, node) {
-                       in_use_descs++;
-                       list_del(&desc->node);
-                       pci_pool_free(ioatdma_device->dma_pool, desc->hw,
-                                     desc->async_tx.phys);
-                       kfree(desc);
-               }
-               list_for_each_entry_safe(desc, _desc,
-                                        &ioat_chan->free_desc, node) {
-                       list_del(&desc->node);
-                       pci_pool_free(ioatdma_device->dma_pool, desc->hw,
-                                     desc->async_tx.phys);
-                       kfree(desc);
-               }
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               list_for_each_entry_safe(desc, _desc,
-                                        ioat_chan->free_desc.next, node) {
-                       list_del(&desc->node);
-                       pci_pool_free(ioatdma_device->dma_pool, desc->hw,
-                                     desc->async_tx.phys);
-                       kfree(desc);
-               }
-               desc = to_ioat_desc(ioat_chan->free_desc.next);
-               pci_pool_free(ioatdma_device->dma_pool, desc->hw,
-                             desc->async_tx.phys);
-               kfree(desc);
-               INIT_LIST_HEAD(&ioat_chan->free_desc);
-               INIT_LIST_HEAD(&ioat_chan->used_desc);
-               break;
-       }
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       pci_pool_free(ioatdma_device->completion_pool,
-                     ioat_chan->completion_virt,
-                     ioat_chan->completion_addr);
-
-       /* one is ok since we left it on there on purpose */
-       if (in_use_descs > 1)
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "Freeing %d in use descriptors!\n",
-                       in_use_descs - 1);
-
-       ioat_chan->last_completion = ioat_chan->completion_addr = 0;
-       ioat_chan->pending = 0;
-       ioat_chan->dmacount = 0;
-       ioat_chan->desccount = 0;
-       ioat_chan->watchdog_completion = 0;
-       ioat_chan->last_compl_desc_addr_hw = 0;
-       ioat_chan->watchdog_tcp_cookie =
-               ioat_chan->watchdog_last_tcp_cookie = 0;
-}
-
-/**
- * ioat_dma_get_next_descriptor - return the next available descriptor
- * @ioat_chan: IOAT DMA channel handle
- *
- * Gets the next descriptor from the chain, and must be called with the
- * channel's desc_lock held.  Allocates more descriptors if the channel
- * has run out.
- */
-static struct ioat_desc_sw *
-ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
-{
-       struct ioat_desc_sw *new;
-
-       if (!list_empty(&ioat_chan->free_desc)) {
-               new = to_ioat_desc(ioat_chan->free_desc.next);
-               list_del(&new->node);
-       } else {
-               /* try to get another desc */
-               new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
-               if (!new) {
-                       dev_err(&ioat_chan->device->pdev->dev,
-                               "alloc failed\n");
-                       return NULL;
-               }
-       }
-
-       prefetch(new->hw);
-       return new;
-}
-
-static struct ioat_desc_sw *
-ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
-{
-       struct ioat_desc_sw *new;
-
-       /*
-        * used.prev points to where to start processing
-        * used.next points to next free descriptor
-        * if used.prev == NULL, there are none waiting to be processed
-        * if used.next == used.prev.prev, there is only one free descriptor,
-        *      and we need to use it as a noop descriptor before
-        *      linking in a new set of descriptors, since the device
-        *      has probably already read the pointer to it
-        */
-       if (ioat_chan->used_desc.prev &&
-           ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
-
-               struct ioat_desc_sw *desc;
-               struct ioat_desc_sw *noop_desc;
-               int i;
-
-               /* set up the noop descriptor */
-               noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
-               /* set size to non-zero value (channel returns error when size is 0) */
-               noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
-               noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
-               noop_desc->hw->src_addr = 0;
-               noop_desc->hw->dst_addr = 0;
-
-               ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
-               ioat_chan->pending++;
-               ioat_chan->dmacount++;
-
-               /* try to get a few more descriptors */
-               for (i = 16; i; i--) {
-                       desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
-                       if (!desc) {
-                               dev_err(&ioat_chan->device->pdev->dev,
-                                       "alloc failed\n");
-                               break;
-                       }
-                       list_add_tail(&desc->node, ioat_chan->used_desc.next);
-
-                       desc->hw->next
-                               = to_ioat_desc(desc->node.next)->async_tx.phys;
-                       to_ioat_desc(desc->node.prev)->hw->next
-                               = desc->async_tx.phys;
-                       ioat_chan->desccount++;
-               }
-
-               ioat_chan->used_desc.next = noop_desc->node.next;
-       }
-       new = to_ioat_desc(ioat_chan->used_desc.next);
-       prefetch(new);
-       ioat_chan->used_desc.next = new->node.next;
-
-       if (ioat_chan->used_desc.prev == NULL)
-               ioat_chan->used_desc.prev = &new->node;
-
-       prefetch(new->hw);
-       return new;
-}
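
A hypothetical helper, not present in the driver, that restates the ring invariant the comment above describes:

        static bool ioat2_ring_nearly_full(struct ioat_dma_chan *chan)
        {
                /* one free descriptor left: it must become a NULL
                 * descriptor before fresh work is linked in, since the
                 * hardware may already have fetched its next pointer
                 */
                return chan->used_desc.prev &&
                       chan->used_desc.next == chan->used_desc.prev->prev;
        }
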
-
-static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
-                                               struct ioat_dma_chan *ioat_chan)
-{
-       if (!ioat_chan)
-               return NULL;
-
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               return ioat1_dma_get_next_descriptor(ioat_chan);
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               return ioat2_dma_get_next_descriptor(ioat_chan);
-       }
-       return NULL;
-}
-
-static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
-                                               struct dma_chan *chan,
-                                               dma_addr_t dma_dest,
-                                               dma_addr_t dma_src,
-                                               size_t len,
-                                               unsigned long flags)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioat_desc_sw *new;
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       new = ioat_dma_get_next_descriptor(ioat_chan);
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       if (new) {
-               new->len = len;
-               new->dst = dma_dest;
-               new->src = dma_src;
-               new->async_tx.flags = flags;
-               return &new->async_tx;
-       } else {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
-                       chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
-               return NULL;
-       }
-}
-
-static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
-                                               struct dma_chan *chan,
-                                               dma_addr_t dma_dest,
-                                               dma_addr_t dma_src,
-                                               size_t len,
-                                               unsigned long flags)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioat_desc_sw *new;
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       new = ioat2_dma_get_next_descriptor(ioat_chan);
-
-       /*
-        * leave ioat_chan->desc_lock held in the ioat2 path;
-        * it will get unlocked at the end of tx_submit
-        */
-
-       if (new) {
-               new->len = len;
-               new->dst = dma_dest;
-               new->src = dma_src;
-               new->async_tx.flags = flags;
-               return &new->async_tx;
-       } else {
-               spin_unlock_bh(&ioat_chan->desc_lock);
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
-                       chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
-               return NULL;
-       }
-}
-
-static void ioat_dma_cleanup_tasklet(unsigned long data)
-{
-       struct ioat_dma_chan *chan = (void *)data;
-       ioat_dma_memcpy_cleanup(chan);
-       writew(IOAT_CHANCTRL_INT_DISABLE,
-              chan->reg_base + IOAT_CHANCTRL_OFFSET);
-}
-
-static void
-ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
-{
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       pci_unmap_single(ioat_chan->device->pdev,
-                                        pci_unmap_addr(desc, dst),
-                                        pci_unmap_len(desc, len),
-                                        PCI_DMA_FROMDEVICE);
-               else
-                       pci_unmap_page(ioat_chan->device->pdev,
-                                      pci_unmap_addr(desc, dst),
-                                      pci_unmap_len(desc, len),
-                                      PCI_DMA_FROMDEVICE);
-       }
-
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       pci_unmap_single(ioat_chan->device->pdev,
-                                        pci_unmap_addr(desc, src),
-                                        pci_unmap_len(desc, len),
-                                        PCI_DMA_TODEVICE);
-               else
-                       pci_unmap_page(ioat_chan->device->pdev,
-                                      pci_unmap_addr(desc, src),
-                                      pci_unmap_len(desc, len),
-                                      PCI_DMA_TODEVICE);
-       }
-}
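
Clients that manage their own DMA mappings can bypass this unmap logic through the completion flags tested above; a sketch of such a submission, reusing the memcpy prep operation from this file:

        unsigned long flags = DMA_COMPL_SKIP_SRC_UNMAP |
                              DMA_COMPL_SKIP_DEST_UNMAP;
        struct dma_async_tx_descriptor *tx;

        tx = chan->device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
                                                  len, flags);
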
-
-/**
- * ioat_dma_memcpy_cleanup - clean up finished descriptors
- * @chan: ioat channel to be cleaned up
- */
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
-{
-       unsigned long phys_complete;
-       struct ioat_desc_sw *desc, *_desc;
-       dma_cookie_t cookie = 0;
-       unsigned long desc_phys;
-       struct ioat_desc_sw *latest_desc;
-
-       prefetch(ioat_chan->completion_virt);
-
-       if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
-               return;
-
-       /* The completion writeback can happen at any time,
-          so reads by the driver need to be atomic operations.
-          The descriptor physical addresses are limited to 32 bits
-          when the CPU can only do a 32-bit mov. */
-
-#if (BITS_PER_LONG == 64)
-       phys_complete =
-               ioat_chan->completion_virt->full
-               & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-#else
-       phys_complete =
-               ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
-#endif
-
-       if ((ioat_chan->completion_virt->full
-               & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
-                               IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "Channel halted, chanerr = %x\n",
-                       readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
-
-               /* TODO do something to salvage the situation */
-       }
-
-       if (phys_complete == ioat_chan->last_completion) {
-               spin_unlock_bh(&ioat_chan->cleanup_lock);
-               /*
-                * perhaps we're stuck so hard that the watchdog can't go off?
-                * try to catch it after 2 seconds
-                */
-               if (ioat_chan->device->version != IOAT_VER_3_0) {
-                       if (time_after(jiffies,
-                                      ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
-                               ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
-                               ioat_chan->last_completion_time = jiffies;
-                       }
-               }
-               return;
-       }
-       ioat_chan->last_completion_time = jiffies;
-
-       cookie = 0;
-       if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
-               spin_unlock_bh(&ioat_chan->cleanup_lock);
-               return;
-       }
-
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               list_for_each_entry_safe(desc, _desc,
-                                        &ioat_chan->used_desc, node) {
-
-                       /*
-                        * Incoming DMA requests may use multiple descriptors,
-                        * due to exceeding xfercap, perhaps. If so, only the
-                        * last one will have a cookie, and require unmapping.
-                        */
-                       if (desc->async_tx.cookie) {
-                               cookie = desc->async_tx.cookie;
-                               ioat_dma_unmap(ioat_chan, desc);
-                               if (desc->async_tx.callback) {
-                                       desc->async_tx.callback(desc->async_tx.callback_param);
-                                       desc->async_tx.callback = NULL;
-                               }
-                       }
-
-                       if (desc->async_tx.phys != phys_complete) {
-                               /*
-                                * a completed entry, but not the last, so clean
-                                * up if the client is done with the descriptor
-                                */
-                               if (async_tx_test_ack(&desc->async_tx)) {
-                                       list_move_tail(&desc->node,
-                                                      &ioat_chan->free_desc);
-                               } else
-                                       desc->async_tx.cookie = 0;
-                       } else {
-                               /*
-                                * last used desc. Do not remove, so we can
-                                * append from it, but don't look at it next
-                                * time, either
-                                */
-                               desc->async_tx.cookie = 0;
-
-                               /* TODO check status bits? */
-                               break;
-                       }
-               }
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               /* has some other thread already cleaned up? */
-               if (ioat_chan->used_desc.prev == NULL)
-                       break;
-
-               /* work backwards to find latest finished desc */
-               desc = to_ioat_desc(ioat_chan->used_desc.next);
-               latest_desc = NULL;
-               do {
-                       desc = to_ioat_desc(desc->node.prev);
-                       desc_phys = (unsigned long)desc->async_tx.phys
-                                      & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-                       if (desc_phys == phys_complete) {
-                               latest_desc = desc;
-                               break;
-                       }
-               } while (&desc->node != ioat_chan->used_desc.prev);
-
-               if (latest_desc != NULL) {
-
-                       /* work forwards to clear finished descriptors */
-                       for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
-                            &desc->node != latest_desc->node.next &&
-                            &desc->node != ioat_chan->used_desc.next;
-                            desc = to_ioat_desc(desc->node.next)) {
-                               if (desc->async_tx.cookie) {
-                                       cookie = desc->async_tx.cookie;
-                                       desc->async_tx.cookie = 0;
-                                       ioat_dma_unmap(ioat_chan, desc);
-                                       if (desc->async_tx.callback) {
-                                               desc->async_tx.callback(desc->async_tx.callback_param);
-                                               desc->async_tx.callback = NULL;
-                                       }
-                               }
-                       }
-
-                       /* move used.prev up beyond those that are finished */
-                       if (&desc->node == ioat_chan->used_desc.next)
-                               ioat_chan->used_desc.prev = NULL;
-                       else
-                               ioat_chan->used_desc.prev = &desc->node;
-               }
-               break;
-       }
-
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       ioat_chan->last_completion = phys_complete;
-       if (cookie != 0)
-               ioat_chan->completed_cookie = cookie;
-
-       spin_unlock_bh(&ioat_chan->cleanup_lock);
-}
-
-/**
- * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
- * @chan: IOAT DMA channel handle
- * @cookie: DMA transaction identifier
- * @done: if not %NULL, updated with last completed transaction
- * @used: if not %NULL, updated with last used transaction
- */
-static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
-                                           dma_cookie_t cookie,
-                                           dma_cookie_t *done,
-                                           dma_cookie_t *used)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       dma_cookie_t last_used;
-       dma_cookie_t last_complete;
-       enum dma_status ret;
-
-       last_used = chan->cookie;
-       last_complete = ioat_chan->completed_cookie;
-       ioat_chan->watchdog_tcp_cookie = cookie;
-
-       if (done)
-               *done = last_complete;
-       if (used)
-               *used = last_used;
-
-       ret = dma_async_is_complete(cookie, last_complete, last_used);
-       if (ret == DMA_SUCCESS)
-               return ret;
-
-       ioat_dma_memcpy_cleanup(ioat_chan);
-
-       last_used = chan->cookie;
-       last_complete = ioat_chan->completed_cookie;
-
-       if (done)
-               *done = last_complete;
-       if (used)
-               *used = last_used;
-
-       return dma_async_is_complete(cookie, last_complete, last_used);
-}
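
A short usage sketch of this polling entry point as a dmaengine client would see it; handle_completed() is a hypothetical client hook, everything else is as registered below in ioat_dma_probe():

        dma_cookie_t done, used;
        enum dma_status status;

        status = chan->device->device_is_tx_complete(chan, cookie,
                                                     &done, &used);
        if (status == DMA_SUCCESS)
                handle_completed(done); /* all work up to 'done' finished */
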
-
-static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
-{
-       struct ioat_desc_sw *desc;
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-
-       desc = ioat_dma_get_next_descriptor(ioat_chan);
-
-       if (!desc) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "Unable to start null desc - get next desc failed\n");
-               spin_unlock_bh(&ioat_chan->desc_lock);
-               return;
-       }
-
-       desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
-                               | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
-                               | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-       /* set size to a non-zero value (channel returns error when size is 0) */
-       desc->hw->size = NULL_DESC_BUFFER_SIZE;
-       desc->hw->src_addr = 0;
-       desc->hw->dst_addr = 0;
-       async_tx_ack(&desc->async_tx);
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               desc->hw->next = 0;
-               list_add_tail(&desc->node, &ioat_chan->used_desc);
-
-               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
-                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
-               writel(((u64) desc->async_tx.phys) >> 32,
-                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
-
-               writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
-                       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
-                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
-               writel(((u64) desc->async_tx.phys) >> 32,
-                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
-
-               ioat_chan->dmacount++;
-               __ioat2_dma_memcpy_issue_pending(ioat_chan);
-               break;
-       }
-       spin_unlock_bh(&ioat_chan->desc_lock);
-}
-
-/*
- * Perform an IOAT transaction to verify the HW works.
- */
-#define IOAT_TEST_SIZE 2000
-
-static void ioat_dma_test_callback(void *dma_async_param)
-{
-       struct completion *cmp = dma_async_param;
-
-       complete(cmp);
-}
-
-/**
- * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
- * @device: device to be tested
- */
-static int ioat_dma_self_test(struct ioatdma_device *device)
-{
-       int i;
-       u8 *src;
-       u8 *dest;
-       struct dma_chan *dma_chan;
-       struct dma_async_tx_descriptor *tx;
-       dma_addr_t dma_dest, dma_src;
-       dma_cookie_t cookie;
-       int err = 0;
-       struct completion cmp;
-       unsigned long tmo;
-       unsigned long flags;
-
-       src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
-       if (!src)
-               return -ENOMEM;
-       dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
-       if (!dest) {
-               kfree(src);
-               return -ENOMEM;
-       }
-
-       /* Fill in src buffer */
-       for (i = 0; i < IOAT_TEST_SIZE; i++)
-               src[i] = (u8)i;
-
-       /* Start copy, using first DMA channel */
-       dma_chan = container_of(device->common.channels.next,
-                               struct dma_chan,
-                               device_node);
-       if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
-               dev_err(&device->pdev->dev,
-                       "selftest cannot allocate chan resource\n");
-               err = -ENODEV;
-               goto out;
-       }
-
-       dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
-                                DMA_TO_DEVICE);
-       dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
-                                 DMA_FROM_DEVICE);
-       flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
-       tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
-                                                  IOAT_TEST_SIZE, flags);
-       if (!tx) {
-               dev_err(&device->pdev->dev,
-                       "Self-test prep failed, disabling\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-
-       async_tx_ack(tx);
-       init_completion(&cmp);
-       tx->callback = ioat_dma_test_callback;
-       tx->callback_param = &cmp;
-       cookie = tx->tx_submit(tx);
-       if (cookie < 0) {
-               dev_err(&device->pdev->dev,
-                       "Self-test setup failed, disabling\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-       device->common.device_issue_pending(dma_chan);
-
-       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
-
-       if (tmo == 0 ||
-           device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
-                                       != DMA_SUCCESS) {
-               dev_err(&device->pdev->dev,
-                       "Self-test copy timed out, disabling\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-       if (memcmp(src, dest, IOAT_TEST_SIZE)) {
-               dev_err(&device->pdev->dev,
-                       "Self-test copy failed compare, disabling\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-
-free_resources:
-       device->common.device_free_chan_resources(dma_chan);
-out:
-       kfree(src);
-       kfree(dest);
-       return err;
-}
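
Distilled from the function above, the standard completion-based wait pattern for async_tx clients (a sketch, not additional driver code; names as in the self-test):

        struct completion cmp;

        init_completion(&cmp);
        tx->callback = ioat_dma_test_callback;  /* calls complete(&cmp) */
        tx->callback_param = &cmp;
        cookie = tx->tx_submit(tx);
        chan->device->device_issue_pending(chan);
        if (!wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)))
                err = -ENODEV;  /* timed out, as handled in the test above */
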
-
-static char ioat_interrupt_style[32] = "msix";
-module_param_string(ioat_interrupt_style, ioat_interrupt_style,
-                   sizeof(ioat_interrupt_style), 0644);
-MODULE_PARM_DESC(ioat_interrupt_style,
-                "set ioat interrupt style: msix (default), "
-                "msix-single-vector, msi, intx");
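
At load time the style is selected like any other module parameter, e.g. "ioat_interrupt_style=msi" passed to modprobe, or "ioatdma.ioat_interrupt_style=msi" on the kernel command line when the driver is built in (assuming the usual ioatdma module name).
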
-
-/**
- * ioat_dma_setup_interrupts - setup interrupt handler
- * @device: ioat device
- */
-static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
-{
-       struct ioat_dma_chan *ioat_chan;
-       int err, i, j, msixcnt;
-       u8 intrctrl = 0;
-
-       if (!strcmp(ioat_interrupt_style, "msix"))
-               goto msix;
-       if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
-               goto msix_single_vector;
-       if (!strcmp(ioat_interrupt_style, "msi"))
-               goto msi;
-       if (!strcmp(ioat_interrupt_style, "intx"))
-               goto intx;
-       dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
-               ioat_interrupt_style);
-       goto err_no_irq;
-
-msix:
-       /* The number of MSI-X vectors should equal the number of channels */
-       msixcnt = device->common.chancnt;
-       for (i = 0; i < msixcnt; i++)
-               device->msix_entries[i].entry = i;
-
-       err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
-       if (err < 0)
-               goto msi;
-       if (err > 0)
-               goto msix_single_vector;
-
-       for (i = 0; i < msixcnt; i++) {
-               ioat_chan = ioat_lookup_chan_by_index(device, i);
-               err = request_irq(device->msix_entries[i].vector,
-                                 ioat_dma_do_interrupt_msix,
-                                 0, "ioat-msix", ioat_chan);
-               if (err) {
-                       for (j = 0; j < i; j++) {
-                               ioat_chan =
-                                       ioat_lookup_chan_by_index(device, j);
-                               free_irq(device->msix_entries[j].vector,
-                                        ioat_chan);
-                       }
-                       goto msix_single_vector;
-               }
-       }
-       intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
-       device->irq_mode = msix_multi_vector;
-       goto done;
-
-msix_single_vector:
-       device->msix_entries[0].entry = 0;
-       err = pci_enable_msix(device->pdev, device->msix_entries, 1);
-       if (err)
-               goto msi;
-
-       err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
-                         0, "ioat-msix", device);
-       if (err) {
-               pci_disable_msix(device->pdev);
-               goto msi;
-       }
-       device->irq_mode = msix_single_vector;
-       goto done;
-
-msi:
-       err = pci_enable_msi(device->pdev);
-       if (err)
-               goto intx;
-
-       err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
-                         0, "ioat-msi", device);
-       if (err) {
-               pci_disable_msi(device->pdev);
-               goto intx;
-       }
-       /*
-        * CB 1.2 devices need a bit set in configuration space to enable MSI
-        */
-       if (device->version == IOAT_VER_1_2) {
-               u32 dmactrl;
-               pci_read_config_dword(device->pdev,
-                                     IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
-               dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
-               pci_write_config_dword(device->pdev,
-                                      IOAT_PCI_DMACTRL_OFFSET, dmactrl);
-       }
-       device->irq_mode = msi;
-       goto done;
-
-intx:
-       err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
-                         IRQF_SHARED, "ioat-intx", device);
-       if (err)
-               goto err_no_irq;
-       device->irq_mode = intx;
-
-done:
-       intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
-       writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
-       return 0;
-
-err_no_irq:
-       /* Disable all interrupt generation */
-       writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
-       dev_err(&device->pdev->dev, "no usable interrupts\n");
-       device->irq_mode = none;
-       return -1;
-}
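
The msix label above relies on the return convention of the old pci_enable_msix() API; a sketch of the three cases, with pdev/entries/nvec as generic stand-ins:

        err = pci_enable_msix(pdev, entries, nvec);
        if (err < 0) {
                /* MSI-X not usable at all: fall back to plain MSI */
        } else if (err > 0) {
                /* only 'err' vectors are available: retry with fewer,
                 * here via the msix_single_vector path
                 */
        }
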
-
-/**
- * ioat_dma_remove_interrupts - tear down whichever interrupt mode was set up
- * @device: ioat device
- */
-static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
-{
-       struct ioat_dma_chan *ioat_chan;
-       int i;
-
-       /* Disable all interrupt generation */
-       writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
-
-       switch (device->irq_mode) {
-       case msix_multi_vector:
-               for (i = 0; i < device->common.chancnt; i++) {
-                       ioat_chan = ioat_lookup_chan_by_index(device, i);
-                       free_irq(device->msix_entries[i].vector, ioat_chan);
-               }
-               pci_disable_msix(device->pdev);
-               break;
-       case msix_single_vector:
-               free_irq(device->msix_entries[0].vector, device);
-               pci_disable_msix(device->pdev);
-               break;
-       case msi:
-               free_irq(device->pdev->irq, device);
-               pci_disable_msi(device->pdev);
-               break;
-       case intx:
-               free_irq(device->pdev->irq, device);
-               break;
-       case none:
-               dev_warn(&device->pdev->dev,
-                        "call to %s without interrupts setup\n", __func__);
-       }
-       device->irq_mode = none;
-}
-
-struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
-                                     void __iomem *iobase)
-{
-       int err;
-       struct ioatdma_device *device;
-
-       device = kzalloc(sizeof(*device), GFP_KERNEL);
-       if (!device) {
-               err = -ENOMEM;
-               goto err_kzalloc;
-       }
-       device->pdev = pdev;
-       device->reg_base = iobase;
-       device->version = readb(device->reg_base + IOAT_VER_OFFSET);
-
-       /* DMA coherent memory pool for DMA descriptor allocations */
-       device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
-                                          sizeof(struct ioat_dma_descriptor),
-                                          64, 0);
-       if (!device->dma_pool) {
-               err = -ENOMEM;
-               goto err_dma_pool;
-       }
-
-       device->completion_pool = pci_pool_create("completion_pool", pdev,
-                                                 sizeof(u64), SMP_CACHE_BYTES,
-                                                 SMP_CACHE_BYTES);
-       if (!device->completion_pool) {
-               err = -ENOMEM;
-               goto err_completion_pool;
-       }
-
-       INIT_LIST_HEAD(&device->common.channels);
-       ioat_dma_enumerate_channels(device);
-
-       device->common.device_alloc_chan_resources =
-                                               ioat_dma_alloc_chan_resources;
-       device->common.device_free_chan_resources =
-                                               ioat_dma_free_chan_resources;
-       device->common.dev = &pdev->dev;
-
-       dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
-       device->common.device_is_tx_complete = ioat_dma_is_complete;
-       switch (device->version) {
-       case IOAT_VER_1_2:
-               device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
-               device->common.device_issue_pending =
-                                               ioat1_dma_memcpy_issue_pending;
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
-               device->common.device_issue_pending =
-                                               ioat2_dma_memcpy_issue_pending;
-               break;
-       }
-
-       dev_err(&device->pdev->dev,
-               "Intel(R) I/OAT DMA Engine found,"
-               " %d channels, device version 0x%02x, driver version %s\n",
-               device->common.chancnt, device->version, IOAT_DMA_VERSION);
-
-       if (!device->common.chancnt) {
-               dev_err(&device->pdev->dev,
-                       "Intel(R) I/OAT DMA Engine problem found: "
-                       "zero channels detected\n");
-               goto err_setup_interrupts;
-       }
-
-       err = ioat_dma_setup_interrupts(device);
-       if (err)
-               goto err_setup_interrupts;
-
-       err = ioat_dma_self_test(device);
-       if (err)
-               goto err_self_test;
-
-       ioat_set_tcp_copy_break(device);
-
-       dma_async_device_register(&device->common);
-
-       if (device->version != IOAT_VER_3_0) {
-               INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
-               schedule_delayed_work(&device->work,
-                                     WATCHDOG_DELAY);
-       }
-
-       return device;
-
-err_self_test:
-       ioat_dma_remove_interrupts(device);
-err_setup_interrupts:
-       pci_pool_destroy(device->completion_pool);
-err_completion_pool:
-       pci_pool_destroy(device->dma_pool);
-err_dma_pool:
-       kfree(device);
-err_kzalloc:
-       dev_err(&pdev->dev,
-               "Intel(R) I/OAT DMA Engine initialization failed\n");
-       return NULL;
-}
-
-void ioat_dma_remove(struct ioatdma_device *device)
-{
-       struct dma_chan *chan, *_chan;
-       struct ioat_dma_chan *ioat_chan;
-
-       if (device->version != IOAT_VER_3_0)
-               cancel_delayed_work(&device->work);
-
-       ioat_dma_remove_interrupts(device);
-
-       dma_async_device_unregister(&device->common);
-
-       pci_pool_destroy(device->dma_pool);
-       pci_pool_destroy(device->completion_pool);
-
-       iounmap(device->reg_base);
-       pci_release_regions(device->pdev);
-       pci_disable_device(device->pdev);
-
-       list_for_each_entry_safe(chan, _chan,
-                                &device->common.channels, device_node) {
-               ioat_chan = to_ioat_chan(chan);
-               list_del(&chan->device_node);
-               kfree(ioat_chan);
-       }
-       kfree(device);
-}
-
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
deleted file mode 100644 (file)
index a52ff4b..0000000
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef IOATDMA_H
-#define IOATDMA_H
-
-#include <linux/dmaengine.h>
-#include "ioatdma_hw.h"
-#include <linux/init.h>
-#include <linux/dmapool.h>
-#include <linux/cache.h>
-#include <linux/pci_ids.h>
-#include <net/tcp.h>
-
-#define IOAT_DMA_VERSION  "3.64"
-
-enum ioat_interrupt {
-       none = 0,
-       msix_multi_vector = 1,
-       msix_single_vector = 2,
-       msi = 3,
-       intx = 4,
-};
-
-#define IOAT_LOW_COMPLETION_MASK       0xffffffc0
-#define IOAT_DMA_DCA_ANY_CPU           ~0
-#define IOAT_WATCHDOG_PERIOD           (2 * HZ)
-
-
-/**
- * struct ioatdma_device - internal representation of an IOAT device
- * @pdev: PCI-Express device
- * @reg_base: MMIO register space base address
- * @dma_pool: for allocating DMA descriptors
- * @completion_pool: for allocating completion writeback areas
- * @common: embedded struct dma_device
- * @version: version of ioatdma device
- * @irq_mode: which style irq to use
- * @work: delayed work for the channel watchdog
- * @msix_entries: irq handlers
- * @idx: per channel data
- */
-
-struct ioatdma_device {
-       struct pci_dev *pdev;
-       void __iomem *reg_base;
-       struct pci_pool *dma_pool;
-       struct pci_pool *completion_pool;
-       struct dma_device common;
-       u8 version;
-       enum ioat_interrupt irq_mode;
-       struct delayed_work work;
-       struct msix_entry msix_entries[4];
-       struct ioat_dma_chan *idx[4];
-};
-
-/**
- * struct ioat_dma_chan - internal representation of a DMA channel
- */
-struct ioat_dma_chan {
-
-       void __iomem *reg_base;
-
-       dma_cookie_t completed_cookie;
-       unsigned long last_completion;
-       unsigned long last_completion_time;
-
-       size_t xfercap; /* XFERCAP register value expanded out */
-
-       spinlock_t cleanup_lock;
-       spinlock_t desc_lock;
-       struct list_head free_desc;
-       struct list_head used_desc;
-       unsigned long watchdog_completion;
-       int watchdog_tcp_cookie;
-       u32 watchdog_last_tcp_cookie;
-       struct delayed_work work;
-
-       int pending;
-       int dmacount;
-       int desccount;
-
-       struct ioatdma_device *device;
-       struct dma_chan common;
-
-       dma_addr_t completion_addr;
-       union {
-               u64 full; /* HW completion writeback */
-               struct {
-                       u32 low;
-                       u32 high;
-               };
-       } *completion_virt;
-       unsigned long last_compl_desc_addr_hw;
-       struct tasklet_struct cleanup_task;
-};
-
-/* wrapper around hardware descriptor format + additional software fields */
-
-/**
- * struct ioat_desc_sw - wrapper around hardware descriptor
- * @hw: hardware DMA descriptor
- * @node: this descriptor will either be on the free list,
- *     or attached to a transaction list (async_tx.tx_list)
- * @tx_cnt: number of descriptors required to complete the transaction
- * @len: transaction byte count
- * @src: DMA source address
- * @dst: DMA destination address
- * @async_tx: the generic software descriptor for all engines
- */
-struct ioat_desc_sw {
-       struct ioat_dma_descriptor *hw;
-       struct list_head node;
-       int tx_cnt;
-       size_t len;
-       dma_addr_t src;
-       dma_addr_t dst;
-       struct dma_async_tx_descriptor async_tx;
-};
-
-static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
-{
-       #ifdef CONFIG_NET_DMA
-       switch (dev->version) {
-       case IOAT_VER_1_2:
-               sysctl_tcp_dma_copybreak = 4096;
-               break;
-       case IOAT_VER_2_0:
-               sysctl_tcp_dma_copybreak = 2048;
-               break;
-       case IOAT_VER_3_0:
-               sysctl_tcp_dma_copybreak = 262144;
-               break;
-       }
-       #endif
-}
-
-#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
-struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
-                                     void __iomem *iobase);
-void ioat_dma_remove(struct ioatdma_device *device);
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-#else
-#define ioat_dma_probe(pdev, iobase)    NULL
-#define ioat_dma_remove(device)         do { } while (0)
-#define ioat_dca_init(pdev, iobase)    NULL
-#define ioat2_dca_init(pdev, iobase)   NULL
-#define ioat3_dca_init(pdev, iobase)   NULL
-#endif
-
-#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
deleted file mode 100644 (file)
index afa57ee..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef _IOAT_HW_H_
-#define _IOAT_HW_H_
-
-/* PCI Configuration Space Values */
-#define IOAT_PCI_VID            0x8086
-
-/* CB device ID's */
-#define IOAT_PCI_DID_5000       0x1A38
-#define IOAT_PCI_DID_CNB        0x360B
-#define IOAT_PCI_DID_SCNB       0x65FF
-#define IOAT_PCI_DID_SNB        0x402F
-
-#define IOAT_PCI_RID            0x00
-#define IOAT_PCI_SVID           0x8086
-#define IOAT_PCI_SID            0x8086
-#define IOAT_VER_1_2            0x12    /* Version 1.2 */
-#define IOAT_VER_2_0            0x20    /* Version 2.0 */
-#define IOAT_VER_3_0            0x30    /* Version 3.0 */
-
-struct ioat_dma_descriptor {
-       uint32_t        size;
-       uint32_t        ctl;
-       uint64_t        src_addr;
-       uint64_t        dst_addr;
-       uint64_t        next;
-       uint64_t        rsv1;
-       uint64_t        rsv2;
-       uint64_t        user1;
-       uint64_t        user2;
-};
-
-#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
-#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
-#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
-#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
-#define IOAT_DMA_DESCRIPTOR_CTL_FRAME  0x00000010
-#define IOAT_DMA_DESCRIPTOR_NUL                0x00000020
-#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
-#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
-#define IOAT_DMA_DESCRIPTOR_CTL_BNDL   0x00000100
-#define IOAT_DMA_DESCRIPTOR_CTL_DCA    0x00000200
-#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT        0x00000400
-
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA     0x00000000
-
-#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA    0x00000001
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK    0xFF000000
-
-#endif
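
For a concrete combination of these control bits, this is how ioat_dma_start_null_desc() in the deleted ioat_dma.c builds its NULL descriptor:

        desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
                      | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
                      | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
        desc->hw->size = NULL_DESC_BUFFER_SIZE; /* size 0 is rejected */
        desc->hw->src_addr = 0;
        desc->hw->dst_addr = 0;
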
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
deleted file mode 100644 (file)
index 49bc277..0000000
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef _IOAT_REGISTERS_H_
-#define _IOAT_REGISTERS_H_
-
-#define IOAT_PCI_DMACTRL_OFFSET                        0x48
-#define IOAT_PCI_DMACTRL_DMA_EN                        0x00000001
-#define IOAT_PCI_DMACTRL_MSI_EN                        0x00000002
-
-#define IOAT_PCI_DEVICE_ID_OFFSET              0x02
-#define IOAT_PCI_DMAUNCERRSTS_OFFSET           0x148
-#define IOAT_PCI_CHANERRMASK_INT_OFFSET                0x184
-
-/* MMIO Device Registers */
-#define IOAT_CHANCNT_OFFSET                    0x00    /*  8-bit */
-
-#define IOAT_XFERCAP_OFFSET                    0x01    /*  8-bit */
-#define IOAT_XFERCAP_4KB                       12
-#define IOAT_XFERCAP_8KB                       13
-#define IOAT_XFERCAP_16KB                      14
-#define IOAT_XFERCAP_32KB                      15
-#define IOAT_XFERCAP_32GB                      0
-
-#define IOAT_GENCTRL_OFFSET                    0x02    /*  8-bit */
-#define IOAT_GENCTRL_DEBUG_EN                  0x01
-
-#define IOAT_INTRCTRL_OFFSET                   0x03    /*  8-bit */
-#define IOAT_INTRCTRL_MASTER_INT_EN            0x01    /* Master Interrupt Enable */
-#define IOAT_INTRCTRL_INT_STATUS               0x02    /* ATTNSTATUS -or- Channel Int */
-#define IOAT_INTRCTRL_INT                      0x04    /* INT_STATUS -and- MASTER_INT_EN */
-#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL      0x08    /* Enable all MSI-X vectors */
-
-#define IOAT_ATTNSTATUS_OFFSET                 0x04    /* Each bit is a channel */
-
-#define IOAT_VER_OFFSET                                0x08    /*  8-bit */
-#define IOAT_VER_MAJOR_MASK                    0xF0
-#define IOAT_VER_MINOR_MASK                    0x0F
-#define GET_IOAT_VER_MAJOR(x)                  (((x) & IOAT_VER_MAJOR_MASK) >> 4)
-#define GET_IOAT_VER_MINOR(x)                  ((x) & IOAT_VER_MINOR_MASK)
-
-#define IOAT_PERPORTOFFSET_OFFSET              0x0A    /* 16-bit */
-
-#define IOAT_INTRDELAY_OFFSET                  0x0C    /* 16-bit */
-#define IOAT_INTRDELAY_INT_DELAY_MASK          0x3FFF  /* Interrupt Delay Time */
-#define IOAT_INTRDELAY_COALESE_SUPPORT         0x8000  /* Interrupt Coalescing Supported */
-
-#define IOAT_DEVICE_STATUS_OFFSET              0x0E    /* 16-bit */
-#define IOAT_DEVICE_STATUS_DEGRADED_MODE       0x0001
-
-#define IOAT_CHANNEL_MMIO_SIZE                 0x80    /* Each Channel MMIO space is this size */
-
-/* DMA Channel Registers */
-#define IOAT_CHANCTRL_OFFSET                   0x00    /* 16-bit Channel Control Register */
-#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK    0xF000
-#define IOAT_CHANCTRL_CHANNEL_IN_USE           0x0100
-#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL    0x0020
-#define IOAT_CHANCTRL_ERR_INT_EN               0x0010
-#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN         0x0008
-#define IOAT_CHANCTRL_ERR_COMPLETION_EN                0x0004
-#define IOAT_CHANCTRL_INT_DISABLE              0x0001
-
-#define IOAT_DMA_COMP_OFFSET                   0x02    /* 16-bit DMA channel compatibility */
-#define IOAT_DMA_COMP_V1                       0x0001  /* Compatibility with DMA version 1 */
-#define IOAT_DMA_COMP_V2                       0x0002  /* Compatibility with DMA version 2 */
-
-
-#define IOAT1_CHANSTS_OFFSET           0x04    /* 64-bit Channel Status Register */
-#define IOAT2_CHANSTS_OFFSET           0x08    /* 64-bit Channel Status Register */
-#define IOAT_CHANSTS_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
-#define IOAT1_CHANSTS_OFFSET_LOW       0x04
-#define IOAT2_CHANSTS_OFFSET_LOW       0x08
-#define IOAT_CHANSTS_OFFSET_LOW(ver)           ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
-#define IOAT1_CHANSTS_OFFSET_HIGH      0x08
-#define IOAT2_CHANSTS_OFFSET_HIGH      0x0C
-#define IOAT_CHANSTS_OFFSET_HIGH(ver)          ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F
-#define IOAT_CHANSTS_SOFT_ERR                  0x0000000000000010
-#define IOAT_CHANSTS_UNAFFILIATED_ERR          0x0000000000000008
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS       0x0000000000000007
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE        0x0
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE  0x1
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED     0x2
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED        0x3
-
-
-
-#define IOAT_CHAN_DMACOUNT_OFFSET      0x06    /* 16-bit DMA Count register */
-
-#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
-#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
-#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
-
-/* CB DCA Memory Space Registers */
-#define IOAT_DCAOFFSET_OFFSET       0x14
-/* CB_BAR + IOAT_DCAOFFSET value */
-#define IOAT_DCA_VER_OFFSET         0x00
-#define IOAT_DCA_VER_MAJOR_MASK     0xF0
-#define IOAT_DCA_VER_MINOR_MASK     0x0F
-
-#define IOAT_DCA_COMP_OFFSET        0x02
-#define IOAT_DCA_COMP_V1            0x1
-
-#define IOAT_FSB_CAPABILITY_OFFSET  0x04
-#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
-
-#define IOAT_PCI_CAPABILITY_OFFSET  0x06
-#define IOAT_PCI_CAPABILITY_MEMWR   0x1
-
-#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
-#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
-
-#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
-#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
-
-#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
-#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
-#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
-#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
-#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
-#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
-#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
-#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
-#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
-#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
-#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
-#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
-
-#define IOAT_DCA_GREQID_OFFSET      0x10
-#define IOAT_DCA_GREQID_SIZE        0x04
-#define IOAT_DCA_GREQID_MASK        0xFFFF
-#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
-#define IOAT_DCA_GREQID_VALID       0x20000000
-#define IOAT_DCA_GREQID_LASTID      0x80000000
-
-#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
-#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
-
-#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
-#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
-
-#define IOAT3_CSI_CONTROL_OFFSET    0x0C
-#define IOAT3_CSI_CONTROL_PREFETCH  0x1
-
-#define IOAT3_PCI_CONTROL_OFFSET    0x0E
-#define IOAT3_PCI_CONTROL_MEMWR     0x1
-
-#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
-
-#define IOAT3_DCA_GREQID_OFFSET     0x02
-
-#define IOAT1_CHAINADDR_OFFSET         0x0C    /* 64-bit Descriptor Chain Address Register */
-#define IOAT2_CHAINADDR_OFFSET         0x10    /* 64-bit Descriptor Chain Address Register */
-#define IOAT_CHAINADDR_OFFSET(ver)             ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
-#define IOAT1_CHAINADDR_OFFSET_LOW     0x0C
-#define IOAT2_CHAINADDR_OFFSET_LOW     0x10
-#define IOAT_CHAINADDR_OFFSET_LOW(ver)         ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
-#define IOAT1_CHAINADDR_OFFSET_HIGH    0x10
-#define IOAT2_CHAINADDR_OFFSET_HIGH    0x14
-#define IOAT_CHAINADDR_OFFSET_HIGH(ver)                ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
-
-#define IOAT1_CHANCMD_OFFSET           0x14    /*  8-bit DMA Channel Command Register */
-#define IOAT2_CHANCMD_OFFSET           0x04    /*  8-bit DMA Channel Command Register */
-#define IOAT_CHANCMD_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
-#define IOAT_CHANCMD_RESET                     0x20
-#define IOAT_CHANCMD_RESUME                    0x10
-#define IOAT_CHANCMD_ABORT                     0x08
-#define IOAT_CHANCMD_SUSPEND                   0x04
-#define IOAT_CHANCMD_APPEND                    0x02
-#define IOAT_CHANCMD_START                     0x01
-
-#define IOAT_CHANCMP_OFFSET                    0x18    /* 64-bit Channel Completion Address Register */
-#define IOAT_CHANCMP_OFFSET_LOW                        0x18
-#define IOAT_CHANCMP_OFFSET_HIGH               0x1C
-
-#define IOAT_CDAR_OFFSET                       0x20    /* 64-bit Current Descriptor Address Register */
-#define IOAT_CDAR_OFFSET_LOW                   0x20
-#define IOAT_CDAR_OFFSET_HIGH                  0x24
-
-#define IOAT_CHANERR_OFFSET                    0x28    /* 32-bit Channel Error Register */
-#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001
-#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR        0x0002
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR  0x0004
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR     0x0008
-#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR      0x0010
-#define IOAT_CHANERR_CHANCMD_ERR               0x0020
-#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR  0x0040
-#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR      0x0080
-#define IOAT_CHANERR_READ_DATA_ERR             0x0100
-#define IOAT_CHANERR_WRITE_DATA_ERR            0x0200
-#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR    0x0400
-#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR     0x0800
-#define IOAT_CHANERR_COMPLETION_ADDR_ERR       0x1000
-#define IOAT_CHANERR_INT_CONFIGURATION_ERR     0x2000
-#define IOAT_CHANERR_SOFT_ERR                  0x4000
-#define IOAT_CHANERR_UNAFFILIATED_ERR          0x8000
-
-#define IOAT_CHANERR_MASK_OFFSET               0x2C    /* 32-bit Channel Error Register */
-
-#endif /* _IOAT_REGISTERS_H_ */
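
A sketch of reading the versioned 64-bit channel status through these macros on a 32-bit CPU; note the two readl()s are not atomic, which is why the driver's hot path relies on the completion writeback area instead:

        u8 ver = device->version;       /* as read from IOAT_VER_OFFSET */
        u32 lo, hi, chanerr;
        u64 status;

        lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
        hi = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
        status = ((u64)hi << 32) | lo;
        if ((status & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
            IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED)
                chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
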
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 2f052265122f62e2681bbdb0dfbbd558cef24713..645ca8d54ec43350059bd8d7c8802f017b4a4abd 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/platform_device.h>
 #include <linux/memory.h>
 #include <linux/ioport.h>
+#include <linux/raid/pq.h>
 
 #include <mach/adma.h>
 
@@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
        }
 }
 
+static void
+iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+       struct dma_async_tx_descriptor *tx = &desc->async_tx;
+       struct iop_adma_desc_slot *unmap = desc->group_head;
+       struct device *dev = &iop_chan->device->pdev->dev;
+       u32 len = unmap->unmap_len;
+       enum dma_ctrl_flags flags = tx->flags;
+       u32 src_cnt;
+       dma_addr_t addr;
+       dma_addr_t dest;
+
+       src_cnt = unmap->unmap_src_cnt;
+       dest = iop_desc_get_dest_addr(unmap, iop_chan);
+       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+               enum dma_data_direction dir;
+
+               if (src_cnt > 1) /* is xor? */
+                       dir = DMA_BIDIRECTIONAL;
+               else
+                       dir = DMA_FROM_DEVICE;
+
+               dma_unmap_page(dev, dest, len, dir);
+       }
+
+       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+               while (src_cnt--) {
+                       addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
+                       if (addr == dest)
+                               continue;
+                       dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+               }
+       }
+       desc->group_head = NULL;
+}
+
+static void
+iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+       struct dma_async_tx_descriptor *tx = &desc->async_tx;
+       struct iop_adma_desc_slot *unmap = desc->group_head;
+       struct device *dev = &iop_chan->device->pdev->dev;
+       u32 len = unmap->unmap_len;
+       enum dma_ctrl_flags flags = tx->flags;
+       u32 src_cnt = unmap->unmap_src_cnt;
+       dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
+       dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
+       int i;
+
+       if (tx->flags & DMA_PREP_CONTINUE)
+               src_cnt -= 3;
+
+       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
+               dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
+       }
+
+       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+               dma_addr_t addr;
+
+               for (i = 0; i < src_cnt; i++) {
+                       addr = iop_desc_get_src_addr(unmap, iop_chan, i);
+                       dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+               }
+               if (desc->pq_check_result) {
+                       dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
+                       dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
+               }
+       }
+
+       desc->group_head = NULL;
+}
+
+
 static dma_cookie_t
 iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
        struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
 {
-       BUG_ON(desc->async_tx.cookie < 0);
-       if (desc->async_tx.cookie > 0) {
-               cookie = desc->async_tx.cookie;
-               desc->async_tx.cookie = 0;
+       struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+       BUG_ON(tx->cookie < 0);
+       if (tx->cookie > 0) {
+               cookie = tx->cookie;
+               tx->cookie = 0;
 
                /* call the callback (must not sleep or submit new
                 * operations to this channel)
                 */
-               if (desc->async_tx.callback)
-                       desc->async_tx.callback(
-                               desc->async_tx.callback_param);
+               if (tx->callback)
+                       tx->callback(tx->callback_param);
 
                /* unmap dma addresses
                 * (unmap_single vs unmap_page?)
                 */
                if (desc->group_head && desc->unmap_len) {
-                       struct iop_adma_desc_slot *unmap = desc->group_head;
-                       struct device *dev =
-                               &iop_chan->device->pdev->dev;
-                       u32 len = unmap->unmap_len;
-                       enum dma_ctrl_flags flags = desc->async_tx.flags;
-                       u32 src_cnt;
-                       dma_addr_t addr;
-                       dma_addr_t dest;
-
-                       src_cnt = unmap->unmap_src_cnt;
-                       dest = iop_desc_get_dest_addr(unmap, iop_chan);
-                       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                               enum dma_data_direction dir;
-
-                               if (src_cnt > 1) /* is xor? */
-                                       dir = DMA_BIDIRECTIONAL;
-                               else
-                                       dir = DMA_FROM_DEVICE;
-
-                               dma_unmap_page(dev, dest, len, dir);
-                       }
-
-                       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                               while (src_cnt--) {
-                                       addr = iop_desc_get_src_addr(unmap,
-                                                                    iop_chan,
-                                                                    src_cnt);
-                                       if (addr == dest)
-                                               continue;
-                                       dma_unmap_page(dev, addr, len,
-                                                      DMA_TO_DEVICE);
-                               }
-                       }
-                       desc->group_head = NULL;
+                       if (iop_desc_is_pq(desc))
+                               iop_desc_unmap_pq(iop_chan, desc);
+                       else
+                               iop_desc_unmap(iop_chan, desc);
                }
        }
 
        /* run dependent operations */
-       dma_run_dependencies(&desc->async_tx);
+       dma_run_dependencies(tx);
 
        return cookie;
 }
@@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data)
 {
        struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
 
-       spin_lock(&iop_chan->lock);
+       /* lockdep will flag dependency submissions as potentially
+        * recursive locking; this is not the case, as a dependency
+        * submission will never recurse into a channel's submit routine.
+        * There are checks in async_tx.c to prevent this.
+        */
+       spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
        __iop_adma_slot_cleanup(iop_chan);
        spin_unlock(&iop_chan->lock);
 }
@@ -370,7 +421,7 @@ retry:
                        }
                        alloc_tail->group_head = alloc_start;
                        alloc_tail->async_tx.cookie = -EBUSY;
-                       list_splice(&chain, &alloc_tail->async_tx.tx_list);
+                       list_splice(&chain, &alloc_tail->tx_list);
                        iop_chan->last_used = last_used;
                        iop_desc_clear_next_desc(alloc_start);
                        iop_desc_clear_next_desc(alloc_tail);
@@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 
        old_chain_tail = list_entry(iop_chan->chain.prev,
                struct iop_adma_desc_slot, chain_node);
-       list_splice_init(&sw_desc->async_tx.tx_list,
+       list_splice_init(&sw_desc->tx_list,
                         &old_chain_tail->chain_node);
 
        /* fix up the hardware chain */
@@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
 
                dma_async_tx_descriptor_init(&slot->async_tx, chan);
                slot->async_tx.tx_submit = iop_adma_tx_submit;
+               INIT_LIST_HEAD(&slot->tx_list);
                INIT_LIST_HEAD(&slot->chain_node);
                INIT_LIST_HEAD(&slot->slot_node);
                hw_desc = (char *) iop_chan->device->dma_desc_pool;
@@ -660,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
 }
 
 static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
-                          unsigned int src_cnt, size_t len, u32 *result,
-                          unsigned long flags)
+iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+                         unsigned int src_cnt, size_t len, u32 *result,
+                         unsigned long flags)
 {
        struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
        struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
        return sw_desc ? &sw_desc->async_tx : NULL;
 }
 
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+                    unsigned int src_cnt, const unsigned char *scf, size_t len,
+                    unsigned long flags)
+{
+       struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+       struct iop_adma_desc_slot *sw_desc, *g;
+       int slot_cnt, slots_per_op;
+       int continue_srcs;
+
+       if (unlikely(!len))
+               return NULL;
+       BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+       dev_dbg(iop_chan->device->common.dev,
+               "%s src_cnt: %d len: %u flags: %lx\n",
+               __func__, src_cnt, len, flags);
+
+       if (dmaf_p_disabled_continue(flags))
+               continue_srcs = 1+src_cnt;
+       else if (dmaf_continue(flags))
+               continue_srcs = 3+src_cnt;
+       else
+               continue_srcs = 0+src_cnt;
+
+       spin_lock_bh(&iop_chan->lock);
+       slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+       sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+       if (sw_desc) {
+               int i;
+
+               g = sw_desc->group_head;
+               iop_desc_set_byte_count(g, iop_chan, len);
+
+               /* even if P is disabled its destination address (bits
+                * [3:0]) must match Q.  It is ok if P points to an
+                * invalid address, it won't be written.
+                */
+               if (flags & DMA_PREP_PQ_DISABLE_P)
+                       dst[0] = dst[1] & 0x7;
+
+               iop_desc_set_pq_addr(g, dst);
+               sw_desc->unmap_src_cnt = src_cnt;
+               sw_desc->unmap_len = len;
+               sw_desc->async_tx.flags = flags;
+               for (i = 0; i < src_cnt; i++)
+                       iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+
+               /* if we are continuing a previous operation factor in
+                * the old p and q values, see the comment for dma_maxpq
+                * in include/linux/dmaengine.h
+                */
+               if (dmaf_p_disabled_continue(flags))
+                       iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+               else if (dmaf_continue(flags)) {
+                       iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+                       iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+                       iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+               }
+               iop_desc_init_pq(g, i, flags);
+       }
+       spin_unlock_bh(&iop_chan->lock);
+
+       return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+                        unsigned int src_cnt, const unsigned char *scf,
+                        size_t len, enum sum_check_flags *pqres,
+                        unsigned long flags)
+{
+       struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+       struct iop_adma_desc_slot *sw_desc, *g;
+       int slot_cnt, slots_per_op;
+
+       if (unlikely(!len))
+               return NULL;
+       BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+       dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+               __func__, src_cnt, len);
+
+       spin_lock_bh(&iop_chan->lock);
+       slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+       sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+       if (sw_desc) {
+               /* for validate operations p and q are tagged onto the
+                * end of the source list
+                */
+               int pq_idx = src_cnt;
+
+               g = sw_desc->group_head;
+               iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+               iop_desc_set_pq_zero_sum_byte_count(g, len);
+               g->pq_check_result = pqres;
+               pr_debug("\t%s: g->pq_check_result: %p\n",
+                       __func__, g->pq_check_result);
+               sw_desc->unmap_src_cnt = src_cnt+2;
+               sw_desc->unmap_len = len;
+               sw_desc->async_tx.flags = flags;
+               while (src_cnt--)
+                       iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+                                                         src[src_cnt],
+                                                         scf[src_cnt]);
+               iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+       }
+       spin_unlock_bh(&iop_chan->lock);
+
+       return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
 static void iop_adma_free_chan_resources(struct dma_chan *chan)
 {
        struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
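
The 1/3 extra-source accounting in iop_adma_prep_dma_pq() above is the driver-side mirror of the dma_maxpq() helper that the in-code comment points at; quoted from memory of include/linux/dmaengine.h in this series, so details may differ:

        static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
        {
                if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
                        return dma_dev_to_maxpq(dma);     /* nothing reserved */
                else if (dmaf_p_disabled_continue(flags))
                        return dma_dev_to_maxpq(dma) - 1; /* reserve old Q */
                else if (dmaf_continue(flags))
                        return dma_dev_to_maxpq(dma) - 3; /* old P, old Q, Q-cancel */
                BUG();
        }
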
@@ -906,7 +1070,7 @@ out:
 
 #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
 static int __devinit
-iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
+iop_adma_xor_val_self_test(struct iop_adma_device *device)
 {
        int i, src_idx;
        struct page *dest;
@@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
                PAGE_SIZE, DMA_TO_DEVICE);
 
        /* skip zero sum if the capability is not present */
-       if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
+       if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
                goto free_resources;
 
        /* zero sum the sources with the destination page */
@@ -1016,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
                dma_srcs[i] = dma_map_page(dma_chan->device->dev,
                                           zero_sum_srcs[i], 0, PAGE_SIZE,
                                           DMA_TO_DEVICE);
-       tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
-                                       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
-                                       &zero_sum_result,
-                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+                                      IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                      &zero_sum_result,
+                                      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
        cookie = iop_adma_tx_submit(tx);
        iop_adma_issue_pending(dma_chan);
@@ -1072,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
                dma_srcs[i] = dma_map_page(dma_chan->device->dev,
                                           zero_sum_srcs[i], 0, PAGE_SIZE,
                                           DMA_TO_DEVICE);
-       tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
-                                       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
-                                       &zero_sum_result,
-                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+                                      IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                      &zero_sum_result,
+                                      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
        cookie = iop_adma_tx_submit(tx);
        iop_adma_issue_pending(dma_chan);
@@ -1105,6 +1269,170 @@ out:
        return err;
 }
 
+#ifdef CONFIG_MD_RAID6_PQ
+static int __devinit
+iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+{
+       /* combined sources, software pq results, and extra hw pq results */
+       struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+       /* ptr to the extra hw pq buffers defined above */
+       struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+       /* address conversion buffers (dma_map / page_address) */
+       void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+       dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
+       dma_addr_t pq_dest[2];
+
+       int i;
+       struct dma_async_tx_descriptor *tx;
+       struct dma_chan *dma_chan;
+       dma_cookie_t cookie;
+       u32 zero_sum_result;
+       int err = 0;
+       struct device *dev;
+
+       dev_dbg(device->common.dev, "%s\n", __func__);
+
+       for (i = 0; i < ARRAY_SIZE(pq); i++) {
+               pq[i] = alloc_page(GFP_KERNEL);
+               if (!pq[i]) {
+                       while (i--)
+                               __free_page(pq[i]);
+                       return -ENOMEM;
+               }
+       }
+
+       /* Fill in src buffers */
+       for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+               pq_sw[i] = page_address(pq[i]);
+               memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
+       }
+       pq_sw[i] = page_address(pq[i]);
+       pq_sw[i+1] = page_address(pq[i+1]);
+
+       dma_chan = container_of(device->common.channels.next,
+                               struct dma_chan,
+                               device_node);
+       if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       dev = dma_chan->device->dev;
+
+       /* initialize the dests */
+       memset(page_address(pq_hw[0]), 0, PAGE_SIZE);
+       memset(page_address(pq_hw[1]), 0, PAGE_SIZE);
+
+       /* test pq */
+       pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+               pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+                                        DMA_TO_DEVICE);
+
+       tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+                                 IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+                                 PAGE_SIZE,
+                                 DMA_PREP_INTERRUPT |
+                                 DMA_CTRL_ACK);
+
+       cookie = iop_adma_tx_submit(tx);
+       iop_adma_issue_pending(dma_chan);
+       msleep(8);
+
+       if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+               DMA_SUCCESS) {
+               dev_err(dev, "Self-test pq timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+
+       if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+                  page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+               dev_err(dev, "Self-test p failed compare, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+                  page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+               dev_err(dev, "Self-test q failed compare, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       /* test correct zero sum using the software generated pq values */
+       for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+               pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+                                        DMA_TO_DEVICE);
+
+       zero_sum_result = ~0;
+       tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+                                     pq_src, IOP_ADMA_NUM_SRC_TEST,
+                                     raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+                                     DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+       cookie = iop_adma_tx_submit(tx);
+       iop_adma_issue_pending(dma_chan);
+       msleep(8);
+
+       if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+               DMA_SUCCESS) {
+               dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       if (zero_sum_result != 0) {
+               dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+                       zero_sum_result);
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       /* test incorrect zero sum */
+       i = IOP_ADMA_NUM_SRC_TEST;
+       memset(pq_sw[i] + 100, 0, 100);
+       memset(pq_sw[i+1] + 200, 0, 200);
+       for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+               pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+                                        DMA_TO_DEVICE);
+
+       zero_sum_result = 0;
+       tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+                                     pq_src, IOP_ADMA_NUM_SRC_TEST,
+                                     raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+                                     DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+       cookie = iop_adma_tx_submit(tx);
+       iop_adma_issue_pending(dma_chan);
+       msleep(8);
+
+       if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+               DMA_SUCCESS) {
+               dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+               dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+                       zero_sum_result);
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+free_resources:
+       iop_adma_free_chan_resources(dma_chan);
+out:
+       i = ARRAY_SIZE(pq);
+       while (i--)
+               __free_page(pq[i]);
+       return err;
+}
+#endif
+
 static int __devexit iop_adma_remove(struct platform_device *dev)
 {
        struct iop_adma_device *device = platform_get_drvdata(dev);
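
For reference, validate operations report mismatches per destination through enum sum_check_flags, which is why the corrupted-data case above expects both bits to be set. A sketch of the consumer side (names as used in this series):

        enum sum_check_flags res = 0;

        tx = dma_dev->device_prep_dma_pq_val(chan, pq, srcs, src_cnt, scf,
                                             len, &res, DMA_CTRL_ACK);
        /* ... submit, issue pending, wait for completion ... */
        if (res & SUM_CHECK_P_RESULT)
                pr_debug("P does not match\n");
        if (res & SUM_CHECK_Q_RESULT)
                pr_debug("Q does not match\n");
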
@@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
                dma_dev->max_xor = iop_adma_get_max_xor();
                dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
        }
-       if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
-               dma_dev->device_prep_dma_zero_sum =
-                       iop_adma_prep_dma_zero_sum;
+       if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+               dma_dev->device_prep_dma_xor_val =
+                       iop_adma_prep_dma_xor_val;
+       if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+               dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+               dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+       }
+       if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+               dma_dev->device_prep_dma_pq_val =
+                       iop_adma_prep_dma_pq_val;
        if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
                dma_dev->device_prep_dma_interrupt =
                        iop_adma_prep_dma_interrupt;
@@ -1248,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
        }
 
        if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
-               dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
-               ret = iop_adma_xor_zero_sum_self_test(adev);
+           dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+               ret = iop_adma_xor_val_self_test(adev);
                dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
                if (ret)
                        goto err_free_iop_chan;
        }
 
+       if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+           dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+               #ifdef CONFIG_MD_RAID6_PQ
+               ret = iop_adma_pq_zero_sum_self_test(adev);
+               dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+               #else
+               /* cannot test raid6, so do not publish the capabilities */
+               dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+               dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+               ret = 0;
+               #endif
+               if (ret)
+                       goto err_free_iop_chan;
+       }
+
        dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
-         "( %s%s%s%s%s%s%s%s%s%s)\n",
-         dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
-         dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
-         dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
+         "( %s%s%s%s%s%s%s)\n",
+         dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+         dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
          dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
-         dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
-         dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
+         dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
          dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
-         dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
          dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
          dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
 
@@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
        if (sw_desc) {
                grp_start = sw_desc->group_head;
 
-               list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
                async_tx_ack(&sw_desc->async_tx);
                iop_desc_init_memcpy(grp_start, 0);
                iop_desc_set_byte_count(grp_start, iop_chan, 0);
@@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
        sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
        if (sw_desc) {
                grp_start = sw_desc->group_head;
-               list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
                async_tx_ack(&sw_desc->async_tx);
                iop_desc_init_null_xor(grp_start, 2, 0);
                iop_desc_set_byte_count(grp_start, iop_chan, 0);
index 9f6fe46a9b87351d6b172d817a3532c05ec4c90d..c0a272c7368267ad25bd5185e14d5fa965cb2751 100644 (file)
@@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
                                        iov_byte_offset,
                                        kdata,
                                        copy);
+                       /* poll for a descriptor slot */
+                       if (unlikely(dma_cookie < 0)) {
+                               dma_async_issue_pending(chan);
+                               continue;
+                       }
 
                        len -= copy;
                        iov[iovec_idx].iov_len -= copy;
@@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
                                        page,
                                        offset,
                                        copy);
+                       /* poll for a descriptor slot */
+                       if (unlikely(dma_cookie < 0)) {
+                               dma_async_issue_pending(chan);
+                               continue;
+                       }
 
                        len -= copy;
                        iov[iovec_idx].iov_len -= copy;
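
Both hunks above add the same back-pressure pattern: a negative cookie from the copy helper means the descriptor ring is full, so the caller flushes submitted work and retries rather than failing. Condensed into a standalone sketch (dst, src and len are placeholders):

        dma_cookie_t cookie;

        do {
                cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
                if (unlikely(cookie < 0))
                        dma_async_issue_pending(chan); /* drain the ring */
        } while (cookie < 0);
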
index 3f23eabe09f2076cc05ddc1805129615d1dde6bc..466ab10c1ff10de1d001178fde9fdce203c410e3 100644 (file)
@@ -517,7 +517,7 @@ retry:
                        }
                        alloc_tail->group_head = alloc_start;
                        alloc_tail->async_tx.cookie = -EBUSY;
-                       list_splice(&chain, &alloc_tail->async_tx.tx_list);
+                       list_splice(&chain, &alloc_tail->tx_list);
                        mv_chan->last_used = last_used;
                        mv_desc_clear_next_desc(alloc_start);
                        mv_desc_clear_next_desc(alloc_tail);
@@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
        cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
 
        if (list_empty(&mv_chan->chain))
-               list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
        else {
                new_hw_chain = 0;
 
                old_chain_tail = list_entry(mv_chan->chain.prev,
                                            struct mv_xor_desc_slot,
                                            chain_node);
-               list_splice_init(&grp_start->async_tx.tx_list,
+               list_splice_init(&grp_start->tx_list,
                                 &old_chain_tail->chain_node);
 
                if (!mv_can_chain(grp_start))
@@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
                slot->async_tx.tx_submit = mv_xor_tx_submit;
                INIT_LIST_HEAD(&slot->chain_node);
                INIT_LIST_HEAD(&slot->slot_node);
+               INIT_LIST_HEAD(&slot->tx_list);
                hw_desc = (char *) mv_chan->device->dma_desc_pool;
                slot->async_tx.phys =
                        (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
index 06cafe1ef521b06acaa8a2ea90ee5c76afbf72fb..977b592e976bfa9eea31b64cb0ef5aa8c1ff6b49 100644 (file)
@@ -126,9 +126,8 @@ struct mv_xor_chan {
  * @idx: pool index
  * @unmap_src_cnt: number of xor sources
  * @unmap_len: transaction bytecount
+ * @tx_list: list of slots that make up a multi-descriptor transaction
  * @async_tx: support for the async_tx api
- * @group_list: list of slots that make up a multi-descriptor transaction
- *     for example transfer lengths larger than the supported hw max
  * @xor_check_result: result of zero sum
  * @crc32_result: result crc calculation
  */
@@ -145,6 +144,7 @@ struct mv_xor_desc_slot {
        u16                     unmap_src_cnt;
        u32                     value;
        size_t                  unmap_len;
+       struct list_head        tx_list;
        struct dma_async_tx_descriptor  async_tx;
        union {
                u32             *xor_check_result;
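
The recurring change across iop-adma, mv_xor, and the txx9dmac hunks further below is that tx_list no longer lives in struct dma_async_tx_descriptor; each driver now keeps the child list of a compound transaction in its private descriptor. Roughly this shape (illustrative, not any one driver verbatim):

        struct drv_desc {
                struct list_head tx_list;  /* children of a multi-desc tx */
                struct dma_async_tx_descriptor async_tx;
                /* ... driver-private fields ... */
        };

        /* prep: chain extra descriptors onto the first one */
        list_add_tail(&child->node, &first->tx_list);
        /* submit: splice the whole transaction onto the hardware chain */
        list_splice_init(&first->tx_list, &chain_tail->chain_node);
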
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
new file mode 100644 (file)
index 0000000..b3b065c
--- /dev/null
@@ -0,0 +1,786 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * based on drivers/dma/fsldma.c
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - The SuperH DMAC has no hardware DMA chain mode.
+ * - The maximum DMA transfer size is 16MB.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/platform_device.h>
+#include <cpu/dma.h>
+#include <asm/dma-sh.h>
+#include "shdma.h"
+
+/* DMA descriptor control */
+#define DESC_LAST      (-1)
+#define DESC_COMP      (1)
+#define DESC_NCOMP     (0)
+
+#define NR_DESCS_PER_CHANNEL 32
+/*
+ * Define the default configuration for dual address memory-memory transfer.
+ * The 0x400 value represents auto-request, external->external.
+ *
+ * This driver uses 4-byte burst mode by default.  To change the mode,
+ * change the value of RS_DEFAULT (e.g. for 1-byte burst mode use
+ * (RS_DUAL & ~TS_32)).
+ */
+#define RS_DEFAULT  (RS_DUAL)
+
+#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+       ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+       return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = RS_DEFAULT; /* default is DUAL mode */
+       sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR registers
+ */
+static void sh_dmae_ctl_stop(int id)
+{
+       unsigned short dmaor = dmaor_read_reg(id);
+
+       dmaor &= ~(DMAOR_NMIF | DMAOR_AE);
+       dmaor_write_reg(id, dmaor);
+}
+
+static int sh_dmae_rst(int id)
+{
+       unsigned short dmaor;
+
+       sh_dmae_ctl_stop(id);
+       dmaor = (dmaor_read_reg(id)|DMAOR_INIT);
+
+       dmaor_write_reg(id, dmaor);
+       if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) {
+               pr_warning("dma-sh: Can't initialize DMAOR.\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int dmae_is_idle(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+       if (chcr & CHCR_DE) {
+               if (!(chcr & CHCR_TE))
+                       return -EBUSY; /* working */
+       }
+       return 0; /* waiting */
+}
+
+static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+       return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
+}
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw)
+{
+       sh_dmae_writel(sh_chan, hw.sar, SAR);
+       sh_dmae_writel(sh_chan, hw.dar, DAR);
+       sh_dmae_writel(sh_chan,
+               (hw.tcr >> calc_xmit_shift(sh_chan)), TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+       chcr |= (CHCR_DE|CHCR_IE);
+       sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+       chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE);
+       sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+       int ret = dmae_is_idle(sh_chan);
+       /* CHCR must not be written while the DMA engine is busy */
+       if (ret)
+               return ret;
+
+       sh_dmae_writel(sh_chan, val, CHCR);
+       return 0;
+}
+
+#define DMARS1_ADDR    0x04
+#define DMARS2_ADDR    0x08
+#define DMARS_SHIFT 8
+#define DMARS_CHAN_MSK 0x01
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+       u32 addr;
+       int shift = 0;
+       int ret = dmae_is_idle(sh_chan);
+       if (ret)
+               return ret;
+
+       if (sh_chan->id & DMARS_CHAN_MSK)
+               shift = DMARS_SHIFT;
+
+       switch (sh_chan->id) {
+       /* DMARS0 */
+       case 0:
+       case 1:
+               addr = SH_DMARS_BASE;
+               break;
+       /* DMARS1 */
+       case 2:
+       case 3:
+               addr = (SH_DMARS_BASE + DMARS1_ADDR);
+               break;
+       /* DMARS2 */
+       case 4:
+       case 5:
+               addr = (SH_DMARS_BASE + DMARS2_ADDR);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       ctrl_outw((val << shift) |
+               (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)),
+               addr);
+
+       return 0;
+}
+
+static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct sh_desc *desc = tx_to_sh_desc(tx);
+       struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
+       dma_cookie_t cookie;
+
+       spin_lock_bh(&sh_chan->desc_lock);
+
+       cookie = sh_chan->common.cookie;
+       cookie++;
+       if (cookie < 0)
+               cookie = 1;
+
+       /* a single-descriptor transfer keeps its -EBUSY end marker */
+       if (desc->async_tx.cookie != -EBUSY)
+               desc->async_tx.cookie = cookie;
+       sh_chan->common.cookie = desc->async_tx.cookie;
+
+       list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev);
+
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       return cookie;
+}
+
+static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
+{
+       struct sh_desc *desc, *_desc, *ret = NULL;
+
+       spin_lock_bh(&sh_chan->desc_lock);
+       list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) {
+               if (async_tx_test_ack(&desc->async_tx)) {
+                       list_del(&desc->node);
+                       ret = desc;
+                       break;
+               }
+       }
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       return ret;
+}
+
+static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc)
+{
+       if (desc) {
+               spin_lock_bh(&sh_chan->desc_lock);
+
+               list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+               list_add(&desc->node, &sh_chan->ld_free);
+
+               spin_unlock_bh(&sh_chan->desc_lock);
+       }
+}
+
+static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+       struct sh_desc *desc;
+
+       spin_lock_bh(&sh_chan->desc_lock);
+       while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
+               spin_unlock_bh(&sh_chan->desc_lock);
+               desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
+               if (!desc) {
+                       spin_lock_bh(&sh_chan->desc_lock);
+                       break;
+               }
+               dma_async_tx_descriptor_init(&desc->async_tx,
+                                       &sh_chan->common);
+               desc->async_tx.tx_submit = sh_dmae_tx_submit;
+               desc->async_tx.flags = DMA_CTRL_ACK;
+               INIT_LIST_HEAD(&desc->tx_list);
+               sh_dmae_put_desc(sh_chan, desc);
+
+               spin_lock_bh(&sh_chan->desc_lock);
+               sh_chan->descs_allocated++;
+       }
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       return sh_chan->descs_allocated;
+}
+
+/*
+ * sh_dmae_free_chan_resources - Free all resources of the channel.
+ */
+static void sh_dmae_free_chan_resources(struct dma_chan *chan)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+       struct sh_desc *desc, *_desc;
+       LIST_HEAD(list);
+
+       BUG_ON(!list_empty(&sh_chan->ld_queue));
+       spin_lock_bh(&sh_chan->desc_lock);
+
+       list_splice_init(&sh_chan->ld_free, &list);
+       sh_chan->descs_allocated = 0;
+
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       list_for_each_entry_safe(desc, _desc, &list, node)
+               kfree(desc);
+}
+
+static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
+       struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+       size_t len, unsigned long flags)
+{
+       struct sh_dmae_chan *sh_chan;
+       struct sh_desc *first = NULL, *prev = NULL, *new;
+       size_t copy_size;
+
+       if (!chan)
+               return NULL;
+
+       if (!len)
+               return NULL;
+
+       sh_chan = to_sh_chan(chan);
+
+       do {
+               /* Allocate the link descriptor from DMA pool */
+               new = sh_dmae_get_desc(sh_chan);
+               if (!new) {
+                       dev_err(sh_chan->dev,
+                                       "No free memory for link descriptor\n");
+                       goto err_get_desc;
+               }
+
+               copy_size = min(len, (size_t)SH_DMA_TCR_MAX);
+
+               new->hw.sar = dma_src;
+               new->hw.dar = dma_dest;
+               new->hw.tcr = copy_size;
+               if (!first)
+                       first = new;
+
+               new->mark = DESC_NCOMP;
+               async_tx_ack(&new->async_tx);
+
+               prev = new;
+               len -= copy_size;
+               dma_src += copy_size;
+               dma_dest += copy_size;
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+       } while (len);
+
+       new->async_tx.flags = flags; /* client is in control of this ack */
+       new->async_tx.cookie = -EBUSY; /* Last desc */
+
+       return &first->async_tx;
+
+err_get_desc:
+       sh_dmae_put_desc(sh_chan, first);
+       return NULL;
+
+}
+
+/*
+ * sh_dmae_chan_ld_cleanup - Clean up link descriptors
+ *
+ * This function cleans up the ld_queue of the DMA channel.
+ */
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan)
+{
+       struct sh_desc *desc, *_desc;
+
+       spin_lock_bh(&sh_chan->desc_lock);
+       list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
+               dma_async_tx_callback callback;
+               void *callback_param;
+
+               /* descriptor not yet completed; stop scanning */
+               if (desc->mark == DESC_NCOMP)
+                       break;
+
+               /* completed descriptor */
+               callback = desc->async_tx.callback;
+               callback_param = desc->async_tx.callback_param;
+
+               /* Remove from ld_queue list */
+               list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+
+               dev_dbg(sh_chan->dev, "link descriptor %p will be recycled.\n",
+                               desc);
+
+               list_move(&desc->node, &sh_chan->ld_free);
+               /* Run the link descriptor callback function */
+               if (callback) {
+                       spin_unlock_bh(&sh_chan->desc_lock);
+                       dev_dbg(sh_chan->dev, "link descriptor %p callback\n",
+                                       desc);
+                       callback(callback_param);
+                       spin_lock_bh(&sh_chan->desc_lock);
+               }
+       }
+       spin_unlock_bh(&sh_chan->desc_lock);
+}
+
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
+{
+       struct list_head *ld_node;
+       struct sh_dmae_regs hw;
+
+       /* do nothing if a transfer is still in flight */
+       if (dmae_is_idle(sh_chan))
+               return;
+
+       /* Find the first untransferred descriptor */
+       for (ld_node = sh_chan->ld_queue.next;
+               (ld_node != &sh_chan->ld_queue)
+                       && (to_sh_desc(ld_node)->mark == DESC_COMP);
+               ld_node = ld_node->next)
+               cpu_relax();
+
+       if (ld_node != &sh_chan->ld_queue) {
+               /* Get the ld start address from ld_queue */
+               hw = to_sh_desc(ld_node)->hw;
+               dmae_set_reg(sh_chan, hw);
+               dmae_start(sh_chan);
+       }
+}
+
+static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+       sh_chan_xfer_ld_queue(sh_chan);
+}
+
+static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
+                                       dma_cookie_t cookie,
+                                       dma_cookie_t *done,
+                                       dma_cookie_t *used)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+       dma_cookie_t last_used;
+       dma_cookie_t last_complete;
+
+       sh_dmae_chan_ld_cleanup(sh_chan);
+
+       last_used = chan->cookie;
+       last_complete = sh_chan->completed_cookie;
+       if (last_complete == -EBUSY)
+               last_complete = last_used;
+
+       if (done)
+               *done = last_complete;
+
+       if (used)
+               *used = last_used;
+
+       return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t sh_dmae_interrupt(int irq, void *data)
+{
+       irqreturn_t ret = IRQ_NONE;
+       struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+       if (chcr & CHCR_TE) {
+               /* DMA stop */
+               dmae_halt(sh_chan);
+
+               ret = IRQ_HANDLED;
+               tasklet_schedule(&sh_chan->tasklet);
+       }
+
+       return ret;
+}
+
+#if defined(CONFIG_CPU_SH4)
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+       int err = 0;
+       struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
+
+       /* IRQ Multi */
+       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+               int cnt = 0;
+               switch (irq) {
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+               case DMTE6_IRQ:
+                       cnt++;
+#endif
+               case DMTE0_IRQ:
+                       if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
+                               disable_irq(irq);
+                               return IRQ_HANDLED;
+                       }
+               default:
+                       return IRQ_NONE;
+               }
+       } else {
+               /* reset dma controller */
+               err = sh_dmae_rst(0);
+               if (err)
+                       return err;
+               if (shdev->pdata.mode & SHDMA_DMAOR1) {
+                       err = sh_dmae_rst(1);
+                       if (err)
+                               return err;
+               }
+               disable_irq(irq);
+               return IRQ_HANDLED;
+       }
+}
+#endif
+
+static void dmae_do_tasklet(unsigned long data)
+{
+       struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+       struct sh_desc *desc, *_desc, *cur_desc = NULL;
+       u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+       list_for_each_entry_safe(desc, _desc,
+                                       &sh_chan->ld_queue, node) {
+               if ((desc->hw.sar + desc->hw.tcr) == sar_buf) {
+                       cur_desc = desc;
+                       break;
+               }
+       }
+
+       if (cur_desc) {
+               switch (cur_desc->async_tx.cookie) {
+               case 0: /* intermediate descriptor */
+                       break;
+               case -EBUSY: /* last desc */
+                       sh_chan->completed_cookie =
+                               cur_desc->async_tx.cookie;
+                       break;
+               default: /* first desc (cookie > 0) */
+                       sh_chan->completed_cookie =
+                               cur_desc->async_tx.cookie - 1;
+                       break;
+               }
+               cur_desc->mark = DESC_COMP;
+       }
+       /* Next desc */
+       sh_chan_xfer_ld_queue(sh_chan);
+       sh_dmae_chan_ld_cleanup(sh_chan);
+}
+
+static unsigned int get_dmae_irq(unsigned int id)
+{
+       unsigned int irq = 0;
+       if (id < ARRAY_SIZE(dmte_irq_map))
+               irq = dmte_irq_map[id];
+       return irq;
+}
+
+static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
+{
+       int err;
+       unsigned int irq = get_dmae_irq(id);
+       unsigned long irqflags = IRQF_DISABLED;
+       struct sh_dmae_chan *new_sh_chan;
+
+       /* alloc channel */
+       new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
+       if (!new_sh_chan) {
+               dev_err(shdev->common.dev, "No free memory for allocating "
+                               "dma channels!\n");
+               return -ENOMEM;
+       }
+
+       new_sh_chan->dev = shdev->common.dev;
+       new_sh_chan->id = id;
+
+       /* Init DMA tasklet */
+       tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
+                       (unsigned long)new_sh_chan);
+
+       /* Init the channel */
+       dmae_init(new_sh_chan);
+
+       spin_lock_init(&new_sh_chan->desc_lock);
+
+       /* Init descriptor management lists */
+       INIT_LIST_HEAD(&new_sh_chan->ld_queue);
+       INIT_LIST_HEAD(&new_sh_chan->ld_free);
+
+       /* reference the shared dma_device */
+       new_sh_chan->common.device = &shdev->common;
+
+       /* Add the channel to DMA device channel list */
+       list_add_tail(&new_sh_chan->common.device_node,
+                       &shdev->common.channels);
+       shdev->common.chancnt++;
+
+       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+               irqflags = IRQF_SHARED;
+#if defined(DMTE6_IRQ)
+               if (irq >= DMTE6_IRQ)
+                       irq = DMTE6_IRQ;
+               else
+#endif
+                       irq = DMTE0_IRQ;
+       }
+
+       snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
+                       "sh-dmae%d", new_sh_chan->id);
+
+       /* set up channel irq */
+       err = request_irq(irq, &sh_dmae_interrupt,
+               irqflags, new_sh_chan->dev_id, new_sh_chan);
+       if (err) {
+               dev_err(shdev->common.dev, "DMA channel %d request_irq error "
+                       "with return %d\n", id, err);
+               goto err_no_irq;
+       }
+
+       /* CHCR register control function */
+       new_sh_chan->set_chcr = dmae_set_chcr;
+       /* DMARS register control function */
+       new_sh_chan->set_dmars = dmae_set_dmars;
+
+       shdev->chan[id] = new_sh_chan;
+       return 0;
+
+err_no_irq:
+       /* remove from dmaengine device node */
+       list_del(&new_sh_chan->common.device_node);
+       kfree(new_sh_chan);
+       return err;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+       int i;
+
+       for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
+               if (shdev->chan[i]) {
+                       struct sh_dmae_chan *shchan = shdev->chan[i];
+                       if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
+                               free_irq(dmte_irq_map[i], shchan);
+
+                       list_del(&shchan->common.device_node);
+                       kfree(shchan);
+                       shdev->chan[i] = NULL;
+               }
+       }
+       shdev->common.chancnt = 0;
+}
+
+static int __init sh_dmae_probe(struct platform_device *pdev)
+{
+       int err = 0, cnt, ecnt;
+       unsigned long irqflags = IRQF_DISABLED;
+#if defined(CONFIG_CPU_SH4)
+       int eirq[] = { DMAE0_IRQ,
+#if defined(DMAE1_IRQ)
+                       DMAE1_IRQ
+#endif
+               };
+#endif
+       struct sh_dmae_device *shdev;
+
+       shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
+       if (!shdev) {
+               dev_err(&pdev->dev, "No enough memory\n");
+               err = -ENOMEM;
+               goto shdev_err;
+       }
+
+       /* get platform data; bail out (freeing shdev) if it is missing */
+       if (!pdev->dev.platform_data) {
+               err = -ENODEV;
+               goto rst_err;
+       }
+
+       /* platform data */
+       memcpy(&shdev->pdata, pdev->dev.platform_data,
+                       sizeof(struct sh_dmae_pdata));
+
+       /* reset dma controller */
+       err = sh_dmae_rst(0);
+       if (err)
+               goto rst_err;
+
+       /* SH7780/85/23 has DMAOR1 */
+       if (shdev->pdata.mode & SHDMA_DMAOR1) {
+               err = sh_dmae_rst(1);
+               if (err)
+                       goto rst_err;
+       }
+
+       INIT_LIST_HEAD(&shdev->common.channels);
+
+       dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+       shdev->common.device_alloc_chan_resources
+               = sh_dmae_alloc_chan_resources;
+       shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
+       shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
+       shdev->common.device_is_tx_complete = sh_dmae_is_complete;
+       shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
+       shdev->common.dev = &pdev->dev;
+
+#if defined(CONFIG_CPU_SH4)
+       /* Mixed IRQ mode (SH7722/SH7730 etc.): error IRQs are shared */
+       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+               irqflags = IRQF_SHARED;
+               eirq[0] = DMTE0_IRQ;
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+               eirq[1] = DMTE6_IRQ;
+#endif
+       }
+
+       for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
+               err = request_irq(eirq[ecnt], sh_dmae_err,
+                       irqflags, "DMAC Address Error", shdev);
+               if (err) {
+                       dev_err(&pdev->dev, "DMA device request_irq"
+                               "error (irq %d) with return %d\n",
+                               eirq[ecnt], err);
+                       goto eirq_err;
+               }
+       }
+#endif /* CONFIG_CPU_SH4 */
+
+       /* Create DMA Channel */
+       for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
+               err = sh_dmae_chan_probe(shdev, cnt);
+               if (err)
+                       goto chan_probe_err;
+       }
+
+       platform_set_drvdata(pdev, shdev);
+       dma_async_device_register(&shdev->common);
+
+       return err;
+
+chan_probe_err:
+       sh_dmae_chan_remove(shdev);
+
+eirq_err:
+       for (ecnt-- ; ecnt >= 0; ecnt--)
+               free_irq(eirq[ecnt], shdev);
+
+rst_err:
+       kfree(shdev);
+
+shdev_err:
+       return err;
+}
+
+static int __exit sh_dmae_remove(struct platform_device *pdev)
+{
+       struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+
+       dma_async_device_unregister(&shdev->common);
+
+       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+               free_irq(DMTE0_IRQ, shdev);
+#if defined(DMTE6_IRQ)
+               free_irq(DMTE6_IRQ, shdev);
+#endif
+       }
+
+       /* channel data remove */
+       sh_dmae_chan_remove(shdev);
+
+       if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) {
+               free_irq(DMAE0_IRQ, shdev);
+#if defined(DMAE1_IRQ)
+               free_irq(DMAE1_IRQ, shdev);
+#endif
+       }
+       kfree(shdev);
+
+       return 0;
+}
+
+static void sh_dmae_shutdown(struct platform_device *pdev)
+{
+       struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+       sh_dmae_ctl_stop(0);
+       if (shdev->pdata.mode & SHDMA_DMAOR1)
+               sh_dmae_ctl_stop(1);
+}
+
+static struct platform_driver sh_dmae_driver = {
+       .remove         = __exit_p(sh_dmae_remove),
+       .shutdown       = sh_dmae_shutdown,
+       .driver = {
+               .name   = "sh-dma-engine",
+       },
+};
+
+static int __init sh_dmae_init(void)
+{
+       return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void)
+{
+       platform_driver_unregister(&sh_dmae_driver);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
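
Since the driver registers only DMA_MEMCPY, a client would drive it through the generic dmaengine API; a hedged sketch (dst, src and len are placeholders, and the busy-wait is for illustration only):

        dma_cap_mask_t mask;
        struct dma_chan *chan;
        dma_cookie_t cookie;

        dma_cap_zero(mask);
        dma_cap_set(DMA_MEMCPY, mask);
        chan = dma_request_channel(mask, NULL, NULL);
        if (chan) {
                cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
                dma_async_issue_pending(chan);
                while (dma_async_is_tx_complete(chan, cookie, NULL, NULL)
                       != DMA_SUCCESS)
                        cpu_relax();
                dma_release_channel(chan);
        }
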
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
new file mode 100644 (file)
index 0000000..2b4bc15
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_SHDMA_H
+#define __DMA_SHDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+#define SH_DMA_TCR_MAX 0x00FFFFFF      /* 16MB */
+
+struct sh_dmae_regs {
+       u32 sar; /* SAR / source address */
+       u32 dar; /* DAR / destination address */
+       u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_desc {
+       struct list_head tx_list;
+       struct sh_dmae_regs hw;
+       struct list_head node;
+       struct dma_async_tx_descriptor async_tx;
+       int mark;
+};
+
+struct sh_dmae_chan {
+       dma_cookie_t completed_cookie;  /* The maximum cookie completed */
+       spinlock_t desc_lock;                   /* Descriptor operation lock */
+       struct list_head ld_queue;              /* Link descriptors queue */
+       struct list_head ld_free;               /* Link descriptors free */
+       struct dma_chan common;                 /* DMA common channel */
+       struct device *dev;                             /* Channel device */
+       struct tasklet_struct tasklet;  /* Tasklet */
+       int descs_allocated;                    /* desc count */
+       int id;                         /* Raw id of this channel */
+       char dev_id[16];        /* unique name per DMAC of channel */
+
+       /* Set chcr */
+       int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs);
+       /* Set DMA resource */
+       int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res);
+};
+
+struct sh_dmae_device {
+       struct dma_device common;
+       struct sh_dmae_chan *chan[MAX_DMA_CHANNELS];
+       struct sh_dmae_pdata pdata;
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
+#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
+#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
+
+#endif /* __DMA_SHDMA_H */
index 7837930146a4fb2d229ce28dcf56675084a5783a..fb6bb64e88619a729ab9b2cd1fa4574fd29be16e 100644 (file)
@@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
 
 static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
 {
-       if (!list_empty(&desc->txd.tx_list))
-               desc = list_entry(desc->txd.tx_list.prev,
-                                 struct txx9dmac_desc, desc_node);
+       if (!list_empty(&desc->tx_list))
+               desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
        return desc;
 }
 
@@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
        desc = kzalloc(sizeof(*desc), flags);
        if (!desc)
                return NULL;
+       INIT_LIST_HEAD(&desc->tx_list);
        dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
        desc->txd.tx_submit = txx9dmac_tx_submit;
        /* txd.flags will be overwritten in prep funcs */
@@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
        struct txx9dmac_dev *ddev = dc->ddev;
        struct txx9dmac_desc *child;
 
-       list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &desc->tx_list, desc_node)
                dma_sync_single_for_cpu(chan2parent(&dc->chan),
                                child->txd.phys, ddev->descsize,
                                DMA_TO_DEVICE);
@@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
                txx9dmac_sync_desc_for_cpu(dc, desc);
 
                spin_lock_bh(&dc->lock);
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        dev_vdbg(chan2dev(&dc->chan),
                                 "moving child desc %p to freelist\n",
                                 child);
-               list_splice_init(&desc->txd.tx_list, &dc->free_list);
+               list_splice_init(&desc->tx_list, &dc->free_list);
                dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
                         desc);
                list_add(&desc->desc_node, &dc->free_list);
@@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
        param = txd->callback_param;
 
        txx9dmac_sync_desc_for_cpu(dc, desc);
-       list_splice_init(&txd->tx_list, &dc->free_list);
+       list_splice_init(&desc->tx_list, &dc->free_list);
        list_move(&desc->desc_node, &dc->free_list);
 
        if (!ds) {
@@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
                 "Bad descriptor submitted for DMA! (cookie: %d)\n",
                 bad_desc->txd.cookie);
        txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
-       list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &bad_desc->tx_list, desc_node)
                txx9dmac_dump_desc(dc, &child->hwdesc);
        /* Pretend the descriptor completed successfully */
        txx9dmac_descriptor_complete(dc, bad_desc);
@@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
                        return;
                }
 
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        if (desc_read_CHAR(dc, child) == chain) {
                                /* Currently in progress */
                                if (csr & TXX9_DMA_CSR_ABCHC)
@@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                        dma_sync_single_for_device(chan2parent(&dc->chan),
                                        prev->txd.phys, ddev->descsize,
                                        DMA_TO_DEVICE);
-                       list_add_tail(&desc->desc_node,
-                                       &first->txd.tx_list);
+                       list_add_tail(&desc->desc_node, &first->tx_list);
                }
                prev = desc;
        }
@@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                        prev->txd.phys,
                                        ddev->descsize,
                                        DMA_TO_DEVICE);
-                       list_add_tail(&desc->desc_node,
-                                       &first->txd.tx_list);
+                       list_add_tail(&desc->desc_node, &first->tx_list);
                }
                prev = desc;
        }
index c907ff01d27603a06539e8ac89ec71651f86de34..365d42366b9f15e237833ace8530e1f4e78e1a21 100644 (file)
@@ -231,6 +231,7 @@ struct txx9dmac_desc {
 
        /* THEN values for driver housekeeping */
        struct list_head                desc_node ____cacheline_aligned;
+       struct list_head                tx_list;
        struct dma_async_tx_descriptor  txd;
        size_t                          len;
 };
index a3ca18e2d7cfe76d285cb5aa728ff9a709def563..02127e59fe8e68f8f3518453028d47b84988e598 100644 (file)
@@ -133,6 +133,13 @@ config EDAC_I3000
          Support for error detection and correction on the Intel
          3000 and 3010 server chipsets.
 
+config EDAC_I3200
+       tristate "Intel 3200"
+       depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL
+       help
+         Support for error detection and correction on the Intel
+         3200 and 3210 server chipsets.
+
 config EDAC_X38
        tristate "Intel X38"
        depends on EDAC_MM_EDAC && PCI && X86
@@ -176,11 +183,11 @@ config EDAC_I5100
          San Clemente MCH.
 
 config EDAC_MPC85XX
-       tristate "Freescale MPC85xx"
-       depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
+       tristate "Freescale MPC83xx / MPC85xx"
+       depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx)
        help
          Support for error detection and correction on the Freescale
-         MPC8560, MPC8540, MPC8548
+         MPC8349, MPC8560, MPC8540, MPC8548
 
 config EDAC_MV64X60
        tristate "Marvell MV64x60"
index cfa033ce53a7d3b2cfbaeb7f77d76fd902a610a0..7a473bbe8abd77bfab7784e2b707ca627a8cb798 100644 (file)
@@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I82443BXGX)         += i82443bxgx_edac.o
 obj-$(CONFIG_EDAC_I82875P)             += i82875p_edac.o
 obj-$(CONFIG_EDAC_I82975X)             += i82975x_edac.o
 obj-$(CONFIG_EDAC_I3000)               += i3000_edac.o
+obj-$(CONFIG_EDAC_I3200)               += i3200_edac.o
 obj-$(CONFIG_EDAC_X38)                 += x38_edac.o
 obj-$(CONFIG_EDAC_I82860)              += i82860_edac.o
 obj-$(CONFIG_EDAC_R82600)              += r82600_edac.o
@@ -49,3 +50,4 @@ obj-$(CONFIG_EDAC_CELL)                       += cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)              += ppc4xx_edac.o
 obj-$(CONFIG_EDAC_AMD8111)             += amd8111_edac.o
 obj-$(CONFIG_EDAC_AMD8131)             += amd8131_edac.o
+
index 8c54196b5aba90011c5558ab312c7a19a2c54486..3d50274f1348d97b6e3cc4bc9d9f071e73ebb2ec 100644 (file)
@@ -885,14 +885,14 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdev->name)) {
                cpc925_printk(KERN_ERR, "Unable to request mem region\n");
                res = -EBUSY;
                goto err1;
        }
 
-       vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1);
+       vbase = devm_ioremap(&pdev->dev, r->start, resource_size(r));
        if (!vbase) {
                cpc925_printk(KERN_ERR, "Unable to ioremap device\n");
                res = -ENOMEM;
@@ -953,7 +953,7 @@ err3:
        cpc925_mc_exit(mci);
        edac_mc_free(mci);
 err2:
-       devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1);
+       devm_release_mem_region(&pdev->dev, r->start, resource_size(r));
 err1:
        devres_release_group(&pdev->dev, cpc925_probe);
 out:
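
The conversions above are mechanical: struct resource ranges are
inclusive, and resource_size() from <linux/ioport.h> wraps the
inclusive-range arithmetic so callers stop open-coding the +1. Its
definition is essentially:

        static inline resource_size_t resource_size(const struct resource *res)
        {
                return res->end - res->start + 1;       /* [start, end] is inclusive */
        }

The identical substitution recurs in the mv64x60 hunks further below.
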
index b02a6a69a8f0148eebbaa19526a75e89890973d9..d5e13c94714f13bca93ae94927b55b144be9f10b 100644 (file)
@@ -356,7 +356,6 @@ static void complete_edac_device_list_del(struct rcu_head *head)
 
        edac_dev = container_of(head, struct edac_device_ctl_info, rcu);
        INIT_LIST_HEAD(&edac_dev->link);
-       complete(&edac_dev->removal_complete);
 }
 
 /*
@@ -369,10 +368,8 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info
                                                *edac_device)
 {
        list_del_rcu(&edac_device->link);
-
-       init_completion(&edac_device->removal_complete);
        call_rcu(&edac_device->rcu, complete_edac_device_list_del);
-       wait_for_completion(&edac_device->removal_complete);
+       rcu_barrier();
 }
 
 /*
index 335b7ebdb11c535481f785e11add07aef509177b..b629c41756f0324b348827cb1e1975d1cdefe7c9 100644 (file)
@@ -418,16 +418,14 @@ static void complete_mc_list_del(struct rcu_head *head)
 
        mci = container_of(head, struct mem_ctl_info, rcu);
        INIT_LIST_HEAD(&mci->link);
-       complete(&mci->complete);
 }
 
 static void del_mc_from_global_list(struct mem_ctl_info *mci)
 {
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);
-       init_completion(&mci->complete);
        call_rcu(&mci->rcu, complete_mc_list_del);
-       wait_for_completion(&mci->complete);
+       rcu_barrier();
 }
 
 /**
index 30b585b1d60bdcf50b433a93d948cc6b30e88f0c..efb5d565078304c60ea096b84bf1ae2b9761e4a4 100644 (file)
@@ -174,7 +174,6 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
 
        pci = container_of(head, struct edac_pci_ctl_info, rcu);
        INIT_LIST_HEAD(&pci->link);
-       complete(&pci->complete);
 }
 
 /*
@@ -185,9 +184,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
 static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
 {
        list_del_rcu(&pci->link);
-       init_completion(&pci->complete);
        call_rcu(&pci->rcu, complete_edac_pci_list_del);
-       wait_for_completion(&pci->complete);
+       rcu_barrier();
 }
 
 #if 0
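
The three EDAC hunks above (device, mc, pci) all drop the same
completion handshake: rcu_barrier() already waits for every callback
queued with call_rcu() to run, so signalling a per-object completion
from inside the callback was redundant. Schematically:

        /* before: wait for our own RCU callback via a completion */
        init_completion(&obj->removal_complete);
        call_rcu(&obj->rcu, callback_that_completes);
        wait_for_completion(&obj->removal_complete);

        /* after: let the RCU core do the waiting */
        call_rcu(&obj->rcu, callback);
        rcu_barrier();  /* returns once all pending call_rcu() callbacks ran */
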
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
new file mode 100644 (file)
index 0000000..fde4db9
--- /dev/null
@@ -0,0 +1,527 @@
+/*
+ * Intel 3200/3210 Memory Controller kernel module
+ * Copyright (C) 2008-2009 Akamai Technologies, Inc.
+ * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/io.h>
+#include "edac_core.h"
+
+#define I3200_REVISION        "1.1"
+
+#define EDAC_MOD_STR        "i3200_edac"
+
+#define PCI_DEVICE_ID_INTEL_3200_HB    0x29f0
+
+#define I3200_RANKS            8
+#define I3200_RANKS_PER_CHANNEL        4
+#define I3200_CHANNELS         2
+
+/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */
+
+#define I3200_MCHBAR_LOW       0x48    /* MCH Memory Mapped Register BAR */
+#define I3200_MCHBAR_HIGH      0x4c
+#define I3200_MCHBAR_MASK      0xfffffc000ULL  /* bits 35:14 */
+#define I3200_MMR_WINDOW_SIZE  16384
+
+#define I3200_TOM              0xa0    /* Top of Memory (16b)
+                                        *
+                                        * 15:10 reserved
+                                        *  9:0  total populated physical memory
+                                        */
+#define I3200_TOM_MASK         0x3ff   /* bits 9:0 */
+#define I3200_TOM_SHIFT                26      /* 64MiB grain */
+
+#define I3200_ERRSTS           0xc8    /* Error Status Register (16b)
+                                        *
+                                        * 15    reserved
+                                        * 14    Isochronous TBWRR Run Behind FIFO Full
+                                        *       (ITCV)
+                                        * 13    Isochronous TBWRR Run Behind FIFO Put
+                                        *       (ITSTV)
+                                        * 12    reserved
+                                        * 11    MCH Thermal Sensor Event
+                                        *       for SMI/SCI/SERR (GTSE)
+                                        * 10    reserved
+                                        *  9    LOCK to non-DRAM Memory Flag (LCKF)
+                                        *  8    reserved
+                                        *  7    DRAM Throttle Flag (DTF)
+                                        *  6:2  reserved
+                                        *  1    Multi-bit DRAM ECC Error Flag (DMERR)
+                                        *  0    Single-bit DRAM ECC Error Flag (DSERR)
+                                        */
+#define I3200_ERRSTS_UE                0x0002
+#define I3200_ERRSTS_CE                0x0001
+#define I3200_ERRSTS_BITS      (I3200_ERRSTS_UE | I3200_ERRSTS_CE)
+
+
+/* Intel  MMIO register space - device 0 function 0 - MMR space */
+
+#define I3200_C0DRB    0x200   /* Channel 0 DRAM Rank Boundary (16b x 4)
+                                *
+                                * 15:10 reserved
+                                *  9:0  Channel 0 DRAM Rank Boundary Address
+                                */
+#define I3200_C1DRB    0x600   /* Channel 1 DRAM Rank Boundary (16b x 4) */
+#define I3200_DRB_MASK 0x3ff   /* bits 9:0 */
+#define I3200_DRB_SHIFT        26      /* 64MiB grain */
+
+#define I3200_C0ECCERRLOG      0x280   /* Channel 0 ECC Error Log (64b)
+                                        *
+                                        * 63:48 Error Column Address (ERRCOL)
+                                        * 47:32 Error Row Address (ERRROW)
+                                        * 31:29 Error Bank Address (ERRBANK)
+                                        * 28:27 Error Rank Address (ERRRANK)
+                                        * 26:24 reserved
+                                        * 23:16 Error Syndrome (ERRSYND)
+                                        * 15: 2 reserved
+                                        *    1  Multiple Bit Error Status (MERRSTS)
+                                        *    0  Correctable Error Status (CERRSTS)
+                                        */
+#define I3200_C1ECCERRLOG              0x680   /* Chan 1 ECC Error Log (64b) */
+#define I3200_ECCERRLOG_CE             0x1
+#define I3200_ECCERRLOG_UE             0x2
+#define I3200_ECCERRLOG_RANK_BITS      0x18000000
+#define I3200_ECCERRLOG_RANK_SHIFT     27
+#define I3200_ECCERRLOG_SYNDROME_BITS  0xff0000
+#define I3200_ECCERRLOG_SYNDROME_SHIFT 16
+#define I3200_CAPID0                   0xe0    /* P.95 of spec for details */
+
+struct i3200_priv {
+       void __iomem *window;
+};
+
+static int nr_channels;
+
+static int how_many_channels(struct pci_dev *pdev)
+{
+       unsigned char capid0_8b; /* 8th byte of CAPID0 */
+
+       pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
+       if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
+               debugf0("In single channel mode.\n");
+               return 1;
+       } else {
+               debugf0("In dual channel mode.\n");
+               return 2;
+       }
+}
+
+static unsigned long eccerrlog_syndrome(u64 log)
+{
+       return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
+               I3200_ECCERRLOG_SYNDROME_SHIFT;
+}
+
+static int eccerrlog_row(int channel, u64 log)
+{
+       u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
+               I3200_ECCERRLOG_RANK_SHIFT);
+       return rank | (channel * I3200_RANKS_PER_CHANNEL);
+}
+
+enum i3200_chips {
+       I3200 = 0,
+};
+
+struct i3200_dev_info {
+       const char *ctl_name;
+};
+
+struct i3200_error_info {
+       u16 errsts;
+       u16 errsts2;
+       u64 eccerrlog[I3200_CHANNELS];
+};
+
+static const struct i3200_dev_info i3200_devs[] = {
+       [I3200] = {
+               .ctl_name = "i3200"
+       },
+};
+
+static struct pci_dev *mci_pdev;
+static int i3200_registered = 1;
+
+
+static void i3200_clear_error_info(struct mem_ctl_info *mci)
+{
+       struct pci_dev *pdev;
+
+       pdev = to_pci_dev(mci->dev);
+
+       /*
+        * Clear any error bits.
+        * (Yes, we really clear bits by writing 1 to them.)
+        */
+       pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
+               I3200_ERRSTS_BITS);
+}
+
+static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
+               struct i3200_error_info *info)
+{
+       struct pci_dev *pdev;
+       struct i3200_priv *priv = mci->pvt_info;
+       void __iomem *window = priv->window;
+
+       pdev = to_pci_dev(mci->dev);
+
+       /*
+        * This is a mess because there is no atomic way to read all the
+        * registers at once, and they can change under us, e.g. a CE
+        * being overwritten by a UE.
+        */
+       pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
+       if (!(info->errsts & I3200_ERRSTS_BITS))
+               return;
+
+       info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+       if (nr_channels == 2)
+               info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+
+       pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
+
+       /*
+        * If the error is the same for both reads then the first set
+        * of reads is valid.  If there is a change then there is a CE
+        * with no info and the second set of reads is valid and
+        * should be UE info.
+        */
+       if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+               info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+               if (nr_channels == 2)
+                       info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+       }
+
+       i3200_clear_error_info(mci);
+}
+
+static void i3200_process_error_info(struct mem_ctl_info *mci,
+               struct i3200_error_info *info)
+{
+       int channel;
+       u64 log;
+
+       if (!(info->errsts & I3200_ERRSTS_BITS))
+               return;
+
+       if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+               edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+               info->errsts = info->errsts2;
+       }
+
+       for (channel = 0; channel < nr_channels; channel++) {
+               log = info->eccerrlog[channel];
+               if (log & I3200_ECCERRLOG_UE) {
+                       edac_mc_handle_ue(mci, 0, 0,
+                               eccerrlog_row(channel, log),
+                               "i3200 UE");
+               } else if (log & I3200_ECCERRLOG_CE) {
+                       edac_mc_handle_ce(mci, 0, 0,
+                               eccerrlog_syndrome(log),
+                               eccerrlog_row(channel, log), 0,
+                               "i3200 CE");
+               }
+       }
+}
+
+static void i3200_check(struct mem_ctl_info *mci)
+{
+       struct i3200_error_info info;
+
+       debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+       i3200_get_and_clear_error_info(mci, &info);
+       i3200_process_error_info(mci, &info);
+}
+
+
+static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
+{
+       union {
+               u64 mchbar;
+               struct {
+                       u32 mchbar_low;
+                       u32 mchbar_high;
+               };
+       } u;
+       void __iomem *window;
+
+       pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
+       pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
+       u.mchbar &= I3200_MCHBAR_MASK;
+
+       if (u.mchbar != (resource_size_t)u.mchbar) {
+               printk(KERN_ERR
+                       "i3200: mmio space beyond accessible range (0x%llx)\n",
+                       (unsigned long long)u.mchbar);
+               return NULL;
+       }
+
+       window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
+       if (!window)
+               printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
+                       (unsigned long long)u.mchbar);
+
+       return window;
+}
+
+
+static void i3200_get_drbs(void __iomem *window,
+       u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+       int i;
+
+       for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
+               drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
+               drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
+       }
+}
+
+static bool i3200_is_stacked(struct pci_dev *pdev,
+       u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+       u16 tom;
+
+       pci_read_config_word(pdev, I3200_TOM, &tom);
+       tom &= I3200_TOM_MASK;
+
+       return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
+}
+
+static unsigned long drb_to_nr_pages(
+       u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
+       int channel, int rank)
+{
+       int n;
+
+       n = drbs[channel][rank];
+       if (rank > 0)
+               n -= drbs[channel][rank - 1];
+       if (stacked && (channel == 1) &&
+           drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
+               n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
+
+       n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
+       return n;
+}
+
+static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
+{
+       int rc;
+       int i;
+       struct mem_ctl_info *mci = NULL;
+       unsigned long last_page;
+       u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
+       bool stacked;
+       void __iomem *window;
+       struct i3200_priv *priv;
+
+       debugf0("MC: %s()\n", __func__);
+
+       window = i3200_map_mchbar(pdev);
+       if (!window)
+               return -ENODEV;
+
+       i3200_get_drbs(window, drbs);
+       nr_channels = how_many_channels(pdev);
+
+       mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
+               nr_channels, 0);
+       if (!mci) {
+               iounmap(window);
+               return -ENOMEM;
+       }
+
+       debugf3("MC: %s(): init mci\n", __func__);
+
+       mci->dev = &pdev->dev;
+       mci->mtype_cap = MEM_FLAG_DDR2;
+
+       mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+       mci->edac_cap = EDAC_FLAG_SECDED;
+
+       mci->mod_name = EDAC_MOD_STR;
+       mci->mod_ver = I3200_REVISION;
+       mci->ctl_name = i3200_devs[dev_idx].ctl_name;
+       mci->dev_name = pci_name(pdev);
+       mci->edac_check = i3200_check;
+       mci->ctl_page_to_phys = NULL;
+       priv = mci->pvt_info;
+       priv->window = window;
+
+       stacked = i3200_is_stacked(pdev, drbs);
+
+       /*
+        * The dram rank boundary (DRB) reg values are boundary addresses
+        * for each DRAM rank with a granularity of 64MB.  DRB regs are
+        * cumulative; the last one will contain the total memory
+        * contained in all ranks.
+        */
+       last_page = -1UL;
+       for (i = 0; i < mci->nr_csrows; i++) {
+               unsigned long nr_pages;
+               struct csrow_info *csrow = &mci->csrows[i];
+
+               nr_pages = drb_to_nr_pages(drbs, stacked,
+                       i / I3200_RANKS_PER_CHANNEL,
+                       i % I3200_RANKS_PER_CHANNEL);
+
+               if (nr_pages == 0) {
+                       csrow->mtype = MEM_EMPTY;
+                       continue;
+               }
+
+               csrow->first_page = last_page + 1;
+               last_page += nr_pages;
+               csrow->last_page = last_page;
+               csrow->nr_pages = nr_pages;
+
+               csrow->grain = nr_pages << PAGE_SHIFT;
+               csrow->mtype = MEM_DDR2;
+               csrow->dtype = DEV_UNKNOWN;
+               csrow->edac_mode = EDAC_UNKNOWN;
+       }
+
+       i3200_clear_error_info(mci);
+
+       rc = -ENODEV;
+       if (edac_mc_add_mc(mci)) {
+               debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+               goto fail;
+       }
+
+       /* get this far and it's successful */
+       debugf3("MC: %s(): success\n", __func__);
+       return 0;
+
+fail:
+       iounmap(window);
+       if (mci)
+               edac_mc_free(mci);
+
+       return rc;
+}
+
+static int __devinit i3200_init_one(struct pci_dev *pdev,
+               const struct pci_device_id *ent)
+{
+       int rc;
+
+       debugf0("MC: %s()\n", __func__);
+
+       if (pci_enable_device(pdev) < 0)
+               return -EIO;
+
+       rc = i3200_probe1(pdev, ent->driver_data);
+       if (!mci_pdev)
+               mci_pdev = pci_dev_get(pdev);
+
+       return rc;
+}
+
+static void __devexit i3200_remove_one(struct pci_dev *pdev)
+{
+       struct mem_ctl_info *mci;
+       struct i3200_priv *priv;
+
+       debugf0("%s()\n", __func__);
+
+       mci = edac_mc_del_mc(&pdev->dev);
+       if (!mci)
+               return;
+
+       priv = mci->pvt_info;
+       iounmap(priv->window);
+
+       edac_mc_free(mci);
+}
+
+static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
+       {
+               PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               I3200},
+       {
+               0,
+       }            /* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
+
+static struct pci_driver i3200_driver = {
+       .name = EDAC_MOD_STR,
+       .probe = i3200_init_one,
+       .remove = __devexit_p(i3200_remove_one),
+       .id_table = i3200_pci_tbl,
+};
+
+static int __init i3200_init(void)
+{
+       int pci_rc;
+
+       debugf3("MC: %s()\n", __func__);
+
+       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+       opstate_init();
+
+       pci_rc = pci_register_driver(&i3200_driver);
+       if (pci_rc < 0)
+               goto fail0;
+
+       if (!mci_pdev) {
+               i3200_registered = 0;
+               mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                               PCI_DEVICE_ID_INTEL_3200_HB, NULL);
+               if (!mci_pdev) {
+                       debugf0("i3200 pci_get_device fail\n");
+                       pci_rc = -ENODEV;
+                       goto fail1;
+               }
+
+               pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
+               if (pci_rc < 0) {
+                       debugf0("i3200 init fail\n");
+                       pci_rc = -ENODEV;
+                       goto fail1;
+               }
+       }
+
+       return 0;
+
+fail1:
+       pci_unregister_driver(&i3200_driver);
+
+fail0:
+       if (mci_pdev)
+               pci_dev_put(mci_pdev);
+
+       return pci_rc;
+}
+
+static void __exit i3200_exit(void)
+{
+       debugf3("MC: %s()\n", __func__);
+
+       pci_unregister_driver(&i3200_driver);
+       if (!i3200_registered) {
+               i3200_remove_one(mci_pdev);
+               pci_dev_put(mci_pdev);
+       }
+}
+
+module_init(i3200_init);
+module_exit(i3200_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akamai Technologies, Inc.");
+MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
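
A worked example of the cumulative DRB arithmetic in drb_to_nr_pages(),
with made-up register values (I3200_DRB_SHIFT is 26, i.e. 64 MiB units):

        /* hypothetical boundaries for one channel: 8, 16, 16, 24
         *
         *   rank 0: 8  - 0  = 8 units -> 8 << (26 - PAGE_SHIFT) pages
         *   rank 1: 16 - 8  = 8 units -> 512 MiB
         *   rank 2: 16 - 16 = 0 units -> csrow left MEM_EMPTY
         *   rank 3: 24 - 16 = 8 units -> 512 MiB
         *
         * with PAGE_SHIFT == 12, each 64 MiB unit is 16384 pages, so the
         * populated ranks report 131072 pages (512 MiB) each.
         */
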
index 3f2ccfc6407c298be7be4260e5b09512590e0dfe..157f6504f25ea14bee970b909f2f04856ffe5395 100644 (file)
@@ -41,7 +41,9 @@ static u32 orig_pci_err_en;
 #endif
 
 static u32 orig_l2_err_disable;
+#ifdef CONFIG_MPC85xx
 static u32 orig_hid1[2];
+#endif
 
 /************************ MC SYSFS parts ***********************************/
 
@@ -646,6 +648,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = {
        { .compatible = "fsl,mpc8560-l2-cache-controller", },
        { .compatible = "fsl,mpc8568-l2-cache-controller", },
        { .compatible = "fsl,mpc8572-l2-cache-controller", },
+       { .compatible = "fsl,p2020-l2-cache-controller", },
        {},
 };
 
@@ -788,19 +791,20 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
                csrow = &mci->csrows[index];
                cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
                                  (index * MPC85XX_MC_CS_BNDS_OFS));
-               start = (cs_bnds & 0xfff0000) << 4;
-               end = ((cs_bnds & 0xfff) << 20);
-               if (start)
-                       start |= 0xfffff;
-               if (end)
-                       end |= 0xfffff;
+
+               start = (cs_bnds & 0xffff0000) >> 16;
+               end   = (cs_bnds & 0x0000ffff);
 
                if (start == end)
                        continue;       /* not populated */
 
+               start <<= (24 - PAGE_SHIFT);
+               end   <<= (24 - PAGE_SHIFT);
+               end    |= (1 << (24 - PAGE_SHIFT)) - 1;
+
-               csrow->first_page = start >> PAGE_SHIFT;
-               csrow->last_page = end >> PAGE_SHIFT;
-               csrow->nr_pages = csrow->last_page + 1 - csrow->first_page;
+               csrow->first_page = start;
+               csrow->last_page = end;
+               csrow->nr_pages = end + 1 - start;
                csrow->grain = 8;
                csrow->mtype = mtype;
                csrow->dtype = DEV_UNKNOWN;
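
A worked example of the new CS_BNDS decoding, assuming cs_bnds reads
0x0003000f and PAGE_SHIFT == 12 (start and end are already page numbers
after the shifts, so no further PAGE_SHIFT is applied):

        /* start = 0x0003, end = 0x000f, in 16 MiB (1 << 24) units;
         * start <<= 12            -> 0x3000 (first page, byte 48 MiB)
         * end   <<= 12, |= 0xfff  -> 0xffff (last page, byte 256 MiB - 1)
         * nr_pages = 0xffff + 1 - 0x3000 = 0xd000 pages = 208 MiB
         */
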
@@ -984,6 +988,8 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = {
        { .compatible = "fsl,mpc8560-memory-controller", },
        { .compatible = "fsl,mpc8568-memory-controller", },
        { .compatible = "fsl,mpc8572-memory-controller", },
+       { .compatible = "fsl,mpc8349-memory-controller", },
+       { .compatible = "fsl,p2020-memory-controller", },
        {},
 };
 
@@ -999,13 +1005,13 @@ static struct of_platform_driver mpc85xx_mc_err_driver = {
                   },
 };
 
-
+#ifdef CONFIG_MPC85xx
 static void __init mpc85xx_mc_clear_rfxe(void *data)
 {
        orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1);
        mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000));
 }
-
+#endif
 
 static int __init mpc85xx_mc_init(void)
 {
@@ -1038,26 +1044,32 @@ static int __init mpc85xx_mc_init(void)
                printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
 #endif
 
+#ifdef CONFIG_MPC85xx
        /*
         * need to clear HID1[RFXE] to disable machine check int
         * so we can catch it
         */
        if (edac_op_state == EDAC_OPSTATE_INT)
                on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
+#endif
 
        return 0;
 }
 
 module_init(mpc85xx_mc_init);
 
+#ifdef CONFIG_MPC85xx
 static void __exit mpc85xx_mc_restore_hid1(void *data)
 {
        mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]);
 }
+#endif
 
 static void __exit mpc85xx_mc_exit(void)
 {
+#ifdef CONFIG_MPC85xx
        on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
+#endif
 #ifdef CONFIG_PCI
        of_unregister_platform_driver(&mpc85xx_pci_err_driver);
 #endif
index 5131aaae8e03787d0c3e237f3cbac85b77453880..a6b9fec13a74cd4a8afb8a10a89c55e240a7fa86 100644 (file)
@@ -90,7 +90,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev)
                return -ENOENT;
        }
 
-       pci_serr = ioremap(r->start, r->end - r->start + 1);
+       pci_serr = ioremap(r->start, resource_size(r));
        if (!pci_serr)
                return -ENOMEM;
 
@@ -140,7 +140,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while requesting mem region\n",
                       __func__);
@@ -150,7 +150,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
 
        pdata->pci_vbase = devm_ioremap(&pdev->dev,
                                        r->start,
-                                       r->end - r->start + 1);
+                                       resource_size(r));
        if (!pdata->pci_vbase) {
                printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
                res = -ENOMEM;
@@ -306,7 +306,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while request mem region\n",
                       __func__);
@@ -316,7 +316,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
 
        pdata->sram_vbase = devm_ioremap(&pdev->dev,
                                         r->start,
-                                        r->end - r->start + 1);
+                                        resource_size(r));
        if (!pdata->sram_vbase) {
                printk(KERN_ERR "%s: Unable to setup SRAM err regs\n",
                       __func__);
@@ -474,7 +474,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while requesting mem region\n",
                       __func__);
@@ -484,7 +484,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
        pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev,
                                           r->start,
-                                          r->end - r->start + 1);
+                                          resource_size(r));
        if (!pdata->cpu_vbase[0]) {
                printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
                res = -ENOMEM;
@@ -501,7 +501,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while requesting mem region\n",
                       __func__);
@@ -511,7 +511,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
        pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev,
                                           r->start,
-                                          r->end - r->start + 1);
+                                          resource_size(r));
        if (!pdata->cpu_vbase[1]) {
                printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
                res = -ENOMEM;
@@ -726,7 +726,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while requesting mem region\n",
                       __func__);
@@ -736,7 +736,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
 
        pdata->mc_vbase = devm_ioremap(&pdev->dev,
                                       r->start,
-                                      r->end - r->start + 1);
+                                      resource_size(r));
        if (!pdata->mc_vbase) {
                printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__);
                res = -ENOMEM;
index e4d971c8b9d0bebe015a80b975a05a5a9c0a9a6d..f831ea15929169af0f825343ec366974e9738932 100644 (file)
@@ -102,6 +102,7 @@ config DRM_I915
        select BACKLIGHT_CLASS_DEVICE if ACPI
        select INPUT if ACPI
        select ACPI_VIDEO if ACPI
+       select ACPI_BUTTON if ACPI
        help
          Choose this option if you have a system that has Intel 830M, 845G,
          852GM, 855GM 865G or 915G integrated graphics.  If M is selected, the
index 230c9ffdd5e999677371654be083f64191038da3..80391995bdec05f07e809e33651466379c7823b3 100644 (file)
@@ -142,6 +142,19 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size)
        if (IS_ERR(obj->filp))
                goto free;
 
+       /* Basically we want to disable the OOM killer and handle ENOMEM
+        * ourselves by sacrificing pages from cached buffers.
+        * XXX shmem_file_[gs]et_gfp_mask()
+        */
+       mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping,
+                            GFP_HIGHUSER |
+                            __GFP_COLD |
+                            __GFP_FS |
+                            __GFP_RECLAIMABLE |
+                            __GFP_NORETRY |
+                            __GFP_NOWARN |
+                            __GFP_NOMEMALLOC);
+
        kref_init(&obj->refcount);
        kref_init(&obj->handlecount);
        obj->size = size;
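
The mask above makes shmem allocations for GEM objects fail fast:
__GFP_NORETRY plus __GFP_NOWARN means the allocator returns ENOMEM
instead of looping or invoking the OOM killer, so the driver can evict
its own caches. A sketch of the retry dance, mirroring the
i915_gem_object_get_pages_or_evict() hunk later in this commit
(try_to_populate and evict_something are hypothetical names):

        gfp = mapping_gfp_mask(mapping);
        ret = try_to_populate(obj);                     /* hypothetical */
        if (ret == -ENOMEM) {
                evict_something(dev, obj->size);        /* hypothetical */
                mapping_set_gfp_mask(mapping, gfp & ~__GFP_NORETRY);
                ret = try_to_populate(obj);             /* one hard try */
                mapping_set_gfp_mask(mapping, gfp);     /* restore */
        }
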
index 5269dfa5f6205d024f153016d2e8a872116e77a1..fa7b9be096bc8b25ea17bca96dee8447828bf191 100644 (file)
@@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
          i915_gem.o \
          i915_gem_debug.o \
          i915_gem_tiling.o \
+         i915_trace_points.o \
          intel_display.o \
          intel_crt.o \
          intel_lvds.o \
index 1e3bdcee863c65166dd64f7dadc898ee65c6b087..f8ce9a3a420de39c39c2f517f0c067cfda7669a5 100644 (file)
@@ -96,11 +96,13 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
        {
                struct drm_gem_object *obj = obj_priv->obj;
 
-               seq_printf(m, "    %p: %s %08x %08x %d",
+               seq_printf(m, "    %p: %s %8zd %08x %08x %d %s",
                           obj,
                           get_pin_flag(obj_priv),
+                          obj->size,
                           obj->read_domains, obj->write_domain,
-                          obj_priv->last_rendering_seqno);
+                          obj_priv->last_rendering_seqno,
+                          obj_priv->dirty ? "dirty" : "");
 
                if (obj->name)
                        seq_printf(m, " (name: %d)", obj->name);
index 5a49a1867b357841d24342859db0b79fab3a982d..45d507ebd3ff0bf192f1290d859b509779a356c6 100644 (file)
@@ -33,6 +33,7 @@
 #include "intel_drv.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include <linux/vgaarb.h>
 
 /* Really want an OS-independent resettable timer.  Would like to have
@@ -50,14 +51,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
        u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
        int i;
 
+       trace_i915_ring_wait_begin(dev);
+
        for (i = 0; i < 100000; i++) {
                ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
                acthd = I915_READ(acthd_reg);
                ring->space = ring->head - (ring->tail + 8);
                if (ring->space < 0)
                        ring->space += ring->Size;
-               if (ring->space >= n)
+               if (ring->space >= n) {
+                       trace_i915_ring_wait_end(dev);
                        return 0;
+               }
 
                if (dev->primary->master) {
                        struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
@@ -77,6 +82,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
 
        }
 
+       trace_i915_ring_wait_end(dev);
        return -EBUSY;
 }
 
@@ -922,7 +928,8 @@ static int i915_get_bridge_dev(struct drm_device *dev)
  * how much was set aside so we can use it for our own purposes.
  */
 static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
-                         uint32_t *preallocated_size)
+                         uint32_t *preallocated_size,
+                         uint32_t *start)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        u16 tmp = 0;
@@ -1009,10 +1016,159 @@ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
                return -1;
        }
        *preallocated_size = stolen - overhead;
+       *start = overhead;
 
        return 0;
 }
 
+#define PTE_ADDRESS_MASK               0xfffff000
+#define PTE_ADDRESS_MASK_HIGH          0x000000f0 /* i915+ */
+#define PTE_MAPPING_TYPE_UNCACHED      (0 << 1)
+#define PTE_MAPPING_TYPE_DCACHE                (1 << 1) /* i830 only */
+#define PTE_MAPPING_TYPE_CACHED                (3 << 1)
+#define PTE_MAPPING_TYPE_MASK          (3 << 1)
+#define PTE_VALID                      (1 << 0)
+
+/**
+ * i915_gtt_to_phys - take a GTT address and turn it into a physical one
+ * @dev: drm device
+ * @gtt_addr: address to translate
+ *
+ * Some chip functions require allocations from stolen space but need the
+ * physical address of the memory in question.  We use this routine
+ * to get a physical address suitable for register programming from a given
+ * GTT address.
+ */
+static unsigned long i915_gtt_to_phys(struct drm_device *dev,
+                                     unsigned long gtt_addr)
+{
+       unsigned long *gtt;
+       unsigned long entry, phys;
+       int gtt_bar = IS_I9XX(dev) ? 0 : 1;
+       int gtt_offset, gtt_size;
+
+       if (IS_I965G(dev)) {
+               if (IS_G4X(dev) || IS_IGDNG(dev)) {
+                       gtt_offset = 2*1024*1024;
+                       gtt_size = 2*1024*1024;
+               } else {
+                       gtt_offset = 512*1024;
+                       gtt_size = 512*1024;
+               }
+       } else {
+               gtt_bar = 3;
+               gtt_offset = 0;
+               gtt_size = pci_resource_len(dev->pdev, gtt_bar);
+       }
+
+       gtt = ioremap_wc(pci_resource_start(dev->pdev, gtt_bar) + gtt_offset,
+                        gtt_size);
+       if (!gtt) {
+               DRM_ERROR("ioremap of GTT failed\n");
+               return 0;
+       }
+
+       entry = readl((void __iomem *)gtt + (gtt_addr / 1024));
+
+       DRM_DEBUG("GTT addr: 0x%08lx, PTE: 0x%08lx\n", gtt_addr, entry);
+
+       /* Mask out these reserved bits on this hardware. */
+       if (!IS_I9XX(dev) || IS_I915G(dev) || IS_I915GM(dev) ||
+           IS_I945G(dev) || IS_I945GM(dev)) {
+               entry &= ~PTE_ADDRESS_MASK_HIGH;
+       }
+
+       /* If it's not a mapping type we know, then bail. */
+       if ((entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_UNCACHED &&
+           (entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_CACHED) {
+               iounmap(gtt);
+               return 0;
+       }
+
+       if (!(entry & PTE_VALID)) {
+               DRM_ERROR("bad GTT entry in stolen space\n");
+               iounmap(gtt);
+               return 0;
+       }
+
+       iounmap(gtt);
+
+       phys = (entry & PTE_ADDRESS_MASK) |
+               ((uint64_t)(entry & PTE_ADDRESS_MASK_HIGH) << (32 - 4));
+
+       DRM_DEBUG("GTT addr: 0x%08lx, phys addr: 0x%08lx\n", gtt_addr, phys);
+
+       return phys;
+}
+
+static void i915_warn_stolen(struct drm_device *dev)
+{
+       DRM_ERROR("not enough stolen space for compressed buffer, disabling\n");
+       DRM_ERROR("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
+}
+
+static void i915_setup_compression(struct drm_device *dev, int size)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_mm_node *compressed_fb, *compressed_llb;
+       unsigned long cfb_base, ll_base;
+
+       /* Leave 1M for line length buffer & misc. */
+       compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0);
+       if (!compressed_fb) {
+               i915_warn_stolen(dev);
+               return;
+       }
+
+       compressed_fb = drm_mm_get_block(compressed_fb, size, 4096);
+       if (!compressed_fb) {
+               i915_warn_stolen(dev);
+               return;
+       }
+
+       cfb_base = i915_gtt_to_phys(dev, compressed_fb->start);
+       if (!cfb_base) {
+               DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+               drm_mm_put_block(compressed_fb);
+               return;
+       }
+
+       if (!IS_GM45(dev)) {
+               compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096,
+                                                   4096, 0);
+               if (!compressed_llb) {
+                       i915_warn_stolen(dev);
+                       return;
+               }
+
+               compressed_llb = drm_mm_get_block(compressed_llb, 4096, 4096);
+               if (!compressed_llb) {
+                       i915_warn_stolen(dev);
+                       return;
+               }
+
+               ll_base = i915_gtt_to_phys(dev, compressed_llb->start);
+               if (!ll_base) {
+                       DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+                       drm_mm_put_block(compressed_fb);
+                       drm_mm_put_block(compressed_llb);
+                       return;
+               }
+       }
+
+       dev_priv->cfb_size = size;
+
+       if (IS_GM45(dev)) {
+               g4x_disable_fbc(dev);
+               I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
+       } else {
+               i8xx_disable_fbc(dev);
+               I915_WRITE(FBC_CFB_BASE, cfb_base);
+               I915_WRITE(FBC_LL_BASE, ll_base);
+       }
+
+       DRM_DEBUG("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n", cfb_base,
+                 ll_base, size >> 20);
+}
+
 /* true = enable decode, false = disable decoder */
 static unsigned int i915_vga_set_decode(void *cookie, bool state)
 {
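
A worked example of the PTE decode in i915_gtt_to_phys(), with a
made-up entry value:

        /* entry = 0x12345001: PTE_VALID set, mapping type 00 (uncached)
         *
         * phys = (entry & PTE_ADDRESS_MASK)                      0x12345000
         *      | ((u64)(entry & PTE_ADDRESS_MASK_HIGH) << 28)  | 0
         *      = 0x12345000
         *
         * an entry with bits 7:4 set would contribute physical address
         * bits 35:32, covering stolen placements above 4 GiB.
         */
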
@@ -1027,6 +1183,7 @@ static unsigned int i915_vga_set_decode(void *cookie, bool state)
 }
 
 static int i915_load_modeset_init(struct drm_device *dev,
+                                 unsigned long prealloc_start,
                                  unsigned long prealloc_size,
                                  unsigned long agp_size)
 {
@@ -1047,6 +1204,10 @@ static int i915_load_modeset_init(struct drm_device *dev,
 
        /* Basic memrange allocator for stolen space (aka vram) */
        drm_mm_init(&dev_priv->vram, 0, prealloc_size);
+       DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024));
+
+       /* We're off and running w/KMS */
+       dev_priv->mm.suspended = 0;
 
        /* Let GEM Manage from end of prealloc space to end of aperture.
         *
@@ -1059,10 +1220,25 @@ static int i915_load_modeset_init(struct drm_device *dev,
         */
        i915_gem_do_init(dev, prealloc_size, agp_size - 4096);
 
+       mutex_lock(&dev->struct_mutex);
        ret = i915_gem_init_ringbuffer(dev);
+       mutex_unlock(&dev->struct_mutex);
        if (ret)
                goto out;
 
+       /* Try to set up FBC with a reasonable compressed buffer size */
+       if (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev) || IS_GM45(dev)) &&
+           i915_powersave) {
+               int cfb_size;
+
+               /* Try to get an 8M buffer... */
+               if (prealloc_size > (9*1024*1024))
+                       cfb_size = 8*1024*1024;
+               else /* fall back to 7/8 of the stolen space */
+                       cfb_size = prealloc_size * 7 / 8;
+               i915_setup_compression(dev, cfb_size);
+       }
+
        /* Allow hardware batchbuffers unless told otherwise.
         */
        dev_priv->allow_batchbuffer = 1;
@@ -1180,7 +1356,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        struct drm_i915_private *dev_priv = dev->dev_private;
        resource_size_t base, size;
        int ret = 0, mmio_bar = IS_I9XX(dev) ? 0 : 1;
-       uint32_t agp_size, prealloc_size;
+       uint32_t agp_size, prealloc_size, prealloc_start;
 
        /* i915 has 4 more counters */
        dev->counters += 4;
@@ -1234,7 +1410,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
                         "performance may suffer.\n");
        }
 
-       ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
+       ret = i915_probe_agp(dev, &agp_size, &prealloc_size, &prealloc_start);
        if (ret)
                goto out_iomapfree;
 
@@ -1300,8 +1476,12 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
                return ret;
        }
 
+       /* Start out suspended */
+       dev_priv->mm.suspended = 1;
+
        if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-               ret = i915_load_modeset_init(dev, prealloc_size, agp_size);
+               ret = i915_load_modeset_init(dev, prealloc_start,
+                                            prealloc_size, agp_size);
                if (ret < 0) {
                        DRM_ERROR("failed to init modeset\n");
                        goto out_workqueue_free;
@@ -1313,6 +1493,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        if (!IS_IGDNG(dev))
                intel_opregion_init(dev, 0);
 
+       setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
+                   (unsigned long) dev);
        return 0;
 
 out_workqueue_free:
@@ -1333,6 +1515,7 @@ int i915_driver_unload(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        destroy_workqueue(dev_priv->wq);
+       del_timer_sync(&dev_priv->hangcheck_timer);
 
        io_mapping_free(dev_priv->mm.gtt_mapping);
        if (dev_priv->mm.gtt_mtrr >= 0) {
@@ -1472,6 +1655,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
        DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0),
        DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0),
        DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0),
+       DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, 0),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
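
setup_timer() above only initializes the hangcheck timer; the handler
is i915_hangcheck_elapsed() (declared in i915_drv.h below) and teardown
is the del_timer_sync() added to i915_driver_unload(). A sketch of the
intended lifecycle, assuming the DRM_I915_HANGCHECK_PERIOD this commit
defines:

        /* (re)arm whenever new work is queued to the ring */
        mod_timer(&dev_priv->hangcheck_timer,
                  jiffies + DRM_I915_HANGCHECK_PERIOD);

        /* callback signature follows setup_timer() */
        void i915_hangcheck_elapsed(unsigned long data)
        {
                struct drm_device *dev = (struct drm_device *)data;
                /* compare ACTHD with dev_priv->last_acthd and, if the GPU
                 * made no progress for several periods, schedule a reset */
        }
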
index dbe568c9327bed325eb7d8bae169c3bd7fa28814..b93814c0d3e247dcfb804ae9973c3cd7220b2dc3 100644 (file)
@@ -89,6 +89,8 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state)
                pci_set_power_state(dev->pdev, PCI_D3hot);
        }
 
+       dev_priv->suspended = 1;
+
        return 0;
 }
 
@@ -97,8 +99,6 @@ static int i915_resume(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
        int ret = 0;
 
-       pci_set_power_state(dev->pdev, PCI_D0);
-       pci_restore_state(dev->pdev);
        if (pci_enable_device(dev->pdev))
                return -1;
        pci_set_master(dev->pdev);
@@ -124,9 +124,135 @@ static int i915_resume(struct drm_device *dev)
                drm_helper_resume_force_mode(dev);
        }
 
+       dev_priv->suspended = 0;
+
        return ret;
 }
 
+/**
+ * i965_reset - reset chip after a hang
+ * @dev: drm device to reset
+ * @flags: reset domains
+ *
+ * Reset the chip.  Useful if a hang is detected. Returns zero on successful
+ * reset or otherwise an error code.
+ *
+ * Procedure is fairly simple:
+ *   - reset the chip using the reset reg
+ *   - re-init context state
+ *   - re-init hardware status page
+ *   - re-init ring buffer
+ *   - re-init interrupt state
+ *   - re-init display
+ */
+int i965_reset(struct drm_device *dev, u8 flags)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       unsigned long timeout;
+       u8 gdrst;
+       /*
+        * We really should only reset the display subsystem if we actually
+        * need to
+        */
+       bool need_display = true;
+
+       mutex_lock(&dev->struct_mutex);
+
+       /*
+        * Clear request list
+        */
+       i915_gem_retire_requests(dev);
+
+       if (need_display)
+               i915_save_display(dev);
+
+       if (IS_I965G(dev) || IS_G4X(dev)) {
+               /*
+                * Set the domains we want to reset, then the reset bit (bit 0).
+                * Clear the reset bit after a while and wait for hardware status
+                * bit (bit 1) to be set
+                */
+               pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+               pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0));
+               udelay(50);
+               pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe);
+
+               /* ...we don't want to loop forever though, 500ms should be plenty */
+               timeout = jiffies + msecs_to_jiffies(500);
+               do {
+                       udelay(100);
+                       pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+               } while ((gdrst & 0x1) && time_after(timeout, jiffies));
+
+               if (gdrst & 0x1) {
+                       WARN(true, "i915: Failed to reset chip\n");
+                       mutex_unlock(&dev->struct_mutex);
+                       return -EIO;
+               }
+       } else {
+               DRM_ERROR("Error occurred. Don't know how to reset this chip.\n");
+               mutex_unlock(&dev->struct_mutex);
+               return -ENODEV;
+       }
+
+       /* Ok, now get things going again... */
+
+       /*
+        * Everything depends on having the GTT running, so we need to start
+        * there.  Fortunately we don't need to do this unless we reset the
+        * chip at a PCI level.
+        *
+        * Next we need to restore the context, but we don't use those
+        * yet either...
+        *
+        * Ring buffer needs to be re-initialized in the KMS case, or if X
+        * was running at the time of the reset (i.e. we weren't VT
+        * switched away).
+        */
+       if (drm_core_check_feature(dev, DRIVER_MODESET) ||
+           !dev_priv->mm.suspended) {
+               drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+               struct drm_gem_object *obj = ring->ring_obj;
+               struct drm_i915_gem_object *obj_priv = obj->driver_private;
+               dev_priv->mm.suspended = 0;
+
+               /* Stop the ring if it's running. */
+               I915_WRITE(PRB0_CTL, 0);
+               I915_WRITE(PRB0_TAIL, 0);
+               I915_WRITE(PRB0_HEAD, 0);
+
+               /* Initialize the ring. */
+               I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+               I915_WRITE(PRB0_CTL,
+                          ((obj->size - 4096) & RING_NR_PAGES) |
+                          RING_NO_REPORT |
+                          RING_VALID);
+               if (!drm_core_check_feature(dev, DRIVER_MODESET))
+                       i915_kernel_lost_context(dev);
+               else {
+                       ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+                       ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
+                       ring->space = ring->head - (ring->tail + 8);
+                       if (ring->space < 0)
+                               ring->space += ring->Size;
+               }
+
+               mutex_unlock(&dev->struct_mutex);
+               drm_irq_uninstall(dev);
+               drm_irq_install(dev);
+               mutex_lock(&dev->struct_mutex);
+       }
+
+       /*
+        * Display needs restore too...
+        */
+       if (need_display)
+               i915_restore_display(dev);
+
+       mutex_unlock(&dev->struct_mutex);
+       return 0;
+}
+
+
 static int __devinit
 i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
@@ -234,6 +360,8 @@ static int __init i915_init(void)
 {
        driver.num_ioctls = i915_max_ioctl;
 
+       i915_gem_shrinker_init();
+
        /*
         * If CONFIG_DRM_I915_KMS is set, default to KMS unless
         * explicitly disabled with the module parameter.
@@ -260,6 +388,7 @@ static int __init i915_init(void)
 
 static void __exit i915_exit(void)
 {
+       i915_gem_shrinker_exit();
        drm_exit(&driver);
 }
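
i915_gem_shrinker_init()/exit(), hooked in above and declared in
i915_drv.h below, register GEM with the VM's slab-shrinking pass so
inactive buffers on the new shrink_list can be evicted under memory
pressure. Under the shrinker API of this kernel generation that is
roughly (a sketch, not the committed implementation):

        static int i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
        {
                /* walk shrink_list under shrink_list_lock, evict inactive
                 * buffers, return an estimate of what is left to scan */
                return 0;
        }

        static struct shrinker i915_gem_shrinker = {
                .shrink = i915_gem_shrink,
                .seeks  = DEFAULT_SEEKS,
        };

        void i915_gem_shrinker_init(void)
        {
                register_shrinker(&i915_gem_shrinker);
        }

        void i915_gem_shrinker_exit(void)
        {
                unregister_shrinker(&i915_gem_shrinker);
        }
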
 
index a0632f8e76ace4735247ff0b91c82c180719aac8..b24b2d145b75dc4450fdd24095eca0b519a9fa47 100644 (file)
@@ -48,6 +48,11 @@ enum pipe {
        PIPE_B,
 };
 
+enum plane {
+       PLANE_A = 0,
+       PLANE_B,
+};
+
 #define I915_NUM_PIPE  2
 
 /* Interface history:
@@ -148,6 +153,23 @@ struct drm_i915_error_state {
        struct timeval time;
 };
 
+struct drm_i915_display_funcs {
+       void (*dpms)(struct drm_crtc *crtc, int mode);
+       bool (*fbc_enabled)(struct drm_crtc *crtc);
+       void (*enable_fbc)(struct drm_crtc *crtc, unsigned long interval);
+       void (*disable_fbc)(struct drm_device *dev);
+       int (*get_display_clock_speed)(struct drm_device *dev);
+       int (*get_fifo_size)(struct drm_device *dev, int plane);
+       void (*update_wm)(struct drm_device *dev, int planea_clock,
+                         int planeb_clock, int sr_hdisplay, int pixel_size);
+       /* clock updates for mode set */
+       /* cursor updates */
+       /* render clock increase/decrease */
+       /* display clock increase/decrease */
+       /* pll clock increase/decrease */
+       /* clock gating init */
+};
+
 typedef struct drm_i915_private {
        struct drm_device *dev;
 
@@ -198,10 +220,21 @@ typedef struct drm_i915_private {
        unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds;
        int vblank_pipe;
 
+       /* For hangcheck timer */
+#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */
+       struct timer_list hangcheck_timer;
+       int hangcheck_count;
+       uint32_t last_acthd;
+
        bool cursor_needs_physical;
 
        struct drm_mm vram;
 
+       unsigned long cfb_size;
+       unsigned long cfb_pitch;
+       int cfb_fence;
+       int cfb_plane;
+
        int irq_enabled;
 
        struct intel_opregion opregion;
@@ -222,6 +255,8 @@ typedef struct drm_i915_private {
        unsigned int edp_support:1;
        int lvds_ssc_freq;
 
+       struct notifier_block lid_notifier;
+
        int crt_ddc_bus; /* -1 = unknown, else GPIO to use for CRT DDC */
        struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */
        int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
@@ -234,7 +269,11 @@ typedef struct drm_i915_private {
        struct work_struct error_work;
        struct workqueue_struct *wq;
 
+       /* Display functions */
+       struct drm_i915_display_funcs display;
+
        /* Register state */
+       bool suspended;
        u8 saveLBB;
        u32 saveDSPACNTR;
        u32 saveDSPBCNTR;
@@ -349,6 +388,15 @@ typedef struct drm_i915_private {
                struct io_mapping *gtt_mapping;
                int gtt_mtrr;
 
+               /**
+                * Membership on list of all loaded devices, used to evict
+                * inactive buffers under memory pressure.
+                *
+                * Modifications should only be done whilst holding the
+                * shrink_list_lock spinlock.
+                */
+               struct list_head shrink_list;
+
                /**
                 * List of objects currently involved in rendering from the
                 * ringbuffer.
@@ -432,7 +480,7 @@ typedef struct drm_i915_private {
                 * It prevents command submission from occurring and makes
                 * every pending request fail
                 */
-               int wedged;
+               atomic_t wedged;
 
                /** Bit 6 swizzling required for X tiling */
                uint32_t bit_6_swizzle_x;
@@ -491,10 +539,7 @@ struct drm_i915_gem_object {
         * This is the same as gtt_space->start
         */
        uint32_t gtt_offset;
-       /**
-        * Required alignment for the object
-        */
-       uint32_t gtt_alignment;
+
        /**
         * Fake offset for use by mmap(2)
         */
@@ -541,6 +586,11 @@ struct drm_i915_gem_object {
         * in an execbuffer object list.
         */
        int in_execbuffer;
+
+       /**
+        * Advice: are the backing pages purgeable?
+        */
+       int madv;
 };
 
 /**
@@ -585,6 +635,8 @@ extern int i915_max_ioctl;
 extern unsigned int i915_fbpercrtc;
 extern unsigned int i915_powersave;
 
+extern void i915_save_display(struct drm_device *dev);
+extern void i915_restore_display(struct drm_device *dev);
 extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
 extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master);
 
@@ -604,8 +656,10 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
 extern int i915_emit_box(struct drm_device *dev,
                         struct drm_clip_rect *boxes,
                         int i, int DR1, int DR4);
+extern int i965_reset(struct drm_device *dev, u8 flags);
 
 /* i915_irq.c */
+void i915_hangcheck_elapsed(unsigned long data);
 extern int i915_irq_emit(struct drm_device *dev, void *data,
                         struct drm_file *file_priv);
 extern int i915_irq_wait(struct drm_device *dev, void *data,
@@ -676,6 +730,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv);
 int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *file_priv);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+                          struct drm_file *file_priv);
 int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
@@ -695,6 +751,7 @@ int i915_gem_object_unbind(struct drm_gem_object *obj);
 void i915_gem_release_mmap(struct drm_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 uint32_t i915_get_gem_seqno(struct drm_device *dev);
+bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
 int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
 int i915_gem_object_put_fence_reg(struct drm_gem_object *obj);
 void i915_gem_retire_requests(struct drm_device *dev);
@@ -720,6 +777,9 @@ int i915_gem_object_get_pages(struct drm_gem_object *obj);
 void i915_gem_object_put_pages(struct drm_gem_object *obj);
 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv);
 
+void i915_gem_shrinker_init(void);
+void i915_gem_shrinker_exit(void);
+
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
 void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
@@ -767,6 +827,8 @@ static inline void opregion_enable_asle(struct drm_device *dev) { return; }
 extern void intel_modeset_init(struct drm_device *dev);
 extern void intel_modeset_cleanup(struct drm_device *dev);
 extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
+extern void i8xx_disable_fbc(struct drm_device *dev);
+extern void g4x_disable_fbc(struct drm_device *dev);
 
 /**
  * Lock test for when it's just for synchronization of ring access.
@@ -864,6 +926,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
                       (dev)->pci_device == 0x2E12 || \
                       (dev)->pci_device == 0x2E22 || \
                       (dev)->pci_device == 0x2E32 || \
+                      (dev)->pci_device == 0x2E42 || \
                       (dev)->pci_device == 0x0042 || \
                       (dev)->pci_device == 0x0046)
 
@@ -876,6 +939,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
                     (dev)->pci_device == 0x2E12 || \
                     (dev)->pci_device == 0x2E22 || \
                     (dev)->pci_device == 0x2E32 || \
+                    (dev)->pci_device == 0x2E42 || \
                     IS_GM45(dev))
 
 #define IS_IGDG(dev) ((dev)->pci_device == 0xa001)
@@ -909,12 +973,13 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
 #define SUPPORTS_INTEGRATED_HDMI(dev)  (IS_G4X(dev) || IS_IGDNG(dev))
 #define SUPPORTS_INTEGRATED_DP(dev)    (IS_G4X(dev) || IS_IGDNG(dev))
 #define SUPPORTS_EDP(dev)              (IS_IGDNG_M(dev))
-#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev))
+#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev) || IS_I965G(dev))
 /* dsparb controlled by hw only */
 #define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev))
 
 #define HAS_FW_BLC(dev) (IS_I9XX(dev) || IS_G4X(dev) || IS_IGDNG(dev))
 #define HAS_PIPE_CXSR(dev) (IS_G4X(dev) || IS_IGDNG(dev))
+#define I915_HAS_FBC(dev) (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev)))
 
 #define PRIMARY_RINGBUFFER_SIZE         (128*1024)
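
Making wedged an atomic_t lets the new hangcheck path flag a dead GPU
without holding struct_mutex. A sketch of the intended usage (the call
sites shown are illustrative):

        /* error/hangcheck path: mark the GPU wedged, wake any waiters */
        atomic_set(&dev_priv->mm.wedged, 1);

        /* request-wait path: fail fast once wedged */
        if (atomic_read(&dev_priv->mm.wedged))
                return -EIO;
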
 
index c67317112f4a1f32619b01e974bdd9b05e3bbfa2..40727d4c2919d6cbb4fa719b7a7a3fe906b7917a 100644 (file)
@@ -29,6 +29,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/swap.h>
 #include <linux/pci.h>
@@ -48,11 +49,15 @@ static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
                                           unsigned alignment);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
-static int i915_gem_evict_something(struct drm_device *dev);
+static int i915_gem_evict_something(struct drm_device *dev, int min_size);
+static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
                                struct drm_i915_gem_pwrite *args,
                                struct drm_file *file_priv);
 
+static LIST_HEAD(shrink_list);
+static DEFINE_SPINLOCK(shrink_list_lock);
+
 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
                     unsigned long end)
 {
@@ -316,6 +321,45 @@ fail_unlock:
        return ret;
 }
 
+static inline gfp_t
+i915_gem_object_get_page_gfp_mask(struct drm_gem_object *obj)
+{
+       return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping);
+}
+
+static inline void
+i915_gem_object_set_page_gfp_mask(struct drm_gem_object *obj, gfp_t gfp)
+{
+       mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp);
+}
+
+static int
+i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
+{
+       int ret;
+
+       ret = i915_gem_object_get_pages(obj);
+
+       /* If we have insufficient memory to map in the pages, attempt
+        * to make some space by throwing out some old buffers.
+        */
+       if (ret == -ENOMEM) {
+               struct drm_device *dev = obj->dev;
+               gfp_t gfp;
+
+               ret = i915_gem_evict_something(dev, obj->size);
+               if (ret)
+                       return ret;
+
+               gfp = i915_gem_object_get_page_gfp_mask(obj);
+               i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY);
+               ret = i915_gem_object_get_pages(obj);
+               i915_gem_object_set_page_gfp_mask(obj, gfp);
+       }
+
+       return ret;
+}
+
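
The helpers above implement a fail-fast-then-retry allocation pattern: the GEM shmem mapping's gfp mask is expected to carry __GFP_NORETRY, so page allocation fails quickly under memory pressure instead of invoking the OOM killer, and only after evicting something is the flag temporarily dropped so the allocator may block and try harder. The idiom in isolation (a sketch; the driver's own eviction step is elided):

static struct page *read_page_with_retry(struct address_space *mapping,
					 pgoff_t index)
{
	gfp_t gfp = mapping_gfp_mask(mapping);
	struct page *page;

	/* Fast attempt: __GFP_NORETRY in the mask makes -ENOMEM cheap. */
	page = read_mapping_page(mapping, index, NULL);
	if (!IS_ERR(page) || PTR_ERR(page) != -ENOMEM)
		return page;

	/* Caller has (hopefully) freed memory; now allow blocking retries. */
	mapping_set_gfp_mask(mapping, gfp & ~__GFP_NORETRY);
	page = read_mapping_page(mapping, index, NULL);
	mapping_set_gfp_mask(mapping, gfp);	/* restore the fail-fast mask */
	return page;
}
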
 /**
  * This is the fallback shmem pread path, which allocates temporary storage
  * in kernel space to copy_to_user into outside of the struct_mutex, so we
@@ -367,8 +411,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
 
        mutex_lock(&dev->struct_mutex);
 
-       ret = i915_gem_object_get_pages(obj);
-       if (ret != 0)
+       ret = i915_gem_object_get_pages_or_evict(obj);
+       if (ret)
                goto fail_unlock;
 
        ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
@@ -842,8 +886,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
 
        mutex_lock(&dev->struct_mutex);
 
-       ret = i915_gem_object_get_pages(obj);
-       if (ret != 0)
+       ret = i915_gem_object_get_pages_or_evict(obj);
+       if (ret)
                goto fail_unlock;
 
        ret = i915_gem_object_set_to_cpu_domain(obj, 1);
@@ -1155,28 +1199,22 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        /* Now bind it into the GTT if needed */
        mutex_lock(&dev->struct_mutex);
        if (!obj_priv->gtt_space) {
-               ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
-               if (ret) {
-                       mutex_unlock(&dev->struct_mutex);
-                       return VM_FAULT_SIGBUS;
-               }
-
-               ret = i915_gem_object_set_to_gtt_domain(obj, write);
-               if (ret) {
-                       mutex_unlock(&dev->struct_mutex);
-                       return VM_FAULT_SIGBUS;
-               }
+               ret = i915_gem_object_bind_to_gtt(obj, 0);
+               if (ret)
+                       goto unlock;
 
                list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
+
+               ret = i915_gem_object_set_to_gtt_domain(obj, write);
+               if (ret)
+                       goto unlock;
        }
 
        /* Need a new fence register? */
        if (obj_priv->tiling_mode != I915_TILING_NONE) {
                ret = i915_gem_object_get_fence_reg(obj);
-               if (ret) {
-                       mutex_unlock(&dev->struct_mutex);
-                       return VM_FAULT_SIGBUS;
-               }
+               if (ret)
+                       goto unlock;
        }
 
        pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
@@ -1184,18 +1222,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        /* Finally, remap it using the new GTT offset */
        ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
-
+unlock:
        mutex_unlock(&dev->struct_mutex);
 
        switch (ret) {
+       case 0:
+       case -ERESTARTSYS:
+               return VM_FAULT_NOPAGE;
        case -ENOMEM:
        case -EAGAIN:
                return VM_FAULT_OOM;
-       case -EFAULT:
-       case -EINVAL:
-               return VM_FAULT_SIGBUS;
        default:
-               return VM_FAULT_NOPAGE;
+               return VM_FAULT_SIGBUS;
        }
 }
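
The reworked fault handler funnels every exit through a single unlock and maps the result once: 0 and -ERESTARTSYS both become VM_FAULT_NOPAGE, since either the PTE is already installed or a signal arrived and the fault will simply be retried after the handler runs; transient memory failures become VM_FAULT_OOM; everything else, including the formerly special-cased -EFAULT and -EINVAL, now defaults to SIGBUS. The mapping condensed into a helper, for reference (a sketch, not part of the patch):

static int i915_fault_errno_to_vmf(int err)
{
	switch (err) {
	case 0:			/* PTE installed, fault satisfied */
	case -ERESTARTSYS:	/* signal pending: retry the fault later */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
	case -EAGAIN:		/* transient pressure, defer to the OOM path */
		return VM_FAULT_OOM;
	default:		/* -EFAULT, -EINVAL, ...: hard failure */
		return VM_FAULT_SIGBUS;
	}
}
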
 
@@ -1388,6 +1426,14 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 
        obj_priv = obj->driver_private;
 
+       if (obj_priv->madv != I915_MADV_WILLNEED) {
+               DRM_ERROR("Attempting to mmap a purgeable buffer\n");
+               drm_gem_object_unreference(obj);
+               mutex_unlock(&dev->struct_mutex);
+               return -EINVAL;
+       }
+
+
        if (!obj_priv->mmap_offset) {
                ret = i915_gem_create_mmap_offset(obj);
                if (ret) {
@@ -1399,22 +1445,12 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 
        args->offset = obj_priv->mmap_offset;
 
-       obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);
-
-       /* Make sure the alignment is correct for fence regs etc */
-       if (obj_priv->agp_mem &&
-           (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
-               drm_gem_object_unreference(obj);
-               mutex_unlock(&dev->struct_mutex);
-               return -EINVAL;
-       }
-
        /*
         * Pull it into the GTT so that we have a page list (makes the
         * initial fault faster and any subsequent flushing possible).
         */
        if (!obj_priv->agp_mem) {
-               ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+               ret = i915_gem_object_bind_to_gtt(obj, 0);
                if (ret) {
                        drm_gem_object_unreference(obj);
                        mutex_unlock(&dev->struct_mutex);
@@ -1437,6 +1473,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
        int i;
 
        BUG_ON(obj_priv->pages_refcount == 0);
+       BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
 
        if (--obj_priv->pages_refcount != 0)
                return;
@@ -1444,13 +1481,21 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
        if (obj_priv->tiling_mode != I915_TILING_NONE)
                i915_gem_object_save_bit_17_swizzle(obj);
 
-       for (i = 0; i < page_count; i++)
-               if (obj_priv->pages[i] != NULL) {
-                       if (obj_priv->dirty)
-                               set_page_dirty(obj_priv->pages[i]);
+       if (obj_priv->madv == I915_MADV_DONTNEED)
+               obj_priv->dirty = 0;
+
+       for (i = 0; i < page_count; i++) {
+               if (obj_priv->pages[i] == NULL)
+                       break;
+
+               if (obj_priv->dirty)
+                       set_page_dirty(obj_priv->pages[i]);
+
+               if (obj_priv->madv == I915_MADV_WILLNEED)
                        mark_page_accessed(obj_priv->pages[i]);
-                       page_cache_release(obj_priv->pages[i]);
-               }
+
+               page_cache_release(obj_priv->pages[i]);
+       }
        obj_priv->dirty = 0;
 
        drm_free_large(obj_priv->pages);
@@ -1489,6 +1534,26 @@ i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
        obj_priv->last_rendering_seqno = 0;
 }
 
+/* Immediately discard the backing storage */
+static void
+i915_gem_object_truncate(struct drm_gem_object *obj)
+{
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       struct inode *inode;
+
+       inode = obj->filp->f_path.dentry->d_inode;
+       if (inode->i_op->truncate)
+               inode->i_op->truncate(inode);
+
+       obj_priv->madv = __I915_MADV_PURGED;
+}
+
+static inline int
+i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
+{
+       return obj_priv->madv == I915_MADV_DONTNEED;
+}
+
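
Together with the madvise ioctl added further down, this gives each object a small state machine: userspace toggles between I915_MADV_WILLNEED and I915_MADV_DONTNEED, and the kernel moves a DONTNEED object to the terminal __I915_MADV_PURGED state once its backing store has been truncated. A restatement of those rules as a checker (a sketch; the constants are the ones this series introduces):

static int madv_transition_valid(unsigned int from, unsigned int to)
{
	if (from == __I915_MADV_PURGED)
		return 0;	/* purged is terminal: the pages are gone */
	if (to == __I915_MADV_PURGED)
		return from == I915_MADV_DONTNEED; /* only unneeded data is purged */
	/* userspace may flip freely between the two advice values */
	return to == I915_MADV_WILLNEED || to == I915_MADV_DONTNEED;
}
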
 static void
 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
 {
@@ -1577,15 +1642,24 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 
                        if ((obj->write_domain & flush_domains) ==
                            obj->write_domain) {
+                               uint32_t old_write_domain = obj->write_domain;
+
                                obj->write_domain = 0;
                                i915_gem_object_move_to_active(obj, seqno);
+
+                               trace_i915_gem_object_change_domain(obj,
+                                                                   obj->read_domains,
+                                                                   old_write_domain);
                        }
                }
 
        }
 
-       if (was_empty && !dev_priv->mm.suspended)
-               queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+       if (!dev_priv->mm.suspended) {
+               mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+               if (was_empty)
+                       queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+       }
        return seqno;
 }
 
@@ -1623,6 +1697,8 @@ i915_gem_retire_request(struct drm_device *dev,
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
 
+       trace_i915_gem_request_retire(dev, request->seqno);
+
        /* Move any buffers on the active list that are no longer referenced
         * by the ringbuffer to the flushing/inactive lists as appropriate.
         */
@@ -1671,7 +1747,7 @@ out:
 /**
  * Returns true if seq1 is later than seq2.
  */
-static int
+bool
 i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 {
        return (int32_t)(seq1 - seq2) >= 0;
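
Exporting i915_seqno_passed() makes the wraparound-safe comparison available outside this file. The cast is the standard serial-number idiom: subtraction in modular 32-bit arithmetic, reinterpreted as signed, correctly orders any two seqnos less than 2^31 apart, which holds in practice because requests retire long before the counter laps itself. A self-contained user-space demonstration (sketch):

#include <assert.h>
#include <stdint.h>

static int seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
	assert(seqno_passed(5, 3));		/* ordinary case */
	assert(!seqno_passed(3, 5));
	assert(seqno_passed(2, 0xfffffffeu));	/* 2 is "after" the wrap */
	assert(!seqno_passed(0xfffffffeu, 2));
	return 0;
}
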
@@ -1709,7 +1785,7 @@ i915_gem_retire_requests(struct drm_device *dev)
                retiring_seqno = request->seqno;
 
                if (i915_seqno_passed(seqno, retiring_seqno) ||
-                   dev_priv->mm.wedged) {
+                   atomic_read(&dev_priv->mm.wedged)) {
                        i915_gem_retire_request(dev, request);
 
                        list_del(&request->list);
@@ -1751,6 +1827,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 
        BUG_ON(seqno == 0);
 
+       if (atomic_read(&dev_priv->mm.wedged))
+               return -EIO;
+
        if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
                if (IS_IGDNG(dev))
                        ier = I915_READ(DEIER) | I915_READ(GTIER);
@@ -1763,16 +1842,20 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
                        i915_driver_irq_postinstall(dev);
                }
 
+               trace_i915_gem_request_wait_begin(dev, seqno);
+
                dev_priv->mm.waiting_gem_seqno = seqno;
                i915_user_irq_get(dev);
                ret = wait_event_interruptible(dev_priv->irq_queue,
                                               i915_seqno_passed(i915_get_gem_seqno(dev),
                                                                 seqno) ||
-                                              dev_priv->mm.wedged);
+                                              atomic_read(&dev_priv->mm.wedged));
                i915_user_irq_put(dev);
                dev_priv->mm.waiting_gem_seqno = 0;
+
+               trace_i915_gem_request_wait_end(dev, seqno);
        }
-       if (dev_priv->mm.wedged)
+       if (atomic_read(&dev_priv->mm.wedged))
                ret = -EIO;
 
        if (ret && ret != -ERESTARTSYS)
@@ -1803,6 +1886,8 @@ i915_gem_flush(struct drm_device *dev,
        DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
                  invalidate_domains, flush_domains);
 #endif
+       trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
+                                    invalidate_domains, flush_domains);
 
        if (flush_domains & I915_GEM_DOMAIN_CPU)
                drm_agp_chipset_flush(dev);
@@ -1915,6 +2000,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
                return -EINVAL;
        }
 
+       /* blow away mappings if mapped through GTT */
+       i915_gem_release_mmap(obj);
+
+       if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
+               i915_gem_clear_fence_reg(obj);
+
        /* Move the object to the CPU domain to ensure that
         * any possible CPU writes while it's not in the GTT
         * are flushed when we go to remap it. This will
@@ -1928,21 +2019,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
                return ret;
        }
 
+       BUG_ON(obj_priv->active);
+
        if (obj_priv->agp_mem != NULL) {
                drm_unbind_agp(obj_priv->agp_mem);
                drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
                obj_priv->agp_mem = NULL;
        }
 
-       BUG_ON(obj_priv->active);
-
-       /* blow away mappings if mapped through GTT */
-       i915_gem_release_mmap(obj);
-
-       if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
-               i915_gem_clear_fence_reg(obj);
-
        i915_gem_object_put_pages(obj);
+       BUG_ON(obj_priv->pages_refcount);
 
        if (obj_priv->gtt_space) {
                atomic_dec(&dev->gtt_count);
@@ -1956,40 +2042,113 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
        if (!list_empty(&obj_priv->list))
                list_del_init(&obj_priv->list);
 
+       if (i915_gem_object_is_purgeable(obj_priv))
+               i915_gem_object_truncate(obj);
+
+       trace_i915_gem_object_unbind(obj);
+
        return 0;
 }
 
+static struct drm_gem_object *
+i915_gem_find_inactive_object(struct drm_device *dev, int min_size)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv;
+       struct drm_gem_object *best = NULL;
+       struct drm_gem_object *first = NULL;
+
+       /* Try to find the smallest clean object */
+       list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
+               struct drm_gem_object *obj = obj_priv->obj;
+               if (obj->size >= min_size) {
+                       if ((!obj_priv->dirty ||
+                            i915_gem_object_is_purgeable(obj_priv)) &&
+                           (!best || obj->size < best->size)) {
+                               best = obj;
+                               if (best->size == min_size)
+                                       return best;
+                       }
+                       if (!first)
+                               first = obj;
+               }
+       }
+
+       return best ? best : first;
+}
+
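
The search prefers the smallest clean (or purgeable) buffer that is large enough, short-circuits on an exact fit, and only falls back to the first sufficiently large dirty buffer when no clean candidate exists, so eviction can usually skip the writeback a dirty buffer would need. The same policy over a plain array (a self-contained sketch):

#include <stddef.h>

struct buf { size_t size; int clean; };

static struct buf *pick_victim(struct buf *bufs, int n, size_t min_size)
{
	struct buf *best = NULL, *first = NULL;
	int i;

	for (i = 0; i < n; i++) {
		if (bufs[i].size < min_size)
			continue;
		if (bufs[i].clean && (!best || bufs[i].size < best->size)) {
			best = &bufs[i];
			if (best->size == min_size)
				return best;	/* exact fit: stop looking */
		}
		if (!first)
			first = &bufs[i];	/* fallback: fits, but may be dirty */
	}
	return best ? best : first;
}
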
 static int
-i915_gem_evict_something(struct drm_device *dev)
+i915_gem_evict_everything(struct drm_device *dev)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       uint32_t seqno;
+       int ret;
+       bool lists_empty;
+
+       spin_lock(&dev_priv->mm.active_list_lock);
+       lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+                      list_empty(&dev_priv->mm.flushing_list) &&
+                      list_empty(&dev_priv->mm.active_list));
+       spin_unlock(&dev_priv->mm.active_list_lock);
+
+       if (lists_empty)
+               return -ENOSPC;
+
+       /* Flush everything (on to the inactive lists) and evict */
+       i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+       seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
+       if (seqno == 0)
+               return -ENOMEM;
+
+       ret = i915_wait_request(dev, seqno);
+       if (ret)
+               return ret;
+
+       ret = i915_gem_evict_from_inactive_list(dev);
+       if (ret)
+               return ret;
+
+       spin_lock(&dev_priv->mm.active_list_lock);
+       lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+                      list_empty(&dev_priv->mm.flushing_list) &&
+                      list_empty(&dev_priv->mm.active_list));
+       spin_unlock(&dev_priv->mm.active_list_lock);
+       BUG_ON(!lists_empty);
+
+       return 0;
+}
+
+static int
+i915_gem_evict_something(struct drm_device *dev, int min_size)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_gem_object *obj;
-       struct drm_i915_gem_object *obj_priv;
-       int ret = 0;
+       int ret;
 
        for (;;) {
+               i915_gem_retire_requests(dev);
+
                /* If there's an inactive buffer available now, grab it
                 * and be done.
                 */
-               if (!list_empty(&dev_priv->mm.inactive_list)) {
-                       obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
-                                                   struct drm_i915_gem_object,
-                                                   list);
-                       obj = obj_priv->obj;
-                       BUG_ON(obj_priv->pin_count != 0);
+               obj = i915_gem_find_inactive_object(dev, min_size);
+               if (obj) {
+                       struct drm_i915_gem_object *obj_priv;
+
 #if WATCH_LRU
                        DRM_INFO("%s: evicting %p\n", __func__, obj);
 #endif
+                       obj_priv = obj->driver_private;
+                       BUG_ON(obj_priv->pin_count != 0);
                        BUG_ON(obj_priv->active);
 
                        /* Wait on the rendering and unbind the buffer. */
-                       ret = i915_gem_object_unbind(obj);
-                       break;
+                       return i915_gem_object_unbind(obj);
                }
 
                /* If we didn't get anything, but the ring is still processing
-                * things, wait for one of those things to finish and hopefully
-                * leave us a buffer to evict.
+                * things, wait for the next to finish and hopefully leave us
+                * a buffer to evict.
                 */
                if (!list_empty(&dev_priv->mm.request_list)) {
                        struct drm_i915_gem_request *request;
@@ -2000,16 +2159,9 @@ i915_gem_evict_something(struct drm_device *dev)
 
                        ret = i915_wait_request(dev, request->seqno);
                        if (ret)
-                               break;
+                               return ret;
 
-                       /* if waiting caused an object to become inactive,
-                        * then loop around and wait for it. Otherwise, we
-                        * assume that waiting freed and unbound something,
-                        * so there should now be some space in the GTT
-                        */
-                       if (!list_empty(&dev_priv->mm.inactive_list))
-                               continue;
-                       break;
+                       continue;
                }
 
                /* If we didn't have anything on the request list but there
@@ -2018,46 +2170,44 @@ i915_gem_evict_something(struct drm_device *dev)
                 * will get moved to inactive.
                 */
                if (!list_empty(&dev_priv->mm.flushing_list)) {
-                       obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
-                                                   struct drm_i915_gem_object,
-                                                   list);
-                       obj = obj_priv->obj;
+                       struct drm_i915_gem_object *obj_priv;
 
-                       i915_gem_flush(dev,
-                                      obj->write_domain,
-                                      obj->write_domain);
-                       i915_add_request(dev, NULL, obj->write_domain);
+                       /* Find an object that we can immediately reuse */
+                       list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
+                               obj = obj_priv->obj;
+                               if (obj->size >= min_size)
+                                       break;
 
-                       obj = NULL;
-                       continue;
-               }
+                               obj = NULL;
+                       }
 
-               DRM_ERROR("inactive empty %d request empty %d "
-                         "flushing empty %d\n",
-                         list_empty(&dev_priv->mm.inactive_list),
-                         list_empty(&dev_priv->mm.request_list),
-                         list_empty(&dev_priv->mm.flushing_list));
-               /* If we didn't do any of the above, there's nothing to be done
-                * and we just can't fit it in.
-                */
-               return -ENOSPC;
-       }
-       return ret;
-}
+                       if (obj != NULL) {
+                               uint32_t seqno;
 
-static int
-i915_gem_evict_everything(struct drm_device *dev)
-{
-       int ret;
+                               i915_gem_flush(dev,
+                                              obj->write_domain,
+                                              obj->write_domain);
+                               seqno = i915_add_request(dev, NULL, obj->write_domain);
+                               if (seqno == 0)
+                                       return -ENOMEM;
 
-       for (;;) {
-               ret = i915_gem_evict_something(dev);
-               if (ret != 0)
-                       break;
+                               ret = i915_wait_request(dev, seqno);
+                               if (ret)
+                                       return ret;
+
+                               continue;
+                       }
+               }
+
+               /* If we didn't do any of the above, there's no single buffer
+                * large enough to swap out for the new one, so just evict
+                * everything and start again. (This should be rare.)
+                */
+               if (!list_empty(&dev_priv->mm.inactive_list))
+                       return i915_gem_evict_from_inactive_list(dev);
+               else
+                       return i915_gem_evict_everything(dev);
        }
-       if (ret == -ENOSPC)
-               return 0;
-       return ret;
 }
 
 int
@@ -2080,7 +2230,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
        BUG_ON(obj_priv->pages != NULL);
        obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
        if (obj_priv->pages == NULL) {
-               DRM_ERROR("Faled to allocate page list\n");
                obj_priv->pages_refcount--;
                return -ENOMEM;
        }
@@ -2091,7 +2240,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
                page = read_mapping_page(mapping, i, NULL);
                if (IS_ERR(page)) {
                        ret = PTR_ERR(page);
-                       DRM_ERROR("read_mapping_page failed: %d\n", ret);
                        i915_gem_object_put_pages(obj);
                        return ret;
                }
@@ -2328,6 +2476,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
        else
                i830_write_fence_reg(reg);
 
+       trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
+
        return 0;
 }
 
@@ -2410,10 +2560,17 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
        struct drm_mm_node *free_space;
-       int page_count, ret;
+       bool retry_alloc = false;
+       int ret;
 
        if (dev_priv->mm.suspended)
                return -EBUSY;
+
+       if (obj_priv->madv != I915_MADV_WILLNEED) {
+               DRM_ERROR("Attempting to bind a purgeable object\n");
+               return -EINVAL;
+       }
+
        if (alignment == 0)
                alignment = i915_gem_get_gtt_alignment(obj);
        if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
@@ -2433,30 +2590,16 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
                }
        }
        if (obj_priv->gtt_space == NULL) {
-               bool lists_empty;
-
                /* If the gtt is empty and we're still having trouble
                 * fitting our object in, we're out of memory.
                 */
 #if WATCH_LRU
                DRM_INFO("%s: GTT full, evicting something\n", __func__);
 #endif
-               spin_lock(&dev_priv->mm.active_list_lock);
-               lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
-                              list_empty(&dev_priv->mm.flushing_list) &&
-                              list_empty(&dev_priv->mm.active_list));
-               spin_unlock(&dev_priv->mm.active_list_lock);
-               if (lists_empty) {
-                       DRM_ERROR("GTT full, but LRU list empty\n");
-                       return -ENOSPC;
-               }
-
-               ret = i915_gem_evict_something(dev);
-               if (ret != 0) {
-                       if (ret != -ERESTARTSYS)
-                               DRM_ERROR("Failed to evict a buffer %d\n", ret);
+               ret = i915_gem_evict_something(dev, obj->size);
+               if (ret)
                        return ret;
-               }
+
                goto search_free;
        }
 
@@ -2464,27 +2607,56 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
        DRM_INFO("Binding object of size %zd at 0x%08x\n",
                 obj->size, obj_priv->gtt_offset);
 #endif
+       if (retry_alloc) {
+               i915_gem_object_set_page_gfp_mask(obj,
+                                                 i915_gem_object_get_page_gfp_mask(obj) & ~__GFP_NORETRY);
+       }
        ret = i915_gem_object_get_pages(obj);
+       if (retry_alloc) {
+               i915_gem_object_set_page_gfp_mask(obj,
+                                                 i915_gem_object_get_page_gfp_mask(obj) | __GFP_NORETRY);
+       }
        if (ret) {
                drm_mm_put_block(obj_priv->gtt_space);
                obj_priv->gtt_space = NULL;
+
+               if (ret == -ENOMEM) {
+                       /* first try to clear up some space from the GTT */
+                       ret = i915_gem_evict_something(dev, obj->size);
+                       if (ret) {
+                               /* now try to shrink everyone else */
+                               if (!retry_alloc) {
+                                       retry_alloc = true;
+                                       goto search_free;
+                               }
+
+                               return ret;
+                       }
+
+                       goto search_free;
+               }
+
                return ret;
        }
 
-       page_count = obj->size / PAGE_SIZE;
        /* Create an AGP memory structure pointing at our pages, and bind it
         * into the GTT.
         */
        obj_priv->agp_mem = drm_agp_bind_pages(dev,
                                               obj_priv->pages,
-                                              page_count,
+                                              obj->size >> PAGE_SHIFT,
                                               obj_priv->gtt_offset,
                                               obj_priv->agp_type);
        if (obj_priv->agp_mem == NULL) {
                i915_gem_object_put_pages(obj);
                drm_mm_put_block(obj_priv->gtt_space);
                obj_priv->gtt_space = NULL;
-               return -ENOMEM;
+
+               ret = i915_gem_evict_something(dev, obj->size);
+               if (ret)
+                       return ret;
+
+               goto search_free;
        }
        atomic_inc(&dev->gtt_count);
        atomic_add(obj->size, &dev->gtt_memory);
@@ -2496,6 +2668,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
        BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
        BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 
+       trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
+
        return 0;
 }
 
@@ -2511,15 +2685,7 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
        if (obj_priv->pages == NULL)
                return;
 
-       /* XXX: The 865 in particular appears to be weird in how it handles
-        * cache flushing.  We haven't figured it out, but the
-        * clflush+agp_chipset_flush doesn't appear to successfully get the
-        * data visible to the PGU, while wbinvd + agp_chipset_flush does.
-        */
-       if (IS_I865G(obj->dev)) {
-               wbinvd();
-               return;
-       }
+       trace_i915_gem_object_clflush(obj);
 
        drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
 }
@@ -2530,21 +2696,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 {
        struct drm_device *dev = obj->dev;
        uint32_t seqno;
+       uint32_t old_write_domain;
 
        if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
                return;
 
        /* Queue the GPU write cache flushing we need. */
+       old_write_domain = obj->write_domain;
        i915_gem_flush(dev, 0, obj->write_domain);
        seqno = i915_add_request(dev, NULL, obj->write_domain);
        obj->write_domain = 0;
        i915_gem_object_move_to_active(obj, seqno);
+
+       trace_i915_gem_object_change_domain(obj,
+                                           obj->read_domains,
+                                           old_write_domain);
 }
 
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
 {
+       uint32_t old_write_domain;
+
        if (obj->write_domain != I915_GEM_DOMAIN_GTT)
                return;
 
@@ -2552,7 +2726,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
         * to it immediately go to main memory as far as we know, so there's
         * no chipset flush.  It also doesn't land in render cache.
         */
+       old_write_domain = obj->write_domain;
        obj->write_domain = 0;
+
+       trace_i915_gem_object_change_domain(obj,
+                                           obj->read_domains,
+                                           old_write_domain);
 }
 
 /** Flushes the CPU write domain for the object if it's dirty. */
@@ -2560,13 +2739,19 @@ static void
 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
 {
        struct drm_device *dev = obj->dev;
+       uint32_t old_write_domain;
 
        if (obj->write_domain != I915_GEM_DOMAIN_CPU)
                return;
 
        i915_gem_clflush_object(obj);
        drm_agp_chipset_flush(dev);
+       old_write_domain = obj->write_domain;
        obj->write_domain = 0;
+
+       trace_i915_gem_object_change_domain(obj,
+                                           obj->read_domains,
+                                           old_write_domain);
 }
 
 /**
@@ -2579,6 +2764,7 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 {
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       uint32_t old_write_domain, old_read_domains;
        int ret;
 
        /* Not valid to be called on unbound objects. */
@@ -2591,6 +2777,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
        if (ret != 0)
                return ret;
 
+       old_write_domain = obj->write_domain;
+       old_read_domains = obj->read_domains;
+
        /* If we're writing through the GTT domain, then CPU and GPU caches
         * will need to be invalidated at next use.
         */
@@ -2609,6 +2798,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
                obj_priv->dirty = 1;
        }
 
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           old_write_domain);
+
        return 0;
 }
 
@@ -2621,6 +2814,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 static int
 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
 {
+       uint32_t old_write_domain, old_read_domains;
        int ret;
 
        i915_gem_object_flush_gpu_write_domain(obj);
@@ -2636,6 +2830,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
         */
        i915_gem_object_set_to_full_cpu_read_domain(obj);
 
+       old_write_domain = obj->write_domain;
+       old_read_domains = obj->read_domains;
+
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj);
@@ -2656,6 +2853,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
                obj->write_domain = I915_GEM_DOMAIN_CPU;
        }
 
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           old_write_domain);
+
        return 0;
 }
 
@@ -2777,6 +2978,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
        struct drm_i915_gem_object      *obj_priv = obj->driver_private;
        uint32_t                        invalidate_domains = 0;
        uint32_t                        flush_domains = 0;
+       uint32_t                        old_read_domains;
 
        BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
        BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
@@ -2823,6 +3025,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
                i915_gem_clflush_object(obj);
        }
 
+       old_read_domains = obj->read_domains;
+
        /* The actual obj->write_domain will be updated with
         * pending_write_domain after we emit the accumulated flush for all
         * of our domain changes in execbuffers (which clears objects'
@@ -2841,6 +3045,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
                 obj->read_domains, obj->write_domain,
                 dev->invalidate_domains, dev->flush_domains);
 #endif
+
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           obj->write_domain);
 }
 
 /**
@@ -2893,6 +3101,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
                                          uint64_t offset, uint64_t size)
 {
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       uint32_t old_read_domains;
        int i, ret;
 
        if (offset == 0 && size == obj->size)
@@ -2939,8 +3148,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
         */
        BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
 
+       old_read_domains = obj->read_domains;
        obj->read_domains |= I915_GEM_DOMAIN_CPU;
 
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           obj->write_domain);
+
        return 0;
 }
 
@@ -2984,6 +3198,21 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                }
                target_obj_priv = target_obj->driver_private;
 
+#if WATCH_RELOC
+               DRM_INFO("%s: obj %p offset %08x target %d "
+                        "read %08x write %08x gtt %08x "
+                        "presumed %08x delta %08x\n",
+                        __func__,
+                        obj,
+                        (int) reloc->offset,
+                        (int) reloc->target_handle,
+                        (int) reloc->read_domains,
+                        (int) reloc->write_domain,
+                        (int) target_obj_priv->gtt_offset,
+                        (int) reloc->presumed_offset,
+                        reloc->delta);
+#endif
+
                /* The target buffer should have appeared before us in the
                 * exec_object list, so it should have a GTT space bound by now.
                 */
@@ -2995,25 +3224,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                        return -EINVAL;
                }
 
-               if (reloc->offset > obj->size - 4) {
-                       DRM_ERROR("Relocation beyond object bounds: "
-                                 "obj %p target %d offset %d size %d.\n",
-                                 obj, reloc->target_handle,
-                                 (int) reloc->offset, (int) obj->size);
-                       drm_gem_object_unreference(target_obj);
-                       i915_gem_object_unpin(obj);
-                       return -EINVAL;
-               }
-               if (reloc->offset & 3) {
-                       DRM_ERROR("Relocation not 4-byte aligned: "
-                                 "obj %p target %d offset %d.\n",
-                                 obj, reloc->target_handle,
-                                 (int) reloc->offset);
-                       drm_gem_object_unreference(target_obj);
-                       i915_gem_object_unpin(obj);
-                       return -EINVAL;
-               }
-
+               /* Validate that the target is in a valid r/w GPU domain */
                if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
                    reloc->read_domains & I915_GEM_DOMAIN_CPU) {
                        DRM_ERROR("reloc with read/write CPU domains: "
@@ -3027,7 +3238,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                        i915_gem_object_unpin(obj);
                        return -EINVAL;
                }
-
                if (reloc->write_domain && target_obj->pending_write_domain &&
                    reloc->write_domain != target_obj->pending_write_domain) {
                        DRM_ERROR("Write domain conflict: "
@@ -3042,21 +3252,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                        return -EINVAL;
                }
 
-#if WATCH_RELOC
-               DRM_INFO("%s: obj %p offset %08x target %d "
-                        "read %08x write %08x gtt %08x "
-                        "presumed %08x delta %08x\n",
-                        __func__,
-                        obj,
-                        (int) reloc->offset,
-                        (int) reloc->target_handle,
-                        (int) reloc->read_domains,
-                        (int) reloc->write_domain,
-                        (int) target_obj_priv->gtt_offset,
-                        (int) reloc->presumed_offset,
-                        reloc->delta);
-#endif
-
                target_obj->pending_read_domains |= reloc->read_domains;
                target_obj->pending_write_domain |= reloc->write_domain;
 
@@ -3068,6 +3263,37 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                        continue;
                }
 
+               /* Check that the relocation address is valid... */
+               if (reloc->offset > obj->size - 4) {
+                       DRM_ERROR("Relocation beyond object bounds: "
+                                 "obj %p target %d offset %d size %d.\n",
+                                 obj, reloc->target_handle,
+                                 (int) reloc->offset, (int) obj->size);
+                       drm_gem_object_unreference(target_obj);
+                       i915_gem_object_unpin(obj);
+                       return -EINVAL;
+               }
+               if (reloc->offset & 3) {
+                       DRM_ERROR("Relocation not 4-byte aligned: "
+                                 "obj %p target %d offset %d.\n",
+                                 obj, reloc->target_handle,
+                                 (int) reloc->offset);
+                       drm_gem_object_unreference(target_obj);
+                       i915_gem_object_unpin(obj);
+                       return -EINVAL;
+               }
+
+               /* and points to somewhere within the target object. */
+               if (reloc->delta >= target_obj->size) {
+                       DRM_ERROR("Relocation beyond target object bounds: "
+                                 "obj %p target %d delta %d size %d.\n",
+                                 obj, reloc->target_handle,
+                                 (int) reloc->delta, (int) target_obj->size);
+                       drm_gem_object_unreference(target_obj);
+                       i915_gem_object_unpin(obj);
+                       return -EINVAL;
+               }
+
                ret = i915_gem_object_set_to_gtt_domain(obj, 1);
                if (ret != 0) {
                        drm_gem_object_unreference(target_obj);
@@ -3126,6 +3352,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
        exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
        exec_len = (uint32_t) exec->batch_len;
 
+       trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno);
+
        count = nbox ? nbox : 1;
 
        for (i = 0; i < count; i++) {
@@ -3363,7 +3591,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
        i915_verify_inactive(dev, __FILE__, __LINE__);
 
-       if (dev_priv->mm.wedged) {
+       if (atomic_read(&dev_priv->mm.wedged)) {
                DRM_ERROR("Execbuf while wedged\n");
                mutex_unlock(&dev->struct_mutex);
                ret = -EIO;
@@ -3421,8 +3649,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
                /* error other than GTT full, or we've already tried again */
                if (ret != -ENOSPC || pin_tries >= 1) {
-                       if (ret != -ERESTARTSYS)
-                               DRM_ERROR("Failed to pin buffers %d\n", ret);
+                       if (ret != -ERESTARTSYS) {
+                               unsigned long long total_size = 0;
+                               for (i = 0; i < args->buffer_count; i++)
+                                       total_size += object_list[i]->size;
+                               DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n",
+                                         pinned+1, args->buffer_count,
+                                         total_size, ret);
+                               DRM_ERROR("%d objects [%d pinned], "
+                                         "%d object bytes [%d pinned], "
+                                         "%d/%d gtt bytes\n",
+                                         atomic_read(&dev->object_count),
+                                         atomic_read(&dev->pin_count),
+                                         atomic_read(&dev->object_memory),
+                                         atomic_read(&dev->pin_memory),
+                                         atomic_read(&dev->gtt_memory),
+                                         dev->gtt_total);
+                       }
                        goto err;
                }
 
@@ -3433,7 +3676,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
                /* evict everyone we can from the aperture */
                ret = i915_gem_evict_everything(dev);
-               if (ret)
+               if (ret && ret != -ENOSPC)
                        goto err;
        }
 
@@ -3489,8 +3732,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
        for (i = 0; i < args->buffer_count; i++) {
                struct drm_gem_object *obj = object_list[i];
+               uint32_t old_write_domain = obj->write_domain;
 
                obj->write_domain = obj->pending_write_domain;
+               trace_i915_gem_object_change_domain(obj,
+                                                   obj->read_domains,
+                                                   old_write_domain);
        }
 
        i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -3607,11 +3854,8 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
        i915_verify_inactive(dev, __FILE__, __LINE__);
        if (obj_priv->gtt_space == NULL) {
                ret = i915_gem_object_bind_to_gtt(obj, alignment);
-               if (ret != 0) {
-                       if (ret != -EBUSY && ret != -ERESTARTSYS)
-                               DRM_ERROR("Failure to bind: %d\n", ret);
+               if (ret)
                        return ret;
-               }
        }
        /*
         * Pre-965 chips need a fence register set up in order to
@@ -3691,6 +3935,13 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
        }
        obj_priv = obj->driver_private;
 
+       if (obj_priv->madv != I915_MADV_WILLNEED) {
+               DRM_ERROR("Attempting to pin a purgeable buffer\n");
+               drm_gem_object_unreference(obj);
+               mutex_unlock(&dev->struct_mutex);
+               return -EINVAL;
+       }
+
        if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
                DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
                          args->handle);
@@ -3803,6 +4054,56 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
     return i915_gem_ring_throttle(dev, file_priv);
 }
 
+int
+i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+                      struct drm_file *file_priv)
+{
+       struct drm_i915_gem_madvise *args = data;
+       struct drm_gem_object *obj;
+       struct drm_i915_gem_object *obj_priv;
+
+       switch (args->madv) {
+       case I915_MADV_DONTNEED:
+       case I915_MADV_WILLNEED:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+       if (obj == NULL) {
+               DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
+                         args->handle);
+               return -EBADF;
+       }
+
+       mutex_lock(&dev->struct_mutex);
+       obj_priv = obj->driver_private;
+
+       if (obj_priv->pin_count) {
+               drm_gem_object_unreference(obj);
+               mutex_unlock(&dev->struct_mutex);
+
+               DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
+               return -EINVAL;
+       }
+
+       if (obj_priv->madv != __I915_MADV_PURGED)
+               obj_priv->madv = args->madv;
+
+       /* if the object is no longer bound, discard its backing storage */
+       if (i915_gem_object_is_purgeable(obj_priv) &&
+           obj_priv->gtt_space == NULL)
+               i915_gem_object_truncate(obj);
+
+       args->retained = obj_priv->madv != __I915_MADV_PURGED;
+
+       drm_gem_object_unreference(obj);
+       mutex_unlock(&dev->struct_mutex);
+
+       return 0;
+}
+
 int i915_gem_init_object(struct drm_gem_object *obj)
 {
        struct drm_i915_gem_object *obj_priv;
@@ -3827,6 +4128,9 @@ int i915_gem_init_object(struct drm_gem_object *obj)
        obj_priv->fence_reg = I915_FENCE_REG_NONE;
        INIT_LIST_HEAD(&obj_priv->list);
        INIT_LIST_HEAD(&obj_priv->fence_list);
+       obj_priv->madv = I915_MADV_WILLNEED;
+
+       trace_i915_gem_object_create(obj);
 
        return 0;
 }
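
From userspace, the new ioctl is the basis for buffer-cache reuse: mark idle buffers DONTNEED so the kernel may reclaim them under pressure, then mark them WILLNEED on reuse and check `retained' to see whether the contents survived. A sketch, assuming the struct drm_i915_gem_madvise and DRM_IOCTL_I915_GEM_MADVISE definitions that accompany this series in i915_drm.h:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include "i915_drm.h"

/* Returns 1 if the buffer still holds its data, 0 if it was purged,
 * -1 on error. */
static int gem_madvise(int fd, uint32_t handle, uint32_t madv)
{
	struct drm_i915_gem_madvise arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.madv = madv;	/* I915_MADV_WILLNEED or I915_MADV_DONTNEED */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
		return -1;
	return arg.retained;
}
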
@@ -3836,6 +4140,8 @@ void i915_gem_free_object(struct drm_gem_object *obj)
        struct drm_device *dev = obj->dev;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
+       trace_i915_gem_object_destroy(obj);
+
        while (obj_priv->pin_count > 0)
                i915_gem_object_unpin(obj);
 
@@ -3844,43 +4150,35 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
        i915_gem_object_unbind(obj);
 
-       i915_gem_free_mmap_offset(obj);
+       if (obj_priv->mmap_offset)
+               i915_gem_free_mmap_offset(obj);
 
        kfree(obj_priv->page_cpu_valid);
        kfree(obj_priv->bit_17);
        kfree(obj->driver_private);
 }
 
-/** Unbinds all objects that are on the given buffer list. */
+/** Unbinds all inactive objects. */
 static int
-i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
+i915_gem_evict_from_inactive_list(struct drm_device *dev)
 {
-       struct drm_gem_object *obj;
-       struct drm_i915_gem_object *obj_priv;
-       int ret;
+       drm_i915_private_t *dev_priv = dev->dev_private;
 
-       while (!list_empty(head)) {
-               obj_priv = list_first_entry(head,
-                                           struct drm_i915_gem_object,
-                                           list);
-               obj = obj_priv->obj;
+       while (!list_empty(&dev_priv->mm.inactive_list)) {
+               struct drm_gem_object *obj;
+               int ret;
 
-               if (obj_priv->pin_count != 0) {
-                       DRM_ERROR("Pinned object in unbind list\n");
-                       mutex_unlock(&dev->struct_mutex);
-                       return -EINVAL;
-               }
+               obj = list_first_entry(&dev_priv->mm.inactive_list,
+                                      struct drm_i915_gem_object,
+                                      list)->obj;
 
                ret = i915_gem_object_unbind(obj);
                if (ret != 0) {
-                       DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
-                                 ret);
-                       mutex_unlock(&dev->struct_mutex);
+                       DRM_ERROR("Error unbinding object: %d\n", ret);
                        return ret;
                }
        }
 
-
        return 0;
 }
 
@@ -3902,6 +4200,7 @@ i915_gem_idle(struct drm_device *dev)
         * We need to replace this with a semaphore, or something.
         */
        dev_priv->mm.suspended = 1;
+       del_timer(&dev_priv->hangcheck_timer);
 
        /* Cancel the retire work handler, wait for it to finish if running
         */
@@ -3931,7 +4230,7 @@ i915_gem_idle(struct drm_device *dev)
                if (last_seqno == cur_seqno) {
                        if (stuck++ > 100) {
                                DRM_ERROR("hardware wedged\n");
-                               dev_priv->mm.wedged = 1;
+                               atomic_set(&dev_priv->mm.wedged, 1);
                                DRM_WAKEUP(&dev_priv->irq_queue);
                                break;
                        }
@@ -3944,7 +4243,7 @@ i915_gem_idle(struct drm_device *dev)
        i915_gem_retire_requests(dev);
 
        spin_lock(&dev_priv->mm.active_list_lock);
-       if (!dev_priv->mm.wedged) {
+       if (!atomic_read(&dev_priv->mm.wedged)) {
                /* Active and flushing should now be empty as we've
                 * waited for a sequence higher than any pending execbuffer
                 */
@@ -3962,29 +4261,41 @@ i915_gem_idle(struct drm_device *dev)
         * the GPU domains and just stuff them onto inactive.
         */
        while (!list_empty(&dev_priv->mm.active_list)) {
-               struct drm_i915_gem_object *obj_priv;
+               struct drm_gem_object *obj;
+               uint32_t old_write_domain;
 
-               obj_priv = list_first_entry(&dev_priv->mm.active_list,
-                                           struct drm_i915_gem_object,
-                                           list);
-               obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-               i915_gem_object_move_to_inactive(obj_priv->obj);
+               obj = list_first_entry(&dev_priv->mm.active_list,
+                                      struct drm_i915_gem_object,
+                                      list)->obj;
+               old_write_domain = obj->write_domain;
+               obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+               i915_gem_object_move_to_inactive(obj);
+
+               trace_i915_gem_object_change_domain(obj,
+                                                   obj->read_domains,
+                                                   old_write_domain);
        }
        spin_unlock(&dev_priv->mm.active_list_lock);
 
        while (!list_empty(&dev_priv->mm.flushing_list)) {
-               struct drm_i915_gem_object *obj_priv;
+               struct drm_gem_object *obj;
+               uint32_t old_write_domain;
 
-               obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
-                                           struct drm_i915_gem_object,
-                                           list);
-               obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-               i915_gem_object_move_to_inactive(obj_priv->obj);
+               obj = list_first_entry(&dev_priv->mm.flushing_list,
+                                      struct drm_i915_gem_object,
+                                      list)->obj;
+               old_write_domain = obj->write_domain;
+               obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+               i915_gem_object_move_to_inactive(obj);
+
+               trace_i915_gem_object_change_domain(obj,
+                                                   obj->read_domains,
+                                                   old_write_domain);
        }
 
 
        /* Move all inactive buffers out of the GTT. */
-       ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
+       ret = i915_gem_evict_from_inactive_list(dev);
        WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
        if (ret) {
                mutex_unlock(&dev->struct_mutex);
@@ -4206,9 +4517,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
        if (drm_core_check_feature(dev, DRIVER_MODESET))
                return 0;
 
-       if (dev_priv->mm.wedged) {
+       if (atomic_read(&dev_priv->mm.wedged)) {
                DRM_ERROR("Reenabling wedged hardware, good luck\n");
-               dev_priv->mm.wedged = 0;
+               atomic_set(&dev_priv->mm.wedged, 0);
        }
 
        mutex_lock(&dev->struct_mutex);
@@ -4274,6 +4585,10 @@ i915_gem_load(struct drm_device *dev)
                          i915_gem_retire_work_handler);
        dev_priv->mm.next_gem_seqno = 1;
 
+       spin_lock(&shrink_list_lock);
+       list_add(&dev_priv->mm.shrink_list, &shrink_list);
+       spin_unlock(&shrink_list_lock);
+
        /* Old X drivers will take 0-2 for front, back, depth buffers */
        dev_priv->fence_reg_start = 3;
 
@@ -4491,3 +4806,116 @@ void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
                list_del_init(i915_file_priv->mm.request_list.next);
        mutex_unlock(&dev->struct_mutex);
 }
+
+static int
+i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
+{
+       drm_i915_private_t *dev_priv, *next_dev;
+       struct drm_i915_gem_object *obj_priv, *next_obj;
+       int cnt = 0;
+       int would_deadlock = 1;
+
+       /* "fast-path" to count number of available objects */
+       if (nr_to_scan == 0) {
+               spin_lock(&shrink_list_lock);
+               list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
+                       struct drm_device *dev = dev_priv->dev;
+
+                       if (mutex_trylock(&dev->struct_mutex)) {
+                               list_for_each_entry(obj_priv,
+                                                   &dev_priv->mm.inactive_list,
+                                                   list)
+                                       cnt++;
+                               mutex_unlock(&dev->struct_mutex);
+                       }
+               }
+               spin_unlock(&shrink_list_lock);
+
+               return (cnt / 100) * sysctl_vfs_cache_pressure;
+       }
+
+       spin_lock(&shrink_list_lock);
+
+       /* first scan for clean buffers */
+       list_for_each_entry_safe(dev_priv, next_dev,
+                                &shrink_list, mm.shrink_list) {
+               struct drm_device *dev = dev_priv->dev;
+
+               if (!mutex_trylock(&dev->struct_mutex))
+                       continue;
+
+               spin_unlock(&shrink_list_lock);
+
+               i915_gem_retire_requests(dev);
+
+               list_for_each_entry_safe(obj_priv, next_obj,
+                                        &dev_priv->mm.inactive_list,
+                                        list) {
+                       if (i915_gem_object_is_purgeable(obj_priv)) {
+                               i915_gem_object_unbind(obj_priv->obj);
+                               if (--nr_to_scan <= 0)
+                                       break;
+                       }
+               }
+
+               spin_lock(&shrink_list_lock);
+               mutex_unlock(&dev->struct_mutex);
+
+               would_deadlock = 0;
+
+               if (nr_to_scan <= 0)
+                       break;
+       }
+
+       /* second pass, evict/count anything still on the inactive list */
+       list_for_each_entry_safe(dev_priv, next_dev,
+                                &shrink_list, mm.shrink_list) {
+               struct drm_device *dev = dev_priv->dev;
+
+               if (!mutex_trylock(&dev->struct_mutex))
+                       continue;
+
+               spin_unlock(&shrink_list_lock);
+
+               list_for_each_entry_safe(obj_priv, next_obj,
+                                        &dev_priv->mm.inactive_list,
+                                        list) {
+                       if (nr_to_scan > 0) {
+                               i915_gem_object_unbind(obj_priv->obj);
+                               nr_to_scan--;
+                       } else
+                               cnt++;
+               }
+
+               spin_lock(&shrink_list_lock);
+               mutex_unlock(&dev->struct_mutex);
+
+               would_deadlock = 0;
+       }
+
+       spin_unlock(&shrink_list_lock);
+
+       if (would_deadlock)
+               return -1;
+       else if (cnt > 0)
+               return (cnt / 100) * sysctl_vfs_cache_pressure;
+       else
+               return 0;
+}
+
+static struct shrinker shrinker = {
+       .shrink = i915_gem_shrink,
+       .seeks = DEFAULT_SEEKS,
+};
+
+__init void
+i915_gem_shrinker_init(void)
+{
+       register_shrinker(&shrinker);
+}
+
+__exit void
+i915_gem_shrinker_exit(void)
+{
+       unregister_shrinker(&shrinker);
+}
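
The callback above follows the 2.6-era shrinker contract: called with nr_to_scan == 0 it only reports how many objects could be freed, scaled by sysctl_vfs_cache_pressure; called with work to do it frees up to nr_to_scan objects and returns what is left, or -1 when taking its locks would deadlock. A minimal sketch of that contract, with a hypothetical counter and lock standing in for the driver's inactive list:

    /* Sketch only: example_count/example_lock are hypothetical stand-ins. */
    #include <linux/dcache.h>       /* sysctl_vfs_cache_pressure */
    #include <linux/mm.h>           /* struct shrinker, DEFAULT_SEEKS */
    #include <linux/spinlock.h>

    static int example_count = 1000;
    static DEFINE_SPINLOCK(example_lock);

    static int example_shrink(int nr_to_scan, gfp_t gfp_mask)
    {
            if (nr_to_scan == 0)    /* query pass: report, don't free */
                    return (example_count / 100) * sysctl_vfs_cache_pressure;

            if (!spin_trylock(&example_lock))
                    return -1;      /* would deadlock: tell the VM to back off */

            while (nr_to_scan-- > 0 && example_count > 0)
                    example_count--;        /* stands in for freeing one object */

            spin_unlock(&example_lock);
            return (example_count / 100) * sysctl_vfs_cache_pressure;
    }

    static struct shrinker example_shrinker = {
            .shrink = example_shrink,
            .seeks  = DEFAULT_SEEKS,
    };
    /* register_shrinker(&example_shrinker) at init, unregister_shrinker() at exit */
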
index 6c89f2ff24956867a3376b97967f557491caaca3..4dfeec7cdd42c9a0e0c8b1832cc6c29ae4efa6e2 100644 (file)
@@ -31,6 +31,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include "intel_drv.h"
 
 #define MAX_NOPID ((u32)~0)
@@ -279,7 +280,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev)
                }
 
                if (gt_iir & GT_USER_INTERRUPT) {
-                       dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+                       u32 seqno = i915_get_gem_seqno(dev);
+                       dev_priv->mm.irq_gem_seqno = seqno;
+                       trace_i915_gem_request_complete(dev, seqno);
                        DRM_WAKEUP(&dev_priv->irq_queue);
                }
 
@@ -302,12 +305,25 @@ static void i915_error_work_func(struct work_struct *work)
        drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
                                                    error_work);
        struct drm_device *dev = dev_priv->dev;
-       char *event_string = "ERROR=1";
-       char *envp[] = { event_string, NULL };
+       char *error_event[] = { "ERROR=1", NULL };
+       char *reset_event[] = { "RESET=1", NULL };
+       char *reset_done_event[] = { "ERROR=0", NULL };
 
        DRM_DEBUG("generating error event\n");
-
-       kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
+       kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
+
+       if (atomic_read(&dev_priv->mm.wedged)) {
+               if (IS_I965G(dev)) {
+                       DRM_DEBUG("resetting chip\n");
+                       kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event);
+                       if (!i965_reset(dev, GDRST_RENDER)) {
+                               atomic_set(&dev_priv->mm.wedged, 0);
+                               kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
+                       }
+               } else {
+                       printk("reboot required\n");
+               }
+       }
 }
 
 /**
@@ -372,7 +388,7 @@ out:
  * so userspace knows something bad happened (should trigger collection
  * of a ring dump etc.).
  */
-static void i915_handle_error(struct drm_device *dev)
+static void i915_handle_error(struct drm_device *dev, bool wedged)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 eir = I915_READ(EIR);
@@ -482,6 +498,16 @@ static void i915_handle_error(struct drm_device *dev)
                I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
        }
 
+       if (wedged) {
+               atomic_set(&dev_priv->mm.wedged, 1);
+
+               /*
+                * Wake up waiting processes so they don't hang.
+                */
+               printk("i915: Waking up sleeping processes\n");
+               DRM_WAKEUP(&dev_priv->irq_queue);
+       }
+
        queue_work(dev_priv->wq, &dev_priv->error_work);
 }
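
Setting mm.wedged before the wakeup matters: a thread woken from irq_queue re-checks the flag and gives up rather than sleeping again on a GPU that will never signal. A simplified sketch of the waiter side (the -EIO return value is an assumption, not quoted from the driver's wait helper):

    static int example_wait_seqno(struct drm_device *dev, u32 seqno)
    {
            drm_i915_private_t *dev_priv = dev->dev_private;

            while (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
                    if (atomic_read(&dev_priv->mm.wedged))
                            return -EIO;    /* GPU dead: bail, don't re-sleep */
                    /* sleep on dev_priv->irq_queue; the DRM_WAKEUP() above
                     * releases us to re-check both conditions */
            }
            return 0;
    }
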
 
@@ -527,7 +553,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
                pipeb_stats = I915_READ(PIPEBSTAT);
 
                if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-                       i915_handle_error(dev);
+                       i915_handle_error(dev, false);
 
                /*
                 * Clear the PIPE(A|B)STAT regs before the IIR
@@ -599,8 +625,12 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
                }
 
                if (iir & I915_USER_INTERRUPT) {
-                       dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+                       u32 seqno = i915_get_gem_seqno(dev);
+                       dev_priv->mm.irq_gem_seqno = seqno;
+                       trace_i915_gem_request_complete(dev, seqno);
                        DRM_WAKEUP(&dev_priv->irq_queue);
+                       dev_priv->hangcheck_count = 0;
+                       mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
                }
 
                if (pipea_stats & vblank_status) {
@@ -880,6 +910,52 @@ int i915_vblank_swap(struct drm_device *dev, void *data,
        return -EINVAL;
 }
 
+struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+
+       return list_entry(dev_priv->mm.request_list.prev,
+                         struct drm_i915_gem_request, list);
+}
+
+/**
+ * This is called when the chip hasn't reported back with completed
+ * batchbuffers in a long time. The first time this is called we simply record
+ * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses
+ * again, we assume the chip is wedged and try to fix it.
+ */
+void i915_hangcheck_elapsed(unsigned long data)
+{
+       struct drm_device *dev = (struct drm_device *)data;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       uint32_t acthd;
+       
+       if (!IS_I965G(dev))
+               acthd = I915_READ(ACTHD);
+       else
+               acthd = I915_READ(ACTHD_I965);
+
+       /* If all work is done then ACTHD clearly hasn't advanced. */
+       if (list_empty(&dev_priv->mm.request_list) ||
+           i915_seqno_passed(i915_get_gem_seqno(dev),
+                             i915_get_tail_request(dev)->seqno)) {
+               dev_priv->hangcheck_count = 0;
+               return;
+       }
+
+       if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) {
+               DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
+               i915_handle_error(dev, true);
+               return;
+       }
+
+       /* Reset timer in case chip hangs without another request being added */
+       mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+
+       if (acthd != dev_priv->last_acthd)
+               dev_priv->hangcheck_count = 0;
+       else
+               dev_priv->hangcheck_count++;
+
+       dev_priv->last_acthd = acthd;
+}
+
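
The hangcheck above is a self-rearming watchdog: each period it samples ACTHD, counts consecutive unchanged samples, and declares a hang on the second. The same pattern in generic form; read_progress() and handle_hang() are hypothetical stubs, and the period is an assumed value, not DRM_I915_HANGCHECK_PERIOD:

    #include <linux/jiffies.h>
    #include <linux/timer.h>

    #define WATCHDOG_PERIOD (HZ / 2)        /* assumption: 500 ms */

    static struct timer_list watchdog;      /* setup_timer(&watchdog, watchdog_elapsed, 0) */
    static u32 last_sample;
    static int stuck_count;

    static u32 read_progress(void) { return 0; }    /* stand-in for I915_READ(ACTHD) */
    static void handle_hang(void) { }       /* stand-in for i915_handle_error(dev, true) */

    static void watchdog_elapsed(unsigned long data)
    {
            u32 sample = read_progress();

            if (sample == last_sample && stuck_count > 0) {
                    handle_hang();          /* no progress two periods running */
                    return;                 /* hung: do not re-arm */
            }

            stuck_count = (sample == last_sample) ? stuck_count + 1 : 0;
            last_sample = sample;
            mod_timer(&watchdog, jiffies + WATCHDOG_PERIOD);
    }
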
 /* drm_dma.h hooks
 */
 static void igdng_irq_preinstall(struct drm_device *dev)
index e4b4e8898e39a20a451cd97a5b57b13d9e63561b..2d5193556d3f48315714d190c686325e950644b0 100644 (file)
@@ -148,6 +148,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct opregion_asle *asle = dev_priv->opregion.asle;
        u32 blc_pwm_ctl, blc_pwm_ctl2;
+       u32 max_backlight, level, shift;
 
        if (!(bclp & ASLE_BCLP_VALID))
                return ASLE_BACKLIGHT_FAIL;
@@ -157,14 +158,25 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
                return ASLE_BACKLIGHT_FAIL;
 
        blc_pwm_ctl = I915_READ(BLC_PWM_CTL);
-       blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
        blc_pwm_ctl2 = I915_READ(BLC_PWM_CTL2);
 
-       if (blc_pwm_ctl2 & BLM_COMBINATION_MODE)
+       if (IS_I965G(dev) && (blc_pwm_ctl2 & BLM_COMBINATION_MODE))
                pci_write_config_dword(dev->pdev, PCI_LBPC, bclp);
-       else
-               I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | ((bclp * 0x101)-1));
-
+       else {
+               if (IS_IGD(dev)) {
+                       blc_pwm_ctl &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
+                       max_backlight = (blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >> 
+                                       BACKLIGHT_MODULATION_FREQ_SHIFT;
+                       shift = BACKLIGHT_DUTY_CYCLE_SHIFT + 1;
+               } else {
+                       blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
+                       max_backlight = ((blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >> 
+                                       BACKLIGHT_MODULATION_FREQ_SHIFT) * 2;
+                       shift = BACKLIGHT_DUTY_CYCLE_SHIFT;
+               }
+               level = (bclp * max_backlight) / 255;
+               I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | (level << shift));
+       }
        asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID;
 
        return 0;
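
For the non-combination path, the 0-255 ACPI brightness request is rescaled onto the hardware duty-cycle range, whose maximum is derived from the modulation-frequency field of BLC_PWM_CTL. A worked example with illustrative values:

    /* Illustrative values only; the frequency field is board-dependent. */
    u32 bclp = 128;                         /* ACPI request, 0..255 */
    u32 freq_field = 4096;                  /* BACKLIGHT_MODULATION_FREQ bits */
    u32 max_backlight = freq_field * 2;     /* non-IGD branch above */
    u32 level = (bclp * max_backlight) / 255;       /* = 4112 of 8192 */
    /* written as level << BACKLIGHT_DUTY_CYCLE_SHIFT into BLC_PWM_CTL */
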
index 3f796355346403f4cd868534b39b29866d3bd508..0466ddbeba3297e9d774665be2840d9098a29a82 100644 (file)
 #define   I915_GC_RENDER_CLOCK_200_MHZ (1 << 0)
 #define   I915_GC_RENDER_CLOCK_333_MHZ (4 << 0)
 #define LBB    0xf4
+#define GDRST 0xc0
+#define  GDRST_FULL    (0<<2)
+#define  GDRST_RENDER  (1<<2)
+#define  GDRST_MEDIA   (3<<2)
 
 /* VGA stuff */
 
 #define   FBC_CTL_PLANEA       (0<<0)
 #define   FBC_CTL_PLANEB       (1<<0)
 #define FBC_FENCE_OFF          0x0321b
+#define FBC_TAG                        0x03300
 
 #define FBC_LL_SIZE            (1536)
 
+/* Framebuffer compression for GM45+ */
+#define DPFC_CB_BASE           0x3200
+#define DPFC_CONTROL           0x3208
+#define   DPFC_CTL_EN          (1<<31)
+#define   DPFC_CTL_PLANEA      (0<<30)
+#define   DPFC_CTL_PLANEB      (1<<30)
+#define   DPFC_CTL_FENCE_EN    (1<<29)
+#define   DPFC_SR_EN           (1<<10)
+#define   DPFC_CTL_LIMIT_1X    (0<<6)
+#define   DPFC_CTL_LIMIT_2X    (1<<6)
+#define   DPFC_CTL_LIMIT_4X    (2<<6)
+#define DPFC_RECOMP_CTL                0x320c
+#define   DPFC_RECOMP_STALL_EN (1<<27)
+#define   DPFC_RECOMP_STALL_WM_SHIFT (16)
+#define   DPFC_RECOMP_STALL_WM_MASK (0x07ff0000)
+#define   DPFC_RECOMP_TIMER_COUNT_SHIFT (0)
+#define   DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f)
+#define DPFC_STATUS            0x3210
+#define   DPFC_INVAL_SEG_SHIFT  (16)
+#define   DPFC_INVAL_SEG_MASK  (0x07ff0000)
+#define   DPFC_COMP_SEG_SHIFT  (0)
+#define   DPFC_COMP_SEG_MASK   (0x000003ff)
+#define DPFC_STATUS2           0x3214
+#define DPFC_FENCE_YOFF                0x3218
+#define DPFC_CHICKEN           0x3224
+#define   DPFC_HT_MODIFY       (1<<31)
+
 /*
  * GPIO regs
  */
 #define  PF_ENABLE              (1<<31)
 #define PFA_WIN_SZ             0x68074
 #define PFB_WIN_SZ             0x68874
+#define PFA_WIN_POS            0x68070
+#define PFB_WIN_POS            0x68870
 
 /* legacy palette */
 #define LGC_PALETTE_A           0x4a000
index 20d4d19f55687f603879d4eb01eaae0378785c1e..bd6d8d91ca9f7e7f8f2efb046cd0bb3b502af861 100644 (file)
@@ -228,6 +228,7 @@ static void i915_save_modeset_reg(struct drm_device *dev)
 
        if (drm_core_check_feature(dev, DRIVER_MODESET))
                return;
+
        /* Pipe & plane A info */
        dev_priv->savePIPEACONF = I915_READ(PIPEACONF);
        dev_priv->savePIPEASRC = I915_READ(PIPEASRC);
@@ -285,6 +286,7 @@ static void i915_save_modeset_reg(struct drm_device *dev)
        dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT);
        return;
 }
+
 static void i915_restore_modeset_reg(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -379,19 +381,10 @@ static void i915_restore_modeset_reg(struct drm_device *dev)
 
        return;
 }
-int i915_save_state(struct drm_device *dev)
+
+void i915_save_display(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int i;
-
-       pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
-
-       /* Render Standby */
-       if (IS_I965G(dev) && IS_MOBILE(dev))
-               dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
-
-       /* Hardware status page */
-       dev_priv->saveHWS = I915_READ(HWS_PGA);
 
        /* Display arbitration control */
        dev_priv->saveDSPARB = I915_READ(DSPARB);
@@ -399,6 +392,7 @@ int i915_save_state(struct drm_device *dev)
        /* This is only meaningful in non-KMS mode */
        /* Don't save them in KMS mode */
        i915_save_modeset_reg(dev);
+
        /* Cursor state */
        dev_priv->saveCURACNTR = I915_READ(CURACNTR);
        dev_priv->saveCURAPOS = I915_READ(CURAPOS);
@@ -448,81 +442,22 @@ int i915_save_state(struct drm_device *dev)
        dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2);
        dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL);
 
-       /* Interrupt state */
-       dev_priv->saveIIR = I915_READ(IIR);
-       dev_priv->saveIER = I915_READ(IER);
-       dev_priv->saveIMR = I915_READ(IMR);
-
        /* VGA state */
        dev_priv->saveVGA0 = I915_READ(VGA0);
        dev_priv->saveVGA1 = I915_READ(VGA1);
        dev_priv->saveVGA_PD = I915_READ(VGA_PD);
        dev_priv->saveVGACNTRL = I915_READ(VGACNTRL);
 
-       /* Clock gating state */
-       dev_priv->saveD_STATE = I915_READ(D_STATE);
-       dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D);
-
-       /* Cache mode state */
-       dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
-
-       /* Memory Arbitration state */
-       dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
-
-       /* Scratch space */
-       for (i = 0; i < 16; i++) {
-               dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
-               dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
-       }
-       for (i = 0; i < 3; i++)
-               dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
-
-       /* Fences */
-       if (IS_I965G(dev)) {
-               for (i = 0; i < 16; i++)
-                       dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
-       } else {
-               for (i = 0; i < 8; i++)
-                       dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
-
-               if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
-                       for (i = 0; i < 8; i++)
-                               dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
-       }
        i915_save_vga(dev);
-
-       return 0;
 }
 
-int i915_restore_state(struct drm_device *dev)
+void i915_restore_display(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int i;
-
-       pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
-
-       /* Render Standby */
-       if (IS_I965G(dev) && IS_MOBILE(dev))
-               I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
-
-       /* Hardware status page */
-       I915_WRITE(HWS_PGA, dev_priv->saveHWS);
 
        /* Display arbitration */
        I915_WRITE(DSPARB, dev_priv->saveDSPARB);
 
-       /* Fences */
-       if (IS_I965G(dev)) {
-               for (i = 0; i < 16; i++)
-                       I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
-       } else {
-               for (i = 0; i < 8; i++)
-                       I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
-               if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
-                       for (i = 0; i < 8; i++)
-                               I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
-       }
-       
        /* Display port ratios (must be done before clock is set) */
        if (SUPPORTS_INTEGRATED_DP(dev)) {
                I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M);
@@ -534,9 +469,11 @@ int i915_restore_state(struct drm_device *dev)
                I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N);
                I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N);
        }
+
        /* This is only meaningful in non-KMS mode */
        /* Don't restore them in KMS mode */
        i915_restore_modeset_reg(dev);
+
        /* Cursor state */
        I915_WRITE(CURAPOS, dev_priv->saveCURAPOS);
        I915_WRITE(CURACNTR, dev_priv->saveCURACNTR);
@@ -586,6 +523,95 @@ int i915_restore_state(struct drm_device *dev)
        I915_WRITE(VGA_PD, dev_priv->saveVGA_PD);
        DRM_UDELAY(150);
 
+       i915_restore_vga(dev);
+}
+
+int i915_save_state(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int i;
+
+       pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
+
+       /* Render Standby */
+       if (IS_I965G(dev) && IS_MOBILE(dev))
+               dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
+
+       /* Hardware status page */
+       dev_priv->saveHWS = I915_READ(HWS_PGA);
+
+       i915_save_display(dev);
+
+       /* Interrupt state */
+       dev_priv->saveIER = I915_READ(IER);
+       dev_priv->saveIMR = I915_READ(IMR);
+
+       /* Clock gating state */
+       dev_priv->saveD_STATE = I915_READ(D_STATE);
+       dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); /* Not sure about this */
+
+       /* Cache mode state */
+       dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
+
+       /* Memory Arbitration state */
+       dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
+
+       /* Scratch space */
+       for (i = 0; i < 16; i++) {
+               dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
+               dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
+       }
+       for (i = 0; i < 3; i++)
+               dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
+
+       /* Fences */
+       if (IS_I965G(dev)) {
+               for (i = 0; i < 16; i++)
+                       dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
+       } else {
+               for (i = 0; i < 8; i++)
+                       dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
+
+               if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+                       for (i = 0; i < 8; i++)
+                               dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
+       }
+
+       return 0;
+}
+
+int i915_restore_state(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int i;
+
+       pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
+
+       /* Render Standby */
+       if (IS_I965G(dev) && IS_MOBILE(dev))
+               I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
+
+       /* Hardware status page */
+       I915_WRITE(HWS_PGA, dev_priv->saveHWS);
+
+       /* Fences */
+       if (IS_I965G(dev)) {
+               for (i = 0; i < 16; i++)
+                       I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
+       } else {
+               for (i = 0; i < 8; i++)
+                       I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
+               if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+                       for (i = 0; i < 8; i++)
+                               I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
+       }
+
+       i915_restore_display(dev);
+
+       /* Interrupt state */
+       I915_WRITE (IER, dev_priv->saveIER);
+       I915_WRITE (IMR,  dev_priv->saveIMR);
+
        /* Clock gating state */
        I915_WRITE (D_STATE, dev_priv->saveD_STATE);
        I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D);
@@ -603,8 +629,6 @@ int i915_restore_state(struct drm_device *dev)
        for (i = 0; i < 3; i++)
                I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]);
 
-       i915_restore_vga(dev);
-
        return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
new file mode 100644 (file)
index 0000000..5567a40
--- /dev/null
@@ -0,0 +1,315 @@
+#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _I915_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <drm/drmP.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM i915
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE i915_trace
+
+/* object tracking */
+
+TRACE_EVENT(i915_gem_object_create,
+
+           TP_PROTO(struct drm_gem_object *obj),
+
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            __field(u32, size)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->size = obj->size;
+                          ),
+
+           TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
+);
+
+TRACE_EVENT(i915_gem_object_bind,
+
+           TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset),
+
+           TP_ARGS(obj, gtt_offset),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            __field(u32, gtt_offset)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->gtt_offset = gtt_offset;
+                          ),
+
+           TP_printk("obj=%p, gtt_offset=%08x",
+                     __entry->obj, __entry->gtt_offset)
+);
+
+TRACE_EVENT(i915_gem_object_clflush,
+
+           TP_PROTO(struct drm_gem_object *obj),
+
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          ),
+
+           TP_printk("obj=%p", __entry->obj)
+);
+
+TRACE_EVENT(i915_gem_object_change_domain,
+
+           TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain),
+
+           TP_ARGS(obj, old_read_domains, old_write_domain),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            __field(u32, read_domains)
+                            __field(u32, write_domain)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->read_domains = obj->read_domains | (old_read_domains << 16);
+                          __entry->write_domain = obj->write_domain | (old_write_domain << 16);
+                          ),
+
+           TP_printk("obj=%p, read=%04x, write=%04x",
+                     __entry->obj,
+                     __entry->read_domains, __entry->write_domain)
+);
+
+TRACE_EVENT(i915_gem_object_get_fence,
+
+           TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode),
+
+           TP_ARGS(obj, fence, tiling_mode),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            __field(int, fence)
+                            __field(int, tiling_mode)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->fence = fence;
+                          __entry->tiling_mode = tiling_mode;
+                          ),
+
+           TP_printk("obj=%p, fence=%d, tiling=%d",
+                     __entry->obj, __entry->fence, __entry->tiling_mode)
+);
+
+TRACE_EVENT(i915_gem_object_unbind,
+
+           TP_PROTO(struct drm_gem_object *obj),
+
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          ),
+
+           TP_printk("obj=%p", __entry->obj)
+);
+
+TRACE_EVENT(i915_gem_object_destroy,
+
+           TP_PROTO(struct drm_gem_object *obj),
+
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          ),
+
+           TP_printk("obj=%p", __entry->obj)
+);
+
+/* batch tracing */
+
+TRACE_EVENT(i915_gem_request_submit,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_flush,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno,
+                    u32 flush_domains, u32 invalidate_domains),
+
+           TP_ARGS(dev, seqno, flush_domains, invalidate_domains),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            __field(u32, flush_domains)
+                            __field(u32, invalidate_domains)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          __entry->flush_domains = flush_domains;
+                          __entry->invalidate_domains = invalidate_domains;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x",
+                     __entry->dev, __entry->seqno,
+                     __entry->flush_domains, __entry->invalidate_domains)
+);
+
+
+TRACE_EVENT(i915_gem_request_complete,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_retire,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_wait_begin,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_wait_end,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_ring_wait_begin,
+
+           TP_PROTO(struct drm_device *dev),
+
+           TP_ARGS(dev),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          ),
+
+           TP_printk("dev=%p", __entry->dev)
+);
+
+TRACE_EVENT(i915_ring_wait_end,
+
+           TP_PROTO(struct drm_device *dev),
+
+           TP_ARGS(dev),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          ),
+
+           TP_printk("dev=%p", __entry->dev)
+);
+
+#endif /* _I915_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915
+#include <trace/define_trace.h>
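
Each TRACE_EVENT() above generates a trace_<name>() helper that compiles down to almost nothing while the tracepoint is disabled; defining CREATE_TRACE_POINTS before including this header (done once, in i915_trace_points.c below) emits the actual event bodies. A call site for two of the events defined here:

    /* Sketch of a call site for the tracepoints defined above. */
    #include "i915_trace.h"

    static void example_call_site(struct drm_device *dev,
                                  struct drm_gem_object *obj, u32 seqno)
    {
            trace_i915_gem_object_create(obj);              /* from TRACE_EVENT() */
            trace_i915_gem_request_complete(dev, seqno);    /* as used in i915_irq.c */
    }
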
diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c
new file mode 100644 (file)
index 0000000..ead876e
--- /dev/null
@@ -0,0 +1,11 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ */
+
+#include "i915_drv.h"
+
+#define CREATE_TRACE_POINTS
+#include "i915_trace.h"
index 1e28c1652fd03f48633b3eb2674460b760487530..4337414846b6eb3dc6b4704f7817a75520ff36fa 100644 (file)
@@ -217,6 +217,9 @@ parse_general_features(struct drm_i915_private *dev_priv,
                        if (IS_I85X(dev_priv->dev))
                                dev_priv->lvds_ssc_freq =
                                        general->ssc_freq ? 66 : 48;
+                       else if (IS_IGDNG(dev_priv->dev))
+                               dev_priv->lvds_ssc_freq =
+                                       general->ssc_freq ? 100 : 120;
                        else
                                dev_priv->lvds_ssc_freq =
                                        general->ssc_freq ? 100 : 96;
index 88814fa2dfd213427d556d091aa36f933bf08980..212e22740fc123e4a569a1e84e7445ae8a9de135 100644 (file)
@@ -179,13 +179,10 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
 {
        struct drm_device *dev = connector->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 adpa, temp;
+       u32 adpa;
        bool ret;
 
-       temp = adpa = I915_READ(PCH_ADPA);
-
-       adpa &= ~ADPA_DAC_ENABLE;
-       I915_WRITE(PCH_ADPA, adpa);
+       adpa = I915_READ(PCH_ADPA);
 
        adpa &= ~ADPA_CRT_HOTPLUG_MASK;
 
@@ -212,8 +209,6 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
        else
                ret = false;
 
-       /* restore origin register */
-       I915_WRITE(PCH_ADPA, temp);
        return ret;
 }
 
index 0227b1652906cc0ad345863f2ead6c8bce6025dc..93ff6c03733e6359b69e720dc48f3193272ce04f 100644 (file)
@@ -24,6 +24,8 @@
  *     Eric Anholt <eric@anholt.net>
  */
 
+#include <linux/module.h>
+#include <linux/input.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
 #include "drmP.h"
@@ -875,7 +877,7 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
                                               refclk, best_clock);
 
        if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
-               if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
+               if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) ==
                    LVDS_CLKB_POWER_UP)
                        clock.p2 = limit->p2.p2_fast;
                else
@@ -952,6 +954,241 @@ intel_wait_for_vblank(struct drm_device *dev)
        mdelay(20);
 }
 
+/* Parameters have changed, update FBC info */
+static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_framebuffer *fb = crtc->fb;
+       struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+       struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int plane, i;
+       u32 fbc_ctl, fbc_ctl2;
+
+       dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
+
+       if (fb->pitch < dev_priv->cfb_pitch)
+               dev_priv->cfb_pitch = fb->pitch;
+
+       /* FBC_CTL wants 64B units */
+       dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
+       dev_priv->cfb_fence = obj_priv->fence_reg;
+       dev_priv->cfb_plane = intel_crtc->plane;
+       plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
+
+       /* Clear old tags */
+       for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
+               I915_WRITE(FBC_TAG + (i * 4), 0);
+
+       /* Set it up... */
+       fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane;
+       if (obj_priv->tiling_mode != I915_TILING_NONE)
+               fbc_ctl2 |= FBC_CTL_CPU_FENCE;
+       I915_WRITE(FBC_CONTROL2, fbc_ctl2);
+       I915_WRITE(FBC_FENCE_OFF, crtc->y);
+
+       /* enable it... */
+       fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
+       fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
+       fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
+       if (obj_priv->tiling_mode != I915_TILING_NONE)
+               fbc_ctl |= dev_priv->cfb_fence;
+       I915_WRITE(FBC_CONTROL, fbc_ctl);
+
+       DRM_DEBUG("enabled FBC, pitch %ld, yoff %d, plane %d\n",
+                 dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane);
+}
+
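
FBC_CTL encodes the compressed-buffer pitch in 64-byte units, minus one, hence the (pitch / 64) - 1 above. For example:

    /* Worked example of the FBC_CTL pitch encoding (illustrative numbers). */
    u32 pitch_bytes = 4096;                         /* e.g. 1024 px at 4 Bpp */
    u32 fbc_ctl_pitch = (pitch_bytes / 64) - 1;     /* = 63, in 64B units */
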
+void i8xx_disable_fbc(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 fbc_ctl;
+
+       if (!I915_HAS_FBC(dev))
+               return;
+
+       /* Disable compression */
+       fbc_ctl = I915_READ(FBC_CONTROL);
+       fbc_ctl &= ~FBC_CTL_EN;
+       I915_WRITE(FBC_CONTROL, fbc_ctl);
+
+       /* Wait for compressing bit to clear */
+       while (I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING)
+               cpu_relax(); /* busy-wait; compression finishes quickly */
+
+       intel_wait_for_vblank(dev);
+
+       DRM_DEBUG("disabled FBC\n");
+}
+
+static bool i8xx_fbc_enabled(struct drm_crtc *crtc)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
+}
+
+static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_framebuffer *fb = crtc->fb;
+       struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+       struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int plane = (intel_crtc->plane == 0 ? DPFC_CTL_PLANEA :
+                    DPFC_CTL_PLANEB);
+       unsigned long stall_watermark = 200;
+       u32 dpfc_ctl;
+
+       dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
+       dev_priv->cfb_fence = obj_priv->fence_reg;
+       dev_priv->cfb_plane = intel_crtc->plane;
+
+       dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
+       if (obj_priv->tiling_mode != I915_TILING_NONE) {
+               dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence;
+               I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
+       } else {
+               I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY);
+       }
+
+       I915_WRITE(DPFC_CONTROL, dpfc_ctl);
+       I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
+                  (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
+                  (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
+       I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
+
+       /* enable it... */
+       I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
+
+       DRM_DEBUG("enabled fbc on plane %d\n", intel_crtc->plane);
+}
+
+void g4x_disable_fbc(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 dpfc_ctl;
+
+       /* Disable compression */
+       dpfc_ctl = I915_READ(DPFC_CONTROL);
+       dpfc_ctl &= ~DPFC_CTL_EN;
+       I915_WRITE(DPFC_CONTROL, dpfc_ctl);
+       intel_wait_for_vblank(dev);
+
+       DRM_DEBUG("disabled FBC\n");
+}
+
+static bool g4x_fbc_enabled(struct drm_crtc *crtc)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
+}
+
+/**
+ * intel_update_fbc - enable/disable FBC as needed
+ * @crtc: CRTC to point the compressor at
+ * @mode: mode in use
+ *
+ * Set up the framebuffer compression hardware at mode set time.  We
+ * enable it if possible:
+ *   - plane A only (on pre-965)
+ *   - no pixel multiply/line duplication
+ *   - no alpha buffer discard
+ *   - no dual wide
+ *   - framebuffer <= 2048 in width, 1536 in height
+ *
+ * We can't assume that any compression will take place (worst case),
+ * so the compressed buffer has to be the same size as the uncompressed
+ * one.  It also must reside (along with the line length buffer) in
+ * stolen memory.
+ *
+ * We need to enable/disable FBC on a global basis.
+ */
+static void intel_update_fbc(struct drm_crtc *crtc,
+                            struct drm_display_mode *mode)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_framebuffer *fb = crtc->fb;
+       struct intel_framebuffer *intel_fb;
+       struct drm_i915_gem_object *obj_priv;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int plane = intel_crtc->plane;
+
+       if (!i915_powersave)
+               return;
+
+       if (!dev_priv->display.fbc_enabled ||
+           !dev_priv->display.enable_fbc ||
+           !dev_priv->display.disable_fbc)
+               return;
+
+       if (!crtc->fb)
+               return;
+
+       intel_fb = to_intel_framebuffer(fb);
+       obj_priv = intel_fb->obj->driver_private;
+
+       /*
+        * If FBC is already on, we just have to verify that we can
+        * keep it that way...
+        * Need to disable if:
+        *   - changing FBC params (stride, fence, mode)
+        *   - new fb is too large to fit in compressed buffer
+        *   - going to an unsupported config (interlace, pixel multiply, etc.)
+        */
+       if (intel_fb->obj->size > dev_priv->cfb_size) {
+               DRM_DEBUG("framebuffer too large, disabling compression\n");
+               goto out_disable;
+       }
+       if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
+           (mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
+               DRM_DEBUG("mode incompatible with compression, disabling\n");
+               goto out_disable;
+       }
+       if ((mode->hdisplay > 2048) ||
+           (mode->vdisplay > 1536)) {
+               DRM_DEBUG("mode too large for compression, disabling\n");
+               goto out_disable;
+       }
+       if ((IS_I915GM(dev) || IS_I945GM(dev)) && plane != 0) {
+               DRM_DEBUG("plane not 0, disabling compression\n");
+               goto out_disable;
+       }
+       if (obj_priv->tiling_mode != I915_TILING_X) {
+               DRM_DEBUG("framebuffer not tiled, disabling compression\n");
+               goto out_disable;
+       }
+
+       if (dev_priv->display.fbc_enabled(crtc)) {
+               /* We can re-enable it in this case, but need to update pitch */
+               if (fb->pitch > dev_priv->cfb_pitch)
+                       dev_priv->display.disable_fbc(dev);
+               if (obj_priv->fence_reg != dev_priv->cfb_fence)
+                       dev_priv->display.disable_fbc(dev);
+               if (plane != dev_priv->cfb_plane)
+                       dev_priv->display.disable_fbc(dev);
+       }
+
+       if (!dev_priv->display.fbc_enabled(crtc)) {
+               /* Now try to turn it back on if possible */
+               dev_priv->display.enable_fbc(crtc, 500);
+       }
+
+       return;
+
+out_disable:
+       DRM_DEBUG("unsupported config, disabling FBC\n");
+       /* Multiple disables should be harmless */
+       if (dev_priv->display.fbc_enabled(crtc))
+               dev_priv->display.disable_fbc(dev);
+}
+
 static int
 intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
                    struct drm_framebuffer *old_fb)
@@ -964,12 +1201,13 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
        struct drm_i915_gem_object *obj_priv;
        struct drm_gem_object *obj;
        int pipe = intel_crtc->pipe;
+       int plane = intel_crtc->plane;
        unsigned long Start, Offset;
-       int dspbase = (pipe == 0 ? DSPAADDR : DSPBADDR);
-       int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF);
-       int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE;
-       int dsptileoff = (pipe == 0 ? DSPATILEOFF : DSPBTILEOFF);
-       int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+       int dspbase = (plane == 0 ? DSPAADDR : DSPBADDR);
+       int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF);
+       int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE;
+       int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF);
+       int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
        u32 dspcntr, alignment;
        int ret;
 
@@ -979,12 +1217,12 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
                return 0;
        }
 
-       switch (pipe) {
+       switch (plane) {
        case 0:
        case 1:
                break;
        default:
-               DRM_ERROR("Can't update pipe %d in SAREA\n", pipe);
+               DRM_ERROR("Can't update plane %d in SAREA\n", plane);
                return -EINVAL;
        }
 
@@ -1086,6 +1324,9 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
                I915_READ(dspbase);
        }
 
+       if ((IS_I965G(dev) || plane == 0))
+               intel_update_fbc(crtc, &crtc->mode);
+
        intel_wait_for_vblank(dev);
 
        if (old_fb) {
@@ -1217,6 +1458,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
        int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF;
        int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1;
        int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ;
+       int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS;
        int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
        int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
        int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B;
@@ -1268,6 +1510,19 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
                        }
                }
 
+               /* Enable panel fitting for LVDS */
+               if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
+                       temp = I915_READ(pf_ctl_reg);
+                       I915_WRITE(pf_ctl_reg, temp | PF_ENABLE);
+
+                       /* currently full aspect */
+                       I915_WRITE(pf_win_pos, 0);
+
+                       I915_WRITE(pf_win_size,
+                                  (dev_priv->panel_fixed_mode->hdisplay << 16) |
+                                  (dev_priv->panel_fixed_mode->vdisplay));
+               }
+
                /* Enable CPU pipe */
                temp = I915_READ(pipeconf_reg);
                if ((temp & PIPEACONF_ENABLE) == 0) {
@@ -1532,9 +1787,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_crtc->pipe;
+       int plane = intel_crtc->plane;
        int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
-       int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
-       int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR;
+       int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
+       int dspbase_reg = (plane == 0) ? DSPAADDR : DSPBADDR;
        int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
        u32 temp;
 
@@ -1577,6 +1833,9 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
 
                intel_crtc_load_lut(crtc);
 
+               if ((IS_I965G(dev) || plane == 0))
+                       intel_update_fbc(crtc, &crtc->mode);
+
                /* Give the overlay scaler a chance to enable if it's on this pipe */
                //intel_crtc_dpms_video(crtc, true); TODO
                intel_update_watermarks(dev);
@@ -1586,6 +1845,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
                /* Give the overlay scaler a chance to disable if it's on this pipe */
                //intel_crtc_dpms_video(crtc, FALSE); TODO
 
+               if (dev_priv->cfb_plane == plane &&
+                   dev_priv->display.disable_fbc)
+                       dev_priv->display.disable_fbc(dev);
+
                /* Disable the VGA plane that we never use */
                i915_disable_vga(dev);
 
@@ -1634,15 +1897,13 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
 static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
 {
        struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_master_private *master_priv;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_crtc->pipe;
        bool enabled;
 
-       if (IS_IGDNG(dev))
-               igdng_crtc_dpms(crtc, mode);
-       else
-               i9xx_crtc_dpms(crtc, mode);
+       dev_priv->display.dpms(crtc, mode);
 
        intel_crtc->dpms_mode = mode;
 
@@ -1709,56 +1970,68 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc,
        return true;
 }
 
+static int i945_get_display_clock_speed(struct drm_device *dev)
+{
+       return 400000;
+}
 
-/** Returns the core display clock speed for i830 - i945 */
-static int intel_get_core_clock_speed(struct drm_device *dev)
+static int i915_get_display_clock_speed(struct drm_device *dev)
 {
+       return 333000;
+}
 
-       /* Core clock values taken from the published datasheets.
-        * The 830 may go up to 166 Mhz, which we should check.
-        */
-       if (IS_I945G(dev))
-               return 400000;
-       else if (IS_I915G(dev))
-               return 333000;
-       else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
-               return 200000;
-       else if (IS_I915GM(dev)) {
-               u16 gcfgc = 0;
+static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
+{
+       return 200000;
+}
 
-               pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+static int i915gm_get_display_clock_speed(struct drm_device *dev)
+{
+       u16 gcfgc = 0;
 
-               if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
-                       return 133000;
-               else {
-                       switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
-                       case GC_DISPLAY_CLOCK_333_MHZ:
-                               return 333000;
-                       default:
-                       case GC_DISPLAY_CLOCK_190_200_MHZ:
-                               return 190000;
-                       }
-               }
-       } else if (IS_I865G(dev))
-               return 266000;
-       else if (IS_I855(dev)) {
-               u16 hpllcc = 0;
-               /* Assume that the hardware is in the high speed state.  This
-                * should be the default.
-                */
-               switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
-               case GC_CLOCK_133_200:
-               case GC_CLOCK_100_200:
-                       return 200000;
-               case GC_CLOCK_166_250:
-                       return 250000;
-               case GC_CLOCK_100_133:
-                       return 133000;
+       pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+
+       if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
+               return 133000;
+       else {
+               switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
+               case GC_DISPLAY_CLOCK_333_MHZ:
+                       return 333000;
+               default:
+               case GC_DISPLAY_CLOCK_190_200_MHZ:
+                       return 190000;
                }
-       } else /* 852, 830 */
+       }
+}
+
+static int i865_get_display_clock_speed(struct drm_device *dev)
+{
+       return 266000;
+}
+
+static int i855_get_display_clock_speed(struct drm_device *dev)
+{
+       u16 hpllcc = 0;
+       /* Assume that the hardware is in the high speed state.  This
+        * should be the default.
+        */
+       switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
+       case GC_CLOCK_133_200:
+       case GC_CLOCK_100_200:
+               return 200000;
+       case GC_CLOCK_166_250:
+               return 250000;
+       case GC_CLOCK_100_133:
                return 133000;
+       }
+
+       /* Shouldn't happen */
+       return 0;
+}
 
-       return 0; /* Silence gcc warning */
+static int i830_get_display_clock_speed(struct drm_device *dev)
+{
+       return 133000;
 }
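
These one-liners replace the removed if/else ladder; the right helper is chosen once at load time into dev_priv->display.get_display_clock_speed (used later in this patch), so the mode-set path becomes a single indirect call. A sketch of that selection, with the struct layout and setup location assumed rather than quoted:

    struct display_funcs {
            int (*get_display_clock_speed)(struct drm_device *dev);
            /* ...dpms, update_wm, get_fifo_size, FBC hooks, etc. */
    };

    /* at driver load, something like: */
    if (IS_I945G(dev))
            dev_priv->display.get_display_clock_speed =
                    i945_get_display_clock_speed;
    else if (IS_I915G(dev))
            dev_priv->display.get_display_clock_speed =
                    i915_get_display_clock_speed;
    /* ...one branch per helper above */
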
 
 /**
@@ -1921,7 +2194,14 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 {
        long entries_required, wm_size;
 
-       entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000;
+       /*
+        * Note: we need to make sure we don't overflow for various clock &
+        * latency values.
+        * clocks go from a few thousand to several hundred thousand.
+        * latency is usually a few thousand
+        */
+       entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
+               1000;
        entries_required /= wm->cacheline_size;
 
        DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required);
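
Dividing the clock down before multiplying keeps the intermediate product inside 32 bits. With plausible worst-case numbers:

    /* Worked overflow example (illustrative values). */
    long clock_in_khz = 400000, pixel_size = 4, latency_ns = 5000;

    /* old order: 400000 * 4 * 5000 = 8e9, overflows a 32-bit long
     * before the final division ever happens */
    long entries = ((clock_in_khz / 1000) * pixel_size * latency_ns) / 1000;
    /* = (400 * 4 * 5000) / 1000 = 8000, the value the old formula meant */
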
@@ -1986,14 +2266,13 @@ static struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, int fsb,
        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
-                       fsb == latency->fsb_freq && mem == latency->mem_freq)
-                       break;
+                   fsb == latency->fsb_freq && mem == latency->mem_freq)
+                       return latency;
        }
-       if (i >= ARRAY_SIZE(cxsr_latency_table)) {
-               DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
-               return NULL;
-       }
-       return latency;
+
+       DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
+
+       return NULL;
 }
 
 static void igd_disable_cxsr(struct drm_device *dev)
@@ -2084,32 +2363,36 @@ static void igd_enable_cxsr(struct drm_device *dev, unsigned long clock,
  */
 const static int latency_ns = 5000;
 
-static int intel_get_fifo_size(struct drm_device *dev, int plane)
+static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;
 
-       if (IS_I9XX(dev)) {
-               if (plane == 0)
-                       size = dsparb & 0x7f;
-               else
-                       size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
-                               (dsparb & 0x7f);
-       } else if (IS_I85X(dev)) {
-               if (plane == 0)
-                       size = dsparb & 0x1ff;
-               else
-                       size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
-                               (dsparb & 0x1ff);
-               size >>= 1; /* Convert to cachelines */
-       } else if (IS_845G(dev)) {
+       if (plane == 0)
                size = dsparb & 0x7f;
-               size >>= 2; /* Convert to cachelines */
-       } else {
-               size = dsparb & 0x7f;
-               size >>= 1; /* Convert to cachelines */
-       }
+       else
+               size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
+                       (dsparb & 0x7f);
+
+       DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+                 size);
+
+       return size;
+}
+
+static int i85x_get_fifo_size(struct drm_device *dev, int plane)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dsparb = I915_READ(DSPARB);
+       int size;
+
+       if (plane == 0)
+               size = dsparb & 0x1ff;
+       else
+               size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
+                       (dsparb & 0x1ff);
+       size >>= 1; /* Convert to cachelines */
 
        DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
                  size);
@@ -2117,7 +2400,38 @@ static int intel_get_fifo_size(struct drm_device *dev, int plane)
        return size;
 }
 
-static void g4x_update_wm(struct drm_device *dev)
+static int i845_get_fifo_size(struct drm_device *dev, int plane)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dsparb = I915_READ(DSPARB);
+       int size;
+
+       size = dsparb & 0x7f;
+       size >>= 2; /* Convert to cachelines */
+
+       DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+                 size);
+
+       return size;
+}
+
+static int i830_get_fifo_size(struct drm_device *dev, int plane)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dsparb = I915_READ(DSPARB);
+       int size;
+
+       size = dsparb & 0x7f;
+       size >>= 1; /* Convert to cachelines */
+
+       DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+                 size);
+
+       return size;
+}
+
+static void g4x_update_wm(struct drm_device *dev, int unused, int unused2,
+                         int unused3, int unused4)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 fw_blc_self = I915_READ(FW_BLC_SELF);
@@ -2129,7 +2443,8 @@ static void g4x_update_wm(struct drm_device *dev)
        I915_WRITE(FW_BLC_SELF, fw_blc_self);
 }
 
-static void i965_update_wm(struct drm_device *dev)
+static void i965_update_wm(struct drm_device *dev, int unused, int unused2,
+                          int unused3, int unused4)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -2165,8 +2480,8 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
        cacheline_size = planea_params.cacheline_size;
 
        /* Update per-plane FIFO sizes */
-       planea_params.fifo_size = intel_get_fifo_size(dev, 0);
-       planeb_params.fifo_size = intel_get_fifo_size(dev, 1);
+       planea_params.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
+       planeb_params.fifo_size = dev_priv->display.get_fifo_size(dev, 1);
 
        planea_wm = intel_calculate_wm(planea_clock, &planea_params,
                                       pixel_size, latency_ns);
@@ -2213,14 +2528,14 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
        I915_WRITE(FW_BLC2, fwater_hi);
 }
 
-static void i830_update_wm(struct drm_device *dev, int planea_clock,
-                          int pixel_size)
+static void i830_update_wm(struct drm_device *dev, int planea_clock, int unused,
+                          int unused2, int pixel_size)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff;
        int planea_wm;
 
-       i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0);
+       i830_wm_info.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
 
        planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info,
                                       pixel_size, latency_ns);
@@ -2264,6 +2579,7 @@ static void i830_update_wm(struct drm_device *dev, int planea_clock,
   */
 static void intel_update_watermarks(struct drm_device *dev)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        struct intel_crtc *intel_crtc;
        int sr_hdisplay = 0;
@@ -2302,15 +2618,8 @@ static void intel_update_watermarks(struct drm_device *dev)
        else if (IS_IGD(dev))
                igd_disable_cxsr(dev);
 
-       if (IS_G4X(dev))
-               g4x_update_wm(dev);
-       else if (IS_I965G(dev))
-               i965_update_wm(dev);
-       else if (IS_I9XX(dev) || IS_MOBILE(dev))
-               i9xx_update_wm(dev, planea_clock, planeb_clock, sr_hdisplay,
-                              pixel_size);
-       else
-               i830_update_wm(dev, planea_clock, pixel_size);
+       dev_priv->display.update_wm(dev, planea_clock, planeb_clock,
+                                   sr_hdisplay, pixel_size);
 }
 
 static int intel_crtc_mode_set(struct drm_crtc *crtc,
@@ -2323,10 +2632,11 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_crtc->pipe;
+       int plane = intel_crtc->plane;
        int fp_reg = (pipe == 0) ? FPA0 : FPB0;
        int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
        int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD;
-       int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+       int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
        int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
        int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
        int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
@@ -2334,8 +2644,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
        int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B;
        int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B;
        int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B;
-       int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE;
-       int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS;
+       int dspsize_reg = (plane == 0) ? DSPASIZE : DSPBSIZE;
+       int dsppos_reg = (plane == 0) ? DSPAPOS : DSPBPOS;
        int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC;
        int refclk, num_outputs = 0;
        intel_clock_t clock, reduced_clock;
@@ -2568,7 +2878,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
           enable color space conversion */
        if (!IS_IGDNG(dev)) {
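+               /* Pipe A is the zero value of the pipe-select field, so
+                * select it by clearing the mask; OR-ing in
+                * DISPPLANE_SEL_PIPE_A (0) was a no-op that could leave a
+                * stale pipe B selection in place. */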
                if (pipe == 0)
-                       dspcntr |= DISPPLANE_SEL_PIPE_A;
+                       dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
                else
                        dspcntr |= DISPPLANE_SEL_PIPE_B;
        }
@@ -2580,7 +2890,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
                 * XXX: No double-wide on 915GM pipe B. Is that the only reason for the
                 * pipe == 0 check?
                 */
-               if (mode->clock > intel_get_core_clock_speed(dev) * 9 / 10)
+               if (mode->clock >
+                   dev_priv->display.get_display_clock_speed(dev) * 9 / 10)
                        pipeconf |= PIPEACONF_DOUBLE_WIDE;
                else
                        pipeconf &= ~PIPEACONF_DOUBLE_WIDE;
@@ -2652,9 +2963,12 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
                udelay(150);
 
                if (IS_I965G(dev) && !IS_IGDNG(dev)) {
-                       sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
-                       I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
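+                       /* the UDI pixel multiplier only applies to SDVO;
+                        * clear DPLL_MD for everything else */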
+                       if (is_sdvo) {
+                               sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+                               I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
                                        ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT));
+                       } else {
+                               I915_WRITE(dpll_md_reg, 0);
+                       }
                } else {
                        /* write it again -- the BIOS does, after all */
                        I915_WRITE(dpll_reg, dpll);
@@ -2734,6 +3048,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
        /* Flush the plane changes */
        ret = intel_pipe_set_base(crtc, x, y, old_fb);
 
+       if (IS_I965G(dev) || plane == 0)
+               intel_update_fbc(crtc, &crtc->mode);
+
        intel_update_watermarks(dev);
 
        drm_vblank_post_modeset(dev, pipe);
@@ -2778,6 +3095,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
        struct drm_gem_object *bo;
        struct drm_i915_gem_object *obj_priv;
        int pipe = intel_crtc->pipe;
+       int plane = intel_crtc->plane;
        uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
        uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
        uint32_t temp = I915_READ(control);
@@ -2863,6 +3181,10 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
                        i915_gem_object_unpin(intel_crtc->cursor_bo);
                drm_gem_object_unreference(intel_crtc->cursor_bo);
        }
+
+       if (IS_I965G(dev) || plane == 0)
+               intel_update_fbc(crtc, &crtc->mode);
+
        mutex_unlock(&dev->struct_mutex);
 
        intel_crtc->cursor_addr = addr;
@@ -3544,6 +3866,14 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
                intel_crtc->lut_b[i] = i;
        }
 
+       /* Swap pipes & planes for FBC on pre-965 */
+       intel_crtc->pipe = pipe;
+       intel_crtc->plane = pipe;
+       if (IS_MOBILE(dev) && IS_I9XX(dev) && !IS_I965G(dev)) {
+               DRM_DEBUG("swapping pipes & planes for FBC\n");
+               intel_crtc->plane = ((pipe == 0) ? 1 : 0);
+       }
+
        intel_crtc->cursor_addr = 0;
        intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF;
        drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
@@ -3826,6 +4156,73 @@ void intel_init_clock_gating(struct drm_device *dev)
        }
 }
 
+/* Set up chip specific display functions */
+static void intel_init_display(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
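+       /* Filling in this vtable once at init replaces the per-call IS_xxx()
+        * chip checks that previously lived in the watermark and mode-set
+        * paths. */
+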
+       /* We always want a DPMS function */
+       if (IS_IGDNG(dev))
+               dev_priv->display.dpms = igdng_crtc_dpms;
+       else
+               dev_priv->display.dpms = i9xx_crtc_dpms;
+
+       /* Only mobile has FBC, leave pointers NULL for other chips */
+       if (IS_MOBILE(dev)) {
+               if (IS_GM45(dev)) {
+                       dev_priv->display.fbc_enabled = g4x_fbc_enabled;
+                       dev_priv->display.enable_fbc = g4x_enable_fbc;
+                       dev_priv->display.disable_fbc = g4x_disable_fbc;
+               } else if (IS_I965GM(dev) || IS_I945GM(dev) || IS_I915GM(dev)) {
+                       dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
+                       dev_priv->display.enable_fbc = i8xx_enable_fbc;
+                       dev_priv->display.disable_fbc = i8xx_disable_fbc;
+               }
+               /* 855GM needs testing */
+       }
+
+       /* Returns the core display clock speed */
+       if (IS_I945G(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i945_get_display_clock_speed;
+       else if (IS_I915G(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i915_get_display_clock_speed;
+       else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i9xx_misc_get_display_clock_speed;
+       else if (IS_I915GM(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i915gm_get_display_clock_speed;
+       else if (IS_I865G(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i865_get_display_clock_speed;
+       else if (IS_I855(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i855_get_display_clock_speed;
+       else /* 852, 830 */
+               dev_priv->display.get_display_clock_speed =
+                       i830_get_display_clock_speed;
+
+       /* For FIFO watermark updates */
+       if (IS_G4X(dev))
+               dev_priv->display.update_wm = g4x_update_wm;
+       else if (IS_I965G(dev))
+               dev_priv->display.update_wm = i965_update_wm;
+       else if (IS_I9XX(dev) || IS_MOBILE(dev)) {
+               dev_priv->display.update_wm = i9xx_update_wm;
+               dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
+       } else {
+               if (IS_I85X(dev))
+                       dev_priv->display.get_fifo_size = i85x_get_fifo_size;
+               else if (IS_845G(dev))
+                       dev_priv->display.get_fifo_size = i845_get_fifo_size;
+               else
+                       dev_priv->display.get_fifo_size = i830_get_fifo_size;
+               dev_priv->display.update_wm = i830_update_wm;
+       }
+}
+
 void intel_modeset_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3839,6 +4236,8 @@ void intel_modeset_init(struct drm_device *dev)
 
        dev->mode_config.funcs = (void *)&intel_mode_funcs;
 
+       intel_init_display(dev);
+
        if (IS_I965G(dev)) {
                dev->mode_config.max_width = 8192;
                dev->mode_config.max_height = 8192;
@@ -3904,6 +4303,9 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
        mutex_unlock(&dev->struct_mutex);
 
+       if (dev_priv->display.disable_fbc)
+               dev_priv->display.disable_fbc(dev);
+
        drm_mode_config_cleanup(dev);
 }
 
index 3ebbbabfe59bf413001d27d9dd40d847fb7572c9..8aa4b7f30daa58bea5280bc9569041e4dc25c157 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/i2c.h>
 #include <linux/i2c-id.h>
 #include <linux/i2c-algo-bit.h>
+#include "i915_drv.h"
 #include "drm_crtc.h"
 
 #include "drm_crtc_helper.h"
@@ -111,8 +112,8 @@ struct intel_output {
 
 struct intel_crtc {
        struct drm_crtc base;
-       int pipe;
-       int plane;
+       enum pipe pipe;
+       enum plane plane;
        struct drm_gem_object *cursor_bo;
        uint32_t cursor_addr;
        u8 lut_r[256], lut_g[256], lut_b[256];
index dafc0da1c256fa9acccc99f863c21cb6e4676f0c..98ae3d73577ee473ed927065b2c0d6e540c33b36 100644 (file)
@@ -27,6 +27,7 @@
  *      Jesse Barnes <jesse.barnes@intel.com>
  */
 
+#include <acpi/button.h>
 #include <linux/dmi.h>
 #include <linux/i2c.h>
 #include "drmP.h"
@@ -295,6 +296,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
                goto out;
        }
 
+       /* full-screen scaling only for now */
+       if (IS_IGDNG(dev))
+               goto out;
+
        /* 965+ wants fuzzy fitting */
        if (IS_I965G(dev))
                pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) |
@@ -322,8 +327,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
         * to register description and PRM.
         * Change the value here to see the borders for debugging
         */
-       I915_WRITE(BCLRPAT_A, 0);
-       I915_WRITE(BCLRPAT_B, 0);
+       if (!IS_IGDNG(dev)) {
+               I915_WRITE(BCLRPAT_A, 0);
+               I915_WRITE(BCLRPAT_B, 0);
+       }
 
        switch (lvds_priv->fitting_mode) {
        case DRM_MODE_SCALE_CENTER:
@@ -572,7 +579,6 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
         * settings.
         */
 
-       /* No panel fitting yet, fixme */
        if (IS_IGDNG(dev))
                return;
 
@@ -585,15 +591,33 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
        I915_WRITE(PFIT_CONTROL, lvds_priv->pfit_control);
 }
 
+/* Some lid devices report an incorrect lid status; assume they're connected */
+static const struct dmi_system_id bad_lid_status[] = {
+       {
+               .ident = "Aspire One",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"),
+               },
+       },
+       { }
+};
+
 /**
  * Detect the LVDS connection.
  *
- * This always returns CONNECTOR_STATUS_CONNECTED.  This connector should only have
- * been set up if the LVDS was actually connected anyway.
+ * Since LVDS doesn't have hotplug, we use the lid as a proxy.  Open means
+ * connected and closed means disconnected.  We also send hotplug events as
+ * needed, using lid status notification from the input layer.
  */
 static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector)
 {
-       return connector_status_connected;
+       enum drm_connector_status status = connector_status_connected;
+
+       if (!acpi_lid_open() && !dmi_check_system(bad_lid_status))
+               status = connector_status_disconnected;
+
+       return status;
 }
 
 /**
@@ -632,6 +656,24 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
        return 0;
 }
 
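+/* Lid notifier: the LVDS connector reports lid state as its connection
+ * status (see intel_lvds_detect above), so a lid event is also treated as
+ * a hotplug event. */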
+static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
+                           void *unused)
+{
+       struct drm_i915_private *dev_priv =
+               container_of(nb, struct drm_i915_private, lid_notifier);
+       struct drm_device *dev = dev_priv->dev;
+
+       if (acpi_lid_open() && !dev_priv->suspended) {
+               mutex_lock(&dev->mode_config.mutex);
+               drm_helper_resume_force_mode(dev);
+               mutex_unlock(&dev->mode_config.mutex);
+       }
+
+       drm_sysfs_hotplug_event(dev_priv->dev);
+
+       return NOTIFY_OK;
+}
+
 /**
  * intel_lvds_destroy - unregister and free LVDS structures
  * @connector: connector to free
@@ -641,10 +683,14 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
  */
 static void intel_lvds_destroy(struct drm_connector *connector)
 {
+       struct drm_device *dev = connector->dev;
        struct intel_output *intel_output = to_intel_output(connector);
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
        if (intel_output->ddc_bus)
                intel_i2c_destroy(intel_output->ddc_bus);
+       if (dev_priv->lid_notifier.notifier_call)
+               acpi_lid_notifier_unregister(&dev_priv->lid_notifier);
        drm_sysfs_connector_remove(connector);
        drm_connector_cleanup(connector);
        kfree(connector);
@@ -1011,6 +1057,11 @@ out:
                pwm |= PWM_PCH_ENABLE;
                I915_WRITE(BLC_PWM_PCH_CTL1, pwm);
        }
+       dev_priv->lid_notifier.notifier_call = intel_lid_notify;
+       if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) {
+               DRM_DEBUG("lid notifier registration failed\n");
+               dev_priv->lid_notifier.notifier_call = NULL;
+       }
        drm_sysfs_connector_add(connector);
        return;
 
index 0bf28efcf2c1f1d9ac4eb8a8f4d243e77dfd95d1..083bec2e50f9993d11635ae5d3881f1cbc272a85 100644 (file)
@@ -135,6 +135,30 @@ struct intel_sdvo_priv {
        struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2;
        struct intel_sdvo_dtd save_output_dtd[16];
        u32 save_SDVOX;
+       /* add the property for the SDVO-TV */
+       struct drm_property *left_property;
+       struct drm_property *right_property;
+       struct drm_property *top_property;
+       struct drm_property *bottom_property;
+       struct drm_property *hpos_property;
+       struct drm_property *vpos_property;
+
+       /* add the property for the SDVO-TV/LVDS */
+       struct drm_property *brightness_property;
+       struct drm_property *contrast_property;
+       struct drm_property *saturation_property;
+       struct drm_property *hue_property;
+
+       /* Variables recording the current setting of each property above */
+       u32     left_margin, right_margin, top_margin, bottom_margin;
+       /* ranges of the overscan margins */
+       u32     max_hscan,  max_vscan;
+       u32     max_hpos, cur_hpos;
+       u32     max_vpos, cur_vpos;
+       u32     cur_brightness, max_brightness;
+       u32     cur_contrast,   max_contrast;
+       u32     cur_saturation, max_saturation;
+       u32     cur_hue,        max_hue;
 };
 
 static bool
@@ -281,6 +305,31 @@ static const struct _sdvo_cmd_name {
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
+    /* Add the op code for SDVO enhancements */
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
     /* HDMI op code */
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
@@ -981,7 +1030,7 @@ static void intel_sdvo_set_tv_format(struct intel_output *output)
 
        status = intel_sdvo_read_response(output, NULL, 0);
        if (status != SDVO_CMD_STATUS_SUCCESS)
-               DRM_DEBUG("%s: Failed to set TV format\n",
+               DRM_DEBUG_KMS("%s: Failed to set TV format\n",
                          SDVO_NAME(sdvo_priv));
 }
 
@@ -1792,6 +1841,45 @@ static int intel_sdvo_get_modes(struct drm_connector *connector)
        return 1;
 }
 
+static void
+intel_sdvo_destroy_enhance_property(struct drm_connector *connector)
+{
+       struct intel_output *intel_output = to_intel_output(connector);
+       struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+       struct drm_device *dev = connector->dev;
+
+       if (sdvo_priv->is_tv) {
+               if (sdvo_priv->left_property)
+                       drm_property_destroy(dev, sdvo_priv->left_property);
+               if (sdvo_priv->right_property)
+                       drm_property_destroy(dev, sdvo_priv->right_property);
+               if (sdvo_priv->top_property)
+                       drm_property_destroy(dev, sdvo_priv->top_property);
+               if (sdvo_priv->bottom_property)
+                       drm_property_destroy(dev, sdvo_priv->bottom_property);
+               if (sdvo_priv->hpos_property)
+                       drm_property_destroy(dev, sdvo_priv->hpos_property);
+               if (sdvo_priv->vpos_property)
+                       drm_property_destroy(dev, sdvo_priv->vpos_property);
+       }
+       if (sdvo_priv->is_tv) {
+               if (sdvo_priv->saturation_property)
+                       drm_property_destroy(dev,
+                                       sdvo_priv->saturation_property);
+               if (sdvo_priv->contrast_property)
+                       drm_property_destroy(dev,
+                                       sdvo_priv->contrast_property);
+               if (sdvo_priv->hue_property)
+                       drm_property_destroy(dev, sdvo_priv->hue_property);
+       }
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+               if (sdvo_priv->brightness_property)
+                       drm_property_destroy(dev,
+                                       sdvo_priv->brightness_property);
+       }
+}
+
 static void intel_sdvo_destroy(struct drm_connector *connector)
 {
        struct intel_output *intel_output = to_intel_output(connector);
@@ -1812,6 +1900,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector)
                drm_property_destroy(connector->dev,
                                     sdvo_priv->tv_format_property);
 
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
+               intel_sdvo_destroy_enhance_property(connector);
+
        drm_sysfs_connector_remove(connector);
        drm_connector_cleanup(connector);
 
@@ -1829,6 +1920,8 @@ intel_sdvo_set_property(struct drm_connector *connector,
        struct drm_crtc *crtc = encoder->crtc;
        int ret = 0;
        bool changed = false;
+       uint8_t cmd, status;
+       uint16_t temp_value;
 
        ret = drm_connector_property_set_value(connector, property, val);
        if (ret < 0)
@@ -1845,11 +1938,102 @@ intel_sdvo_set_property(struct drm_connector *connector,
 
                sdvo_priv->tv_format_name = sdvo_priv->tv_format_supported[val];
                changed = true;
-       } else {
-               ret = -EINVAL;
-               goto out;
        }
 
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+               cmd = 0;
+               temp_value = val;
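+               /* The left/right and top/bottom margins are kept symmetric:
+                * setting one side updates the other, and the value written
+                * to the chip is an overscan amount, max_scan - margin. */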
+               if (sdvo_priv->left_property == property) {
+                       drm_connector_property_set_value(connector,
+                               sdvo_priv->right_property, val);
+                       if (sdvo_priv->left_margin == temp_value)
+                               goto out;
+
+                       sdvo_priv->left_margin = temp_value;
+                       sdvo_priv->right_margin = temp_value;
+                       temp_value = sdvo_priv->max_hscan -
+                                       sdvo_priv->left_margin;
+                       cmd = SDVO_CMD_SET_OVERSCAN_H;
+               } else if (sdvo_priv->right_property == property) {
+                       drm_connector_property_set_value(connector,
+                               sdvo_priv->left_property, val);
+                       if (sdvo_priv->right_margin == temp_value)
+                               goto out;
+
+                       sdvo_priv->left_margin = temp_value;
+                       sdvo_priv->right_margin = temp_value;
+                       temp_value = sdvo_priv->max_hscan -
+                               sdvo_priv->left_margin;
+                       cmd = SDVO_CMD_SET_OVERSCAN_H;
+               } else if (sdvo_priv->top_property == property) {
+                       drm_connector_property_set_value(connector,
+                               sdvo_priv->bottom_property, val);
+                       if (sdvo_priv->top_margin == temp_value)
+                               goto out;
+
+                       sdvo_priv->top_margin = temp_value;
+                       sdvo_priv->bottom_margin = temp_value;
+                       temp_value = sdvo_priv->max_vscan -
+                                       sdvo_priv->top_margin;
+                       cmd = SDVO_CMD_SET_OVERSCAN_V;
+               } else if (sdvo_priv->bottom_property == property) {
+                       drm_connector_property_set_value(connector,
+                               sdvo_priv->top_property, val);
+                       if (sdvo_priv->bottom_margin == temp_value)
+                               goto out;
+                       sdvo_priv->top_margin = temp_value;
+                       sdvo_priv->bottom_margin = temp_value;
+                       temp_value = sdvo_priv->max_vscan -
+                                       sdvo_priv->top_margin;
+                       cmd = SDVO_CMD_SET_OVERSCAN_V;
+               } else if (sdvo_priv->hpos_property == property) {
+                       if (sdvo_priv->cur_hpos == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_POSITION_H;
+                       sdvo_priv->cur_hpos = temp_value;
+               } else if (sdvo_priv->vpos_property == property) {
+                       if (sdvo_priv->cur_vpos == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_POSITION_V;
+                       sdvo_priv->cur_vpos = temp_value;
+               } else if (sdvo_priv->saturation_property == property) {
+                       if (sdvo_priv->cur_saturation == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_SATURATION;
+                       sdvo_priv->cur_saturation = temp_value;
+               } else if (sdvo_priv->contrast_property == property) {
+                       if (sdvo_priv->cur_contrast == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_CONTRAST;
+                       sdvo_priv->cur_contrast = temp_value;
+               } else if (sdvo_priv->hue_property == property) {
+                       if (sdvo_priv->cur_hue == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_HUE;
+                       sdvo_priv->cur_hue = temp_value;
+               } else if (sdvo_priv->brightness_property == property) {
+                       if (sdvo_priv->cur_brightness == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_BRIGHTNESS;
+                       sdvo_priv->cur_brightness = temp_value;
+               }
+               if (cmd) {
+                       intel_sdvo_write_cmd(intel_output, cmd, &temp_value, 2);
+                       status = intel_sdvo_read_response(intel_output,
+                                                               NULL, 0);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO command\n");
+                               return -EINVAL;
+                       }
+                       changed = true;
+               }
+       }
        if (changed && crtc)
                drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x,
                                crtc->y, crtc->fb);
@@ -2090,6 +2274,8 @@ intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags)
                sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1;
                encoder->encoder_type = DRM_MODE_ENCODER_DAC;
                connector->connector_type = DRM_MODE_CONNECTOR_VGA;
+               intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
+                                       (1 << INTEL_ANALOG_CLONE_BIT);
        } else if (flags & SDVO_OUTPUT_LVDS0) {
 
                sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0;
@@ -2176,6 +2362,310 @@ static void intel_sdvo_tv_create_property(struct drm_connector *connector)
 
 }
 
+static void intel_sdvo_create_enhance_property(struct drm_connector *connector)
+{
+       struct intel_output *intel_output = to_intel_output(connector);
+       struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+       struct intel_sdvo_enhancements_reply sdvo_data;
+       struct drm_device *dev = connector->dev;
+       uint8_t status;
+       uint16_t response, data_value[2];
+
+       intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS,
+                                               NULL, 0);
+       status = intel_sdvo_read_response(intel_output, &sdvo_data,
+                                       sizeof(sdvo_data));
+       if (status != SDVO_CMD_STATUS_SUCCESS) {
+               DRM_DEBUG_KMS("incorrect response returned\n");
+               return;
+       }
+       response = *((uint16_t *)&sdvo_data);
+       if (!response) {
+               DRM_DEBUG_KMS("No enhancement is supported\n");
+               return;
+       }
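+       /* Each supported enhancement below follows the same pattern: read
+        * its maximum (data_value[0]; data_value[1] looks like the default),
+        * read the current value, then expose a 0..max DRM range property
+        * initialised to the current value. */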
+       if (sdvo_priv->is_tv) {
+               /* when horizontal overscan is supported, add the left/right
+                * properties
+                */
+               if (sdvo_data.overscan_h) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_OVERSCAN_H, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO max "
+                                               "h_overscan\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_OVERSCAN_H, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO h_overscan\n");
+                               return;
+                       }
+                       sdvo_priv->max_hscan = data_value[0];
+                       sdvo_priv->left_margin = data_value[0] - response;
+                       sdvo_priv->right_margin = sdvo_priv->left_margin;
+                       sdvo_priv->left_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "left_margin", 2);
+                       sdvo_priv->left_property->values[0] = 0;
+                       sdvo_priv->left_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->left_property,
+                                               sdvo_priv->left_margin);
+                       sdvo_priv->right_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "right_margin", 2);
+                       sdvo_priv->right_property->values[0] = 0;
+                       sdvo_priv->right_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->right_property,
+                                               sdvo_priv->right_margin);
+                       DRM_DEBUG_KMS("h_overscan: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.overscan_v) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_OVERSCAN_V, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO max "
+                                               "v_overscan\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_OVERSCAN_V, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO v_overscan\n");
+                               return;
+                       }
+                       sdvo_priv->max_vscan = data_value[0];
+                       sdvo_priv->top_margin = data_value[0] - response;
+                       sdvo_priv->bottom_margin = sdvo_priv->top_margin;
+                       sdvo_priv->top_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "top_margin", 2);
+                       sdvo_priv->top_property->values[0] = 0;
+                       sdvo_priv->top_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->top_property,
+                                               sdvo_priv->top_margin);
+                       sdvo_priv->bottom_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "bottom_margin", 2);
+                       sdvo_priv->bottom_property->values[0] = 0;
+                       sdvo_priv->bottom_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->bottom_property,
+                                               sdvo_priv->bottom_margin);
+                       DRM_DEBUG_KMS("v_overscan: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.position_h) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_POSITION_H, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max h_pos\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_POSITION_H, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get h_position\n");
+                               return;
+                       }
+                       sdvo_priv->max_hpos = data_value[0];
+                       sdvo_priv->cur_hpos = response;
+                       sdvo_priv->hpos_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "hpos", 2);
+                       sdvo_priv->hpos_property->values[0] = 0;
+                       sdvo_priv->hpos_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->hpos_property,
+                                               sdvo_priv->cur_hpos);
+                       DRM_DEBUG_KMS("h_position: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.position_v) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_POSITION_V, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max v_pos\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_POSITION_V, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get v_position\n");
+                               return;
+                       }
+                       sdvo_priv->max_vpos = data_value[0];
+                       sdvo_priv->cur_vpos = response;
+                       sdvo_priv->vpos_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "vpos", 2);
+                       sdvo_priv->vpos_property->values[0] = 0;
+                       sdvo_priv->vpos_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->vpos_property,
+                                               sdvo_priv->cur_vpos);
+                       DRM_DEBUG_KMS("v_position: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+       }
+       if (sdvo_priv->is_tv) {
+               if (sdvo_data.saturation) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_SATURATION, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max sat\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_SATURATION, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get sat\n");
+                               return;
+                       }
+                       sdvo_priv->max_saturation = data_value[0];
+                       sdvo_priv->cur_saturation = response;
+                       sdvo_priv->saturation_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "saturation", 2);
+                       sdvo_priv->saturation_property->values[0] = 0;
+                       sdvo_priv->saturation_property->values[1] =
+                                                       data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->saturation_property,
+                                               sdvo_priv->cur_saturation);
+                       DRM_DEBUG_KMS("saturation: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.contrast) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_CONTRAST, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max contrast\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_CONTRAST, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get contrast\n");
+                               return;
+                       }
+                       sdvo_priv->max_contrast = data_value[0];
+                       sdvo_priv->cur_contrast = response;
+                       sdvo_priv->contrast_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "contrast", 2);
+                       sdvo_priv->contrast_property->values[0] = 0;
+                       sdvo_priv->contrast_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->contrast_property,
+                                               sdvo_priv->cur_contrast);
+                       DRM_DEBUG_KMS("contrast: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.hue) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_HUE, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max hue\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_HUE, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get hue\n");
+                               return;
+                       }
+                       sdvo_priv->max_hue = data_value[0];
+                       sdvo_priv->cur_hue = response;
+                       sdvo_priv->hue_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "hue", 2);
+                       sdvo_priv->hue_property->values[0] = 0;
+                       sdvo_priv->hue_property->values[1] =
+                                                       data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->hue_property,
+                                               sdvo_priv->cur_hue);
+                       DRM_DEBUG_KMS("hue: max %d, default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+       }
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+               if (sdvo_data.brightness) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_BRIGHTNESS, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max bright\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_BRIGHTNESS, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get brightness\n");
+                               return;
+                       }
+                       sdvo_priv->max_brightness = data_value[0];
+                       sdvo_priv->cur_brightness = response;
+                       sdvo_priv->brightness_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "brightness", 2);
+                       sdvo_priv->brightness_property->values[0] = 0;
+                       sdvo_priv->brightness_property->values[1] =
+                                                       data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->brightness_property,
+                                               sdvo_priv->cur_brightness);
+                       DRM_DEBUG_KMS("brightness: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+       }
+}
+
 bool intel_sdvo_init(struct drm_device *dev, int output_device)
 {
        struct drm_connector *connector;
@@ -2264,6 +2754,10 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
        drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
        if (sdvo_priv->is_tv)
                intel_sdvo_tv_create_property(connector);
+
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
+               intel_sdvo_create_enhance_property(connector);
+
        drm_sysfs_connector_add(connector);
 
        intel_sdvo_select_ddc_bus(sdvo_priv);
index ed7711d11ae8e8e5d013da8b1527634cb2c6e79f..6857560144bd782a4c8a7d41af2041b5171b7a5a 100644 (file)
@@ -325,34 +325,6 @@ config SENSORS_F75375S
          This driver can also be built as a module.  If so, the module
          will be called f75375s.
 
-config SENSORS_FSCHER
-       tristate "FSC Hermes (DEPRECATED)"
-       depends on X86 && I2C
-       help
-         This driver is DEPRECATED please use the new merged fschmd
-         ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver
-         instead.
-
-         If you say yes here you get support for Fujitsu Siemens
-         Computers Hermes sensor chips.
-
-         This driver can also be built as a module.  If so, the module
-         will be called fscher.
-
-config SENSORS_FSCPOS
-       tristate "FSC Poseidon (DEPRECATED)"
-       depends on X86 && I2C
-       help
-         This driver is DEPRECATED please use the new merged fschmd
-         ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver
-         instead.
-
-         If you say yes here you get support for Fujitsu Siemens
-         Computers Poseidon sensor chips.
-
-         This driver can also be built as a module.  If so, the module
-         will be called fscpos.
-
 config SENSORS_FSCHMD
        tristate "Fujitsu Siemens Computers sensor chips"
        depends on X86 && I2C
@@ -401,12 +373,12 @@ config SENSORS_GL520SM
          will be called gl520sm.
 
 config SENSORS_CORETEMP
-       tristate "Intel Core (2) Duo/Solo temperature sensor"
+       tristate "Intel Core/Core2/Atom temperature sensor"
        depends on X86 && EXPERIMENTAL
        help
          If you say yes here you get support for the temperature
-         sensor inside your CPU. Supported all are all known variants
-         of Intel Core family.
+         sensor inside your CPU. Most of the family 6 CPUs
+         are supported. Check Documentation/hwmon/coretemp for details.
 
 config SENSORS_IBMAEM
        tristate "IBM Active Energy Manager temperature/power sensors and control"
index bcf73a9bb61955c93348fabeb4d8cac352c82840..9f46cb019cc6d99e28dbec508c2d73a68a6eb384 100644 (file)
@@ -42,9 +42,7 @@ obj-$(CONFIG_SENSORS_DS1621)  += ds1621.o
 obj-$(CONFIG_SENSORS_F71805F)  += f71805f.o
 obj-$(CONFIG_SENSORS_F71882FG) += f71882fg.o
 obj-$(CONFIG_SENSORS_F75375S)  += f75375s.o
-obj-$(CONFIG_SENSORS_FSCHER)   += fscher.o
 obj-$(CONFIG_SENSORS_FSCHMD)   += fschmd.o
-obj-$(CONFIG_SENSORS_FSCPOS)   += fscpos.o
 obj-$(CONFIG_SENSORS_G760A)    += g760a.o
 obj-$(CONFIG_SENSORS_GL518SM)  += gl518sm.o
 obj-$(CONFIG_SENSORS_GL520SM)  += gl520sm.o
index 789441830cd8fb8a5bc3c19ffbcd639ef2935a47..56905955352cac894b0c381bad597050be236438 100644 (file)
@@ -37,6 +37,7 @@
 #define ADM1031_REG_PWM                        (0x22)
 #define ADM1031_REG_FAN_MIN(nr)                (0x10 + (nr))
 
+#define ADM1031_REG_TEMP_OFFSET(nr)    (0x0d + (nr))
 #define ADM1031_REG_TEMP_MAX(nr)       (0x14 + 4 * (nr))
 #define ADM1031_REG_TEMP_MIN(nr)       (0x15 + 4 * (nr))
 #define ADM1031_REG_TEMP_CRIT(nr)      (0x16 + 4 * (nr))
@@ -93,6 +94,7 @@ struct adm1031_data {
        u8 auto_temp_min[3];
        u8 auto_temp_off[3];
        u8 auto_temp_max[3];
+       s8 temp_offset[3];
        s8 temp_min[3];
        s8 temp_max[3];
        s8 temp_crit[3];
@@ -145,6 +147,10 @@ adm1031_write_value(struct i2c_client *client, u8 reg, unsigned int value)
 
 #define TEMP_FROM_REG_EXT(val, ext)    (TEMP_FROM_REG(val) + (ext) * 125)
 
+#define TEMP_OFFSET_TO_REG(val)                (TEMP_TO_REG(val) & 0x8f)
+#define TEMP_OFFSET_FROM_REG(val)      TEMP_FROM_REG((val) < 0 ? \
+                                                     (val) | 0x70 : (val))
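+/* (The 0x8f mask and 0x70 sign extension above treat the offset register as
+ * a 5-bit two's-complement field: sign in bit 7, magnitude bits in 3:0,
+ * bits 6:4 unused; inferred from the masks, check the ADM1031 datasheet.) */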
+
 #define FAN_FROM_REG(reg, div)         ((reg) ? (11250 * 60) / ((reg) * (div)) : 0)
 
 static int FAN_TO_REG(int reg, int div)
@@ -585,6 +591,14 @@ static ssize_t show_temp(struct device *dev,
            (((data->ext_temp[nr] >> ((nr - 1) * 3)) & 7));
        return sprintf(buf, "%d\n", TEMP_FROM_REG_EXT(data->temp[nr], ext));
 }
+static ssize_t show_temp_offset(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       int nr = to_sensor_dev_attr(attr)->index;
+       struct adm1031_data *data = adm1031_update_device(dev);
+       return sprintf(buf, "%d\n",
+                      TEMP_OFFSET_FROM_REG(data->temp_offset[nr]));
+}
 static ssize_t show_temp_min(struct device *dev,
                             struct device_attribute *attr, char *buf)
 {
@@ -606,6 +620,24 @@ static ssize_t show_temp_crit(struct device *dev,
        struct adm1031_data *data = adm1031_update_device(dev);
        return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_crit[nr]));
 }
+static ssize_t set_temp_offset(struct device *dev,
+                              struct device_attribute *attr, const char *buf,
+                              size_t count)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct adm1031_data *data = i2c_get_clientdata(client);
+       int nr = to_sensor_dev_attr(attr)->index;
+       int val;
+
+       val = simple_strtol(buf, NULL, 10);
+       val = SENSORS_LIMIT(val, -15000, 15000);
+       mutex_lock(&data->update_lock);
+       data->temp_offset[nr] = TEMP_OFFSET_TO_REG(val);
+       adm1031_write_value(client, ADM1031_REG_TEMP_OFFSET(nr),
+                           data->temp_offset[nr]);
+       mutex_unlock(&data->update_lock);
+       return count;
+}
 static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr,
                            const char *buf, size_t count)
 {
@@ -661,6 +693,8 @@ static ssize_t set_temp_crit(struct device *dev, struct device_attribute *attr,
 #define temp_reg(offset)                                               \
 static SENSOR_DEVICE_ATTR(temp##offset##_input, S_IRUGO,               \
                show_temp, NULL, offset - 1);                           \
+static SENSOR_DEVICE_ATTR(temp##offset##_offset, S_IRUGO | S_IWUSR,    \
+               show_temp_offset, set_temp_offset, offset - 1);         \
 static SENSOR_DEVICE_ATTR(temp##offset##_min, S_IRUGO | S_IWUSR,       \
                show_temp_min, set_temp_min, offset - 1);               \
 static SENSOR_DEVICE_ATTR(temp##offset##_max, S_IRUGO | S_IWUSR,       \
@@ -714,6 +748,7 @@ static struct attribute *adm1031_attributes[] = {
        &sensor_dev_attr_pwm1.dev_attr.attr,
        &sensor_dev_attr_auto_fan1_channel.dev_attr.attr,
        &sensor_dev_attr_temp1_input.dev_attr.attr,
+       &sensor_dev_attr_temp1_offset.dev_attr.attr,
        &sensor_dev_attr_temp1_min.dev_attr.attr,
        &sensor_dev_attr_temp1_min_alarm.dev_attr.attr,
        &sensor_dev_attr_temp1_max.dev_attr.attr,
@@ -721,6 +756,7 @@ static struct attribute *adm1031_attributes[] = {
        &sensor_dev_attr_temp1_crit.dev_attr.attr,
        &sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
        &sensor_dev_attr_temp2_input.dev_attr.attr,
+       &sensor_dev_attr_temp2_offset.dev_attr.attr,
        &sensor_dev_attr_temp2_min.dev_attr.attr,
        &sensor_dev_attr_temp2_min_alarm.dev_attr.attr,
        &sensor_dev_attr_temp2_max.dev_attr.attr,
@@ -757,6 +793,7 @@ static struct attribute *adm1031_attributes_opt[] = {
        &sensor_dev_attr_pwm2.dev_attr.attr,
        &sensor_dev_attr_auto_fan2_channel.dev_attr.attr,
        &sensor_dev_attr_temp3_input.dev_attr.attr,
+       &sensor_dev_attr_temp3_offset.dev_attr.attr,
        &sensor_dev_attr_temp3_min.dev_attr.attr,
        &sensor_dev_attr_temp3_min_alarm.dev_attr.attr,
        &sensor_dev_attr_temp3_max.dev_attr.attr,
@@ -937,6 +974,9 @@ static struct adm1031_data *adm1031_update_device(struct device *dev)
                        }
                        data->temp[chan] = newh;
 
+                       data->temp_offset[chan] =
+                           adm1031_read_value(client,
+                                              ADM1031_REG_TEMP_OFFSET(chan));
                        data->temp_min[chan] =
                            adm1031_read_value(client,
                                               ADM1031_REG_TEMP_MIN(chan));
index 972cf4ba963ce06d8ae2ea65c712ff61e05f74c9..caef39cda8c8e8d504eb1777d6aa409ff6bf7424 100644 (file)
@@ -157,17 +157,26 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *
        /* The 100C is default for both mobile and non mobile CPUs */
 
        int tjmax = 100000;
-       int ismobile = 1;
+       int tjmax_ee = 85000;
+       int usemsr_ee = 1;
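+       /* tjmax_ee is the TjMax used when the (undocumented) MSR 0xEE bit 30
+        * is set: 85C by default, raised to 90C for mobile Penryn below. */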
        int err;
        u32 eax, edx;
 
        /* Early chips have no MSR for TjMax */
 
        if ((c->x86_model == 0xf) && (c->x86_mask < 4)) {
-               ismobile = 0;
+               usemsr_ee = 0;
        }
 
-       if ((c->x86_model > 0xe) && (ismobile)) {
+       /* Atoms seem to have TjMax at 90C */
+
+       if (c->x86_model == 0x1c) {
+               usemsr_ee = 0;
+               tjmax = 90000;
+       }
+
+       if ((c->x86_model > 0xe) && (usemsr_ee)) {
+               u8 platform_id;
 
                /* Now we can detect the mobile CPU using Intel provided table
                   http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
@@ -179,13 +188,29 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *
                        dev_warn(dev,
                                 "Unable to access MSR 0x17, assuming desktop"
                                 " CPU\n");
-                       ismobile = 0;
-               } else if (!(eax & 0x10000000)) {
-                       ismobile = 0;
+                       usemsr_ee = 0;
+               } else if (c->x86_model < 0x17 && !(eax & 0x10000000)) {
+                       /* Trust bit 28 up to Penryn; I could not find any
+                          documentation on it. If you happen to know
+                          someone at Intel, please ask. */
+                       usemsr_ee = 0;
+               } else {
+                       /* Platform ID bits 52:50 (EDX starts at bit 32) */
+                       platform_id = (edx >> 18) & 0x7;
+
+                       /* Mobile Penryn CPUs seem to be platform ID 7 or 5
+                          (guesswork) */
+                       if ((c->x86_model == 0x17) &&
+                           ((platform_id == 5) || (platform_id == 7))) {
+                               /* If the MSR EE bit is set, TjMax is
+                                  90 degrees C, otherwise 105 degrees C */
+                               tjmax_ee = 90000;
+                               tjmax = 105000;
+                       }
                }
        }
 
-       if (ismobile || c->x86_model == 0x1c) {
+       if (usemsr_ee) {
 
                err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx);
                if (err) {
@@ -193,9 +218,11 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *
                                 "Unable to access MSR 0xEE, for Tjmax, left"
                                 " at default");
                } else if (eax & 0x40000000) {
-                       tjmax = 85000;
+                       tjmax = tjmax_ee;
                }
-       } else {
+       /* if we don't use the MSR EE it means we have a desktop CPU (with the
+          exception of Atom) */
+       } else if (tjmax == 100000) {
                dev_warn(dev, "Using relative temperature scale!\n");
        }
 
@@ -248,9 +275,9 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, data);
 
        /* read the still undocumented IA32_TEMPERATURE_TARGET; it exists
-          on older CPUs but not in this register */
+          on older CPUs but not in this register; Atoms don't have it either */
 
-       if (c->x86_model > 0xe) {
+       if ((c->x86_model > 0xe) && (c->x86_model != 0x1c)) {
                err = rdmsr_safe_on_cpu(data->id, 0x1a2, &eax, &edx);
                if (err) {
                        dev_warn(&pdev->dev, "Unable to read"
@@ -413,11 +440,15 @@ static int __init coretemp_init(void)
        for_each_online_cpu(i) {
                struct cpuinfo_x86 *c = &cpu_data(i);
 
-               /* check if family 6, models 0xe, 0xf, 0x16, 0x17, 0x1A */
+               /* check if family 6, models 0xe (Pentium M DC),
+                 0xf (Core 2 DC 65nm), 0x16 (Core 2 SC 65nm),
+                 0x17 (Penryn 45nm), 0x1a (Nehalem), 0x1c (Atom),
+                 0x1e (Lynnfield) */
                if ((c->cpuid_level < 0) || (c->x86 != 0x6) ||
                    !((c->x86_model == 0xe) || (c->x86_model == 0xf) ||
                        (c->x86_model == 0x16) || (c->x86_model == 0x17) ||
-                       (c->x86_model == 0x1A) || (c->x86_model == 0x1c))) {
+                       (c->x86_model == 0x1a) || (c->x86_model == 0x1c) ||
+                       (c->x86_model == 0x1e))) {
 
                        /* supported CPU not found, but report the unknown
                           family 6 CPU */
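
The coretemp hunks above amount to the following TjMax rule; a hand-condensed sketch, with pick_tjmax a hypothetical helper rather than driver source. It assumes the usual rdmsr convention that bits 31:0 land in EAX and bits 63:32 in EDX, so platform ID bits 52:50 of MSR 0x17 are bits 20:18 of EDX:

	static u32 pick_tjmax(u8 model, u32 msr17_edx, int ee_bit_set)
	{
		u8 platform_id = (msr17_edx >> 18) & 0x7;

		if (model == 0x1c)		/* Atom: fixed TjMax */
			return 90000;
		/* mobile Penryn, platform ID 5 or 7 (guesswork above) */
		if (model == 0x17 &&
		    (platform_id == 5 || platform_id == 7))
			return ee_bit_set ? 90000 : 105000;
		/* older mobile parts: MSR 0xEE bit 30 selects 85 C */
		return ee_bit_set ? 85000 : 100000;
	}
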
diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c
deleted file mode 100644 (file)
index 12c70e4..0000000
+++ /dev/null
@@ -1,680 +0,0 @@
-/*
- * fscher.c - Part of lm_sensors, Linux kernel modules for hardware
- * monitoring
- * Copyright (C) 2003, 2004 Reinhard Nissl <rnissl@gmx.de>
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* 
- *  fujitsu siemens hermes chip, 
- *  module based on fscpos.c 
- *  Copyright (C) 2000 Hermann Jung <hej@odn.de>
- *  Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
- *  and Philip Edelbrock <phil@netroedge.com>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/hwmon.h>
-#include <linux/err.h>
-#include <linux/mutex.h>
-#include <linux/sysfs.h>
-
-/*
- * Addresses to scan
- */
-
-static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END };
-
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_1(fscher);
-
-/*
- * The FSCHER registers
- */
-
-/* chip identification */
-#define FSCHER_REG_IDENT_0             0x00
-#define FSCHER_REG_IDENT_1             0x01
-#define FSCHER_REG_IDENT_2             0x02
-#define FSCHER_REG_REVISION            0x03
-
-/* global control and status */
-#define FSCHER_REG_EVENT_STATE         0x04
-#define FSCHER_REG_CONTROL             0x05
-
-/* watchdog */
-#define FSCHER_REG_WDOG_PRESET         0x28
-#define FSCHER_REG_WDOG_STATE          0x23
-#define FSCHER_REG_WDOG_CONTROL                0x21
-
-/* fan 0 */
-#define FSCHER_REG_FAN0_MIN            0x55
-#define FSCHER_REG_FAN0_ACT            0x0e
-#define FSCHER_REG_FAN0_STATE          0x0d
-#define FSCHER_REG_FAN0_RIPPLE         0x0f
-
-/* fan 1 */
-#define FSCHER_REG_FAN1_MIN            0x65
-#define FSCHER_REG_FAN1_ACT            0x6b
-#define FSCHER_REG_FAN1_STATE          0x62
-#define FSCHER_REG_FAN1_RIPPLE         0x6f
-
-/* fan 2 */
-#define FSCHER_REG_FAN2_MIN            0xb5
-#define FSCHER_REG_FAN2_ACT            0xbb
-#define FSCHER_REG_FAN2_STATE          0xb2
-#define FSCHER_REG_FAN2_RIPPLE         0xbf
-
-/* voltage supervision */
-#define FSCHER_REG_VOLT_12             0x45
-#define FSCHER_REG_VOLT_5              0x42
-#define FSCHER_REG_VOLT_BATT           0x48
-
-/* temperature 0 */
-#define FSCHER_REG_TEMP0_ACT           0x64
-#define FSCHER_REG_TEMP0_STATE         0x71
-
-/* temperature 1 */
-#define FSCHER_REG_TEMP1_ACT           0x32
-#define FSCHER_REG_TEMP1_STATE         0x81
-
-/* temperature 2 */
-#define FSCHER_REG_TEMP2_ACT           0x35
-#define FSCHER_REG_TEMP2_STATE         0x91
-
-/*
- * Functions declaration
- */
-
-static int fscher_probe(struct i2c_client *client,
-                       const struct i2c_device_id *id);
-static int fscher_detect(struct i2c_client *client, int kind,
-                        struct i2c_board_info *info);
-static int fscher_remove(struct i2c_client *client);
-static struct fscher_data *fscher_update_device(struct device *dev);
-static void fscher_init_client(struct i2c_client *client);
-
-static int fscher_read_value(struct i2c_client *client, u8 reg);
-static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value);
-
-/*
- * Driver data (common to all clients)
- */
-static const struct i2c_device_id fscher_id[] = {
-       { "fscher", fscher },
-       { }
-};
-
-static struct i2c_driver fscher_driver = {
-       .class          = I2C_CLASS_HWMON,
-       .driver = {
-               .name   = "fscher",
-       },
-       .probe          = fscher_probe,
-       .remove         = fscher_remove,
-       .id_table       = fscher_id,
-       .detect         = fscher_detect,
-       .address_data   = &addr_data,
-};
-
-/*
- * Client data (each client gets its own)
- */
-
-struct fscher_data {
-       struct device *hwmon_dev;
-       struct mutex update_lock;
-       char valid; /* zero until following fields are valid */
-       unsigned long last_updated; /* in jiffies */
-
-       /* register values */
-       u8 revision;            /* revision of chip */
-       u8 global_event;        /* global event status */
-       u8 global_control;      /* global control register */
-       u8 watchdog[3];         /* watchdog */
-       u8 volt[3];             /* 12, 5, battery voltage */ 
-       u8 temp_act[3];         /* temperature */
-       u8 temp_status[3];      /* status of sensor */
-       u8 fan_act[3];          /* fans revolutions per second */
-       u8 fan_status[3];       /* fan status */
-       u8 fan_min[3];          /* fan min value for rps */
-       u8 fan_ripple[3];       /* divider for rps */
-};
-
-/*
- * Sysfs stuff
- */
-
-#define sysfs_r(kind, sub, offset, reg) \
-static ssize_t show_##kind##sub (struct fscher_data *, char *, int); \
-static ssize_t show_##kind##offset##sub (struct device *, struct device_attribute *attr, char *); \
-static ssize_t show_##kind##offset##sub (struct device *dev, struct device_attribute *attr, char *buf) \
-{ \
-       struct fscher_data *data = fscher_update_device(dev); \
-       return show_##kind##sub(data, buf, (offset)); \
-}
-
-#define sysfs_w(kind, sub, offset, reg) \
-static ssize_t set_##kind##sub (struct i2c_client *, struct fscher_data *, const char *, size_t, int, int); \
-static ssize_t set_##kind##offset##sub (struct device *, struct device_attribute *attr, const char *, size_t); \
-static ssize_t set_##kind##offset##sub (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) \
-{ \
-       struct i2c_client *client = to_i2c_client(dev); \
-       struct fscher_data *data = i2c_get_clientdata(client); \
-       return set_##kind##sub(client, data, buf, count, (offset), reg); \
-}
-
-#define sysfs_rw_n(kind, sub, offset, reg) \
-sysfs_r(kind, sub, offset, reg) \
-sysfs_w(kind, sub, offset, reg) \
-static DEVICE_ATTR(kind##offset##sub, S_IRUGO | S_IWUSR, show_##kind##offset##sub, set_##kind##offset##sub);
-
-#define sysfs_rw(kind, sub, reg) \
-sysfs_r(kind, sub, 0, reg) \
-sysfs_w(kind, sub, 0, reg) \
-static DEVICE_ATTR(kind##sub, S_IRUGO | S_IWUSR, show_##kind##0##sub, set_##kind##0##sub);
-
-#define sysfs_ro_n(kind, sub, offset, reg) \
-sysfs_r(kind, sub, offset, reg) \
-static DEVICE_ATTR(kind##offset##sub, S_IRUGO, show_##kind##offset##sub, NULL);
-
-#define sysfs_ro(kind, sub, reg) \
-sysfs_r(kind, sub, 0, reg) \
-static DEVICE_ATTR(kind, S_IRUGO, show_##kind##0##sub, NULL);
-
-#define sysfs_fan(offset, reg_status, reg_min, reg_ripple, reg_act) \
-sysfs_rw_n(pwm,        , offset, reg_min) \
-sysfs_rw_n(fan, _status, offset, reg_status) \
-sysfs_rw_n(fan, _div   , offset, reg_ripple) \
-sysfs_ro_n(fan, _input , offset, reg_act)
-
-#define sysfs_temp(offset, reg_status, reg_act) \
-sysfs_rw_n(temp, _status, offset, reg_status) \
-sysfs_ro_n(temp, _input , offset, reg_act)
-    
-#define sysfs_in(offset, reg_act) \
-sysfs_ro_n(in, _input, offset, reg_act)
-
-#define sysfs_revision(reg_revision) \
-sysfs_ro(revision, , reg_revision)
-
-#define sysfs_alarms(reg_events) \
-sysfs_ro(alarms, , reg_events)
-
-#define sysfs_control(reg_control) \
-sysfs_rw(control, , reg_control)
-
-#define sysfs_watchdog(reg_control, reg_status, reg_preset) \
-sysfs_rw(watchdog, _control, reg_control) \
-sysfs_rw(watchdog, _status , reg_status) \
-sysfs_rw(watchdog, _preset , reg_preset)
-
-sysfs_fan(1, FSCHER_REG_FAN0_STATE, FSCHER_REG_FAN0_MIN,
-            FSCHER_REG_FAN0_RIPPLE, FSCHER_REG_FAN0_ACT)
-sysfs_fan(2, FSCHER_REG_FAN1_STATE, FSCHER_REG_FAN1_MIN,
-            FSCHER_REG_FAN1_RIPPLE, FSCHER_REG_FAN1_ACT)
-sysfs_fan(3, FSCHER_REG_FAN2_STATE, FSCHER_REG_FAN2_MIN,
-            FSCHER_REG_FAN2_RIPPLE, FSCHER_REG_FAN2_ACT)
-
-sysfs_temp(1, FSCHER_REG_TEMP0_STATE, FSCHER_REG_TEMP0_ACT)
-sysfs_temp(2, FSCHER_REG_TEMP1_STATE, FSCHER_REG_TEMP1_ACT)
-sysfs_temp(3, FSCHER_REG_TEMP2_STATE, FSCHER_REG_TEMP2_ACT)
-
-sysfs_in(0, FSCHER_REG_VOLT_12)
-sysfs_in(1, FSCHER_REG_VOLT_5)
-sysfs_in(2, FSCHER_REG_VOLT_BATT)
-
-sysfs_revision(FSCHER_REG_REVISION)
-sysfs_alarms(FSCHER_REG_EVENTS)
-sysfs_control(FSCHER_REG_CONTROL)
-sysfs_watchdog(FSCHER_REG_WDOG_CONTROL, FSCHER_REG_WDOG_STATE, FSCHER_REG_WDOG_PRESET)
-  
-static struct attribute *fscher_attributes[] = {
-       &dev_attr_revision.attr,
-       &dev_attr_alarms.attr,
-       &dev_attr_control.attr,
-
-       &dev_attr_watchdog_status.attr,
-       &dev_attr_watchdog_control.attr,
-       &dev_attr_watchdog_preset.attr,
-
-       &dev_attr_in0_input.attr,
-       &dev_attr_in1_input.attr,
-       &dev_attr_in2_input.attr,
-
-       &dev_attr_fan1_status.attr,
-       &dev_attr_fan1_div.attr,
-       &dev_attr_fan1_input.attr,
-       &dev_attr_pwm1.attr,
-       &dev_attr_fan2_status.attr,
-       &dev_attr_fan2_div.attr,
-       &dev_attr_fan2_input.attr,
-       &dev_attr_pwm2.attr,
-       &dev_attr_fan3_status.attr,
-       &dev_attr_fan3_div.attr,
-       &dev_attr_fan3_input.attr,
-       &dev_attr_pwm3.attr,
-
-       &dev_attr_temp1_status.attr,
-       &dev_attr_temp1_input.attr,
-       &dev_attr_temp2_status.attr,
-       &dev_attr_temp2_input.attr,
-       &dev_attr_temp3_status.attr,
-       &dev_attr_temp3_input.attr,
-       NULL
-};
-
-static const struct attribute_group fscher_group = {
-       .attrs = fscher_attributes,
-};
-
-/*
- * Real code
- */
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int fscher_detect(struct i2c_client *new_client, int kind,
-                        struct i2c_board_info *info)
-{
-       struct i2c_adapter *adapter = new_client->adapter;
-
-       if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
-               return -ENODEV;
-
-       /* Do the remaining detection unless force or force_fscher parameter */
-       if (kind < 0) {
-               if ((i2c_smbus_read_byte_data(new_client,
-                    FSCHER_REG_IDENT_0) != 0x48)       /* 'H' */
-                || (i2c_smbus_read_byte_data(new_client,
-                    FSCHER_REG_IDENT_1) != 0x45)       /* 'E' */
-                || (i2c_smbus_read_byte_data(new_client,
-                    FSCHER_REG_IDENT_2) != 0x52))      /* 'R' */
-                       return -ENODEV;
-       }
-
-       strlcpy(info->type, "fscher", I2C_NAME_SIZE);
-
-       return 0;
-}
-
-static int fscher_probe(struct i2c_client *new_client,
-                       const struct i2c_device_id *id)
-{
-       struct fscher_data *data;
-       int err;
-
-       data = kzalloc(sizeof(struct fscher_data), GFP_KERNEL);
-       if (!data) {
-               err = -ENOMEM;
-               goto exit;
-       }
-
-       i2c_set_clientdata(new_client, data);
-       data->valid = 0;
-       mutex_init(&data->update_lock);
-
-       fscher_init_client(new_client);
-
-       /* Register sysfs hooks */
-       if ((err = sysfs_create_group(&new_client->dev.kobj, &fscher_group)))
-               goto exit_free;
-
-       data->hwmon_dev = hwmon_device_register(&new_client->dev);
-       if (IS_ERR(data->hwmon_dev)) {
-               err = PTR_ERR(data->hwmon_dev);
-               goto exit_remove_files;
-       }
-
-       return 0;
-
-exit_remove_files:
-       sysfs_remove_group(&new_client->dev.kobj, &fscher_group);
-exit_free:
-       kfree(data);
-exit:
-       return err;
-}
-
-static int fscher_remove(struct i2c_client *client)
-{
-       struct fscher_data *data = i2c_get_clientdata(client);
-
-       hwmon_device_unregister(data->hwmon_dev);
-       sysfs_remove_group(&client->dev.kobj, &fscher_group);
-
-       kfree(data);
-       return 0;
-}
-
-static int fscher_read_value(struct i2c_client *client, u8 reg)
-{
-       dev_dbg(&client->dev, "read reg 0x%02x\n", reg);
-
-       return i2c_smbus_read_byte_data(client, reg);
-}
-
-static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value)
-{
-       dev_dbg(&client->dev, "write reg 0x%02x, val 0x%02x\n",
-               reg, value);
-
-       return i2c_smbus_write_byte_data(client, reg, value);
-}
-
-/* Called when we have found a new FSC Hermes. */
-static void fscher_init_client(struct i2c_client *client)
-{
-       struct fscher_data *data = i2c_get_clientdata(client);
-
-       /* Read revision from chip */
-       data->revision =  fscher_read_value(client, FSCHER_REG_REVISION);
-}
-
-static struct fscher_data *fscher_update_device(struct device *dev)
-{
-       struct i2c_client *client = to_i2c_client(dev);
-       struct fscher_data *data = i2c_get_clientdata(client);
-
-       mutex_lock(&data->update_lock);
-
-       if (time_after(jiffies, data->last_updated + 2 * HZ) || !data->valid) {
-
-               dev_dbg(&client->dev, "Starting fscher update\n");
-
-               data->temp_act[0] = fscher_read_value(client, FSCHER_REG_TEMP0_ACT);
-               data->temp_act[1] = fscher_read_value(client, FSCHER_REG_TEMP1_ACT);
-               data->temp_act[2] = fscher_read_value(client, FSCHER_REG_TEMP2_ACT);
-               data->temp_status[0] = fscher_read_value(client, FSCHER_REG_TEMP0_STATE);
-               data->temp_status[1] = fscher_read_value(client, FSCHER_REG_TEMP1_STATE);
-               data->temp_status[2] = fscher_read_value(client, FSCHER_REG_TEMP2_STATE);
-
-               data->volt[0] = fscher_read_value(client, FSCHER_REG_VOLT_12);
-               data->volt[1] = fscher_read_value(client, FSCHER_REG_VOLT_5);
-               data->volt[2] = fscher_read_value(client, FSCHER_REG_VOLT_BATT);
-
-               data->fan_act[0] = fscher_read_value(client, FSCHER_REG_FAN0_ACT);
-               data->fan_act[1] = fscher_read_value(client, FSCHER_REG_FAN1_ACT);
-               data->fan_act[2] = fscher_read_value(client, FSCHER_REG_FAN2_ACT);
-               data->fan_status[0] = fscher_read_value(client, FSCHER_REG_FAN0_STATE);
-               data->fan_status[1] = fscher_read_value(client, FSCHER_REG_FAN1_STATE);
-               data->fan_status[2] = fscher_read_value(client, FSCHER_REG_FAN2_STATE);
-               data->fan_min[0] = fscher_read_value(client, FSCHER_REG_FAN0_MIN);
-               data->fan_min[1] = fscher_read_value(client, FSCHER_REG_FAN1_MIN);
-               data->fan_min[2] = fscher_read_value(client, FSCHER_REG_FAN2_MIN);
-               data->fan_ripple[0] = fscher_read_value(client, FSCHER_REG_FAN0_RIPPLE);
-               data->fan_ripple[1] = fscher_read_value(client, FSCHER_REG_FAN1_RIPPLE);
-               data->fan_ripple[2] = fscher_read_value(client, FSCHER_REG_FAN2_RIPPLE);
-
-               data->watchdog[0] = fscher_read_value(client, FSCHER_REG_WDOG_PRESET);
-               data->watchdog[1] = fscher_read_value(client, FSCHER_REG_WDOG_STATE);
-               data->watchdog[2] = fscher_read_value(client, FSCHER_REG_WDOG_CONTROL);
-
-               data->global_event = fscher_read_value(client, FSCHER_REG_EVENT_STATE);
-               data->global_control = fscher_read_value(client,
-                                                       FSCHER_REG_CONTROL);
-
-               data->last_updated = jiffies;
-               data->valid = 1;                 
-       }
-
-       mutex_unlock(&data->update_lock);
-
-       return data;
-}
-
-
-
-#define FAN_INDEX_FROM_NUM(nr) ((nr) - 1)
-
-static ssize_t set_fan_status(struct i2c_client *client, struct fscher_data *data,
-                             const char *buf, size_t count, int nr, int reg)
-{
-       /* bits 0..1, 3..7 reserved => mask with 0x04 */  
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0x04;
-       
-       mutex_lock(&data->update_lock);
-       data->fan_status[FAN_INDEX_FROM_NUM(nr)] &= ~v;
-       fscher_write_value(client, reg, v);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_fan_status(struct fscher_data *data, char *buf, int nr)
-{
-       /* bits 0..1, 3..7 reserved => mask with 0x04 */  
-       return sprintf(buf, "%u\n", data->fan_status[FAN_INDEX_FROM_NUM(nr)] & 0x04);
-}
-
-static ssize_t set_pwm(struct i2c_client *client, struct fscher_data *data,
-                      const char *buf, size_t count, int nr, int reg)
-{
-       unsigned long v = simple_strtoul(buf, NULL, 10);
-
-       mutex_lock(&data->update_lock);
-       data->fan_min[FAN_INDEX_FROM_NUM(nr)] = v > 0xff ? 0xff : v;
-       fscher_write_value(client, reg, data->fan_min[FAN_INDEX_FROM_NUM(nr)]);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_pwm(struct fscher_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%u\n", data->fan_min[FAN_INDEX_FROM_NUM(nr)]);
-}
-
-static ssize_t set_fan_div(struct i2c_client *client, struct fscher_data *data,
-                          const char *buf, size_t count, int nr, int reg)
-{
-       /* supported values: 2, 4, 8 */
-       unsigned long v = simple_strtoul(buf, NULL, 10);
-
-       switch (v) {
-       case 2: v = 1; break;
-       case 4: v = 2; break;
-       case 8: v = 3; break;
-       default:
-               dev_err(&client->dev, "fan_div value %ld not "
-                        "supported. Choose one of 2, 4 or 8!\n", v);
-               return -EINVAL;
-       }
-
-       mutex_lock(&data->update_lock);
-
-       /* bits 2..7 reserved => mask with 0x03 */
-       data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] &= ~0x03;
-       data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] |= v;
-
-       fscher_write_value(client, reg, data->fan_ripple[FAN_INDEX_FROM_NUM(nr)]);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_fan_div(struct fscher_data *data, char *buf, int nr)
-{
-       /* bits 2..7 reserved => mask with 0x03 */  
-       return sprintf(buf, "%u\n", 1 << (data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] & 0x03));
-}
-
-#define RPM_FROM_REG(val)      (val*60)
-
-static ssize_t show_fan_input (struct fscher_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%u\n", RPM_FROM_REG(data->fan_act[FAN_INDEX_FROM_NUM(nr)]));
-}
-
-
-
-#define TEMP_INDEX_FROM_NUM(nr)                ((nr) - 1)
-
-static ssize_t set_temp_status(struct i2c_client *client, struct fscher_data *data,
-                              const char *buf, size_t count, int nr, int reg)
-{
-       /* bits 2..7 reserved, 0 read only => mask with 0x02 */  
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
-       mutex_lock(&data->update_lock);
-       data->temp_status[TEMP_INDEX_FROM_NUM(nr)] &= ~v;
-       fscher_write_value(client, reg, v);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_temp_status(struct fscher_data *data, char *buf, int nr)
-{
-       /* bits 2..7 reserved => mask with 0x03 */
-       return sprintf(buf, "%u\n", data->temp_status[TEMP_INDEX_FROM_NUM(nr)] & 0x03);
-}
-
-#define TEMP_FROM_REG(val)     (((val) - 128) * 1000)
-
-static ssize_t show_temp_input(struct fscher_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_act[TEMP_INDEX_FROM_NUM(nr)]));
-}
-
-/*
- * The final conversion is specified in sensors.conf, as it depends on
- * mainboard specific values. We export the registers contents as
- * pseudo-hundredths-of-Volts (range 0V - 2.55V). Not that it makes much
- * sense per se, but it minimizes the conversions count and keeps the
- * values within a usual range.
- */
-#define VOLT_FROM_REG(val)     ((val) * 10)
-
-static ssize_t show_in_input(struct fscher_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[nr]));
-}
-
-
-
-static ssize_t show_revision(struct fscher_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%u\n", data->revision);
-}
-
-
-
-static ssize_t show_alarms(struct fscher_data *data, char *buf, int nr)
-{
-       /* bits 2, 5..6 reserved => mask with 0x9b */
-       return sprintf(buf, "%u\n", data->global_event & 0x9b);
-}
-
-
-
-static ssize_t set_control(struct i2c_client *client, struct fscher_data *data,
-                          const char *buf, size_t count, int nr, int reg)
-{
-       /* bits 1..7 reserved => mask with 0x01 */  
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0x01;
-
-       mutex_lock(&data->update_lock);
-       data->global_control = v;
-       fscher_write_value(client, reg, v);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_control(struct fscher_data *data, char *buf, int nr)
-{
-       /* bits 1..7 reserved => mask with 0x01 */
-       return sprintf(buf, "%u\n", data->global_control & 0x01);
-}
-
-
-
-static ssize_t set_watchdog_control(struct i2c_client *client, struct
-                                   fscher_data *data, const char *buf, size_t count,
-                                   int nr, int reg)
-{
-       /* bits 0..3 reserved => mask with 0xf0 */  
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0xf0;
-
-       mutex_lock(&data->update_lock);
-       data->watchdog[2] &= ~0xf0;
-       data->watchdog[2] |= v;
-       fscher_write_value(client, reg, data->watchdog[2]);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_watchdog_control(struct fscher_data *data, char *buf, int nr)
-{
-       /* bits 0..3 reserved, bit 5 write only => mask with 0xd0 */
-       return sprintf(buf, "%u\n", data->watchdog[2] & 0xd0);
-}
-
-static ssize_t set_watchdog_status(struct i2c_client *client, struct fscher_data *data,
-                                  const char *buf, size_t count, int nr, int reg)
-{
-       /* bits 0, 2..7 reserved => mask with 0x02 */  
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
-       mutex_lock(&data->update_lock);
-       data->watchdog[1] &= ~v;
-       fscher_write_value(client, reg, v);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_watchdog_status(struct fscher_data *data, char *buf, int nr)
-{
-       /* bits 0, 2..7 reserved => mask with 0x02 */
-       return sprintf(buf, "%u\n", data->watchdog[1] & 0x02);
-}
-
-static ssize_t set_watchdog_preset(struct i2c_client *client, struct fscher_data *data,
-                                  const char *buf, size_t count, int nr, int reg)
-{
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0xff;
-       
-       mutex_lock(&data->update_lock);
-       data->watchdog[0] = v;
-       fscher_write_value(client, reg, data->watchdog[0]);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_watchdog_preset(struct fscher_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%u\n", data->watchdog[0]);
-}
-
-static int __init sensors_fscher_init(void)
-{
-       return i2c_add_driver(&fscher_driver);
-}
-
-static void __exit sensors_fscher_exit(void)
-{
-       i2c_del_driver(&fscher_driver);
-}
-
-MODULE_AUTHOR("Reinhard Nissl <rnissl@gmx.de>");
-MODULE_DESCRIPTION("FSC Hermes driver");
-MODULE_LICENSE("GPL");
-
-module_init(sensors_fscher_init);
-module_exit(sensors_fscher_exit);
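
For reference, the sysfs macro machinery in the deleted driver above built its attributes by token pasting; sysfs_ro_n(fan, _input, 1, reg) expands by hand to roughly:

	static ssize_t show_fan_input(struct fscher_data *, char *, int);
	static ssize_t show_fan1_input(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
	{
		struct fscher_data *data = fscher_update_device(dev);
		return show_fan_input(data, buf, 1);
	}
	static DEVICE_ATTR(fan1_input, S_IRUGO, show_fan1_input, NULL);

i.e. one wrapper and one device attribute per (kind, offset, sub) triple, all dispatching into a shared show_/set_ implementation.
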
diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c
deleted file mode 100644 (file)
index 8a7bcf5..0000000
+++ /dev/null
@@ -1,654 +0,0 @@
-/*
-       fscpos.c - Kernel module for hardware monitoring with FSC Poseidon chips
-       Copyright (C) 2004, 2005 Stefan Ott <stefan@desire.ch>
-
-       This program is free software; you can redistribute it and/or modify
-       it under the terms of the GNU General Public License as published by
-       the Free Software Foundation; either version 2 of the License, or
-       (at your option) any later version.
-
-       This program is distributed in the hope that it will be useful,
-       but WITHOUT ANY WARRANTY; without even the implied warranty of
-       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-       GNU General Public License for more details.
-
-       You should have received a copy of the GNU General Public License
-       along with this program; if not, write to the Free Software
-       Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-/*
-       fujitsu siemens poseidon chip,
-       module based on the old fscpos module by Hermann Jung <hej@odn.de> and
-       the fscher module by Reinhard Nissl <rnissl@gmx.de>
-
-       original module based on lm80.c
-       Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
-       and Philip Edelbrock <phil@netroedge.com>
-
-       Thanks to Jean Delvare for reviewing my code and suggesting a lot of
-       improvements.
-*/
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/init.h>
-#include <linux/hwmon.h>
-#include <linux/err.h>
-#include <linux/mutex.h>
-#include <linux/sysfs.h>
-
-/*
- * Addresses to scan
- */
-static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END };
-
-/*
- * Insmod parameters
- */
-I2C_CLIENT_INSMOD_1(fscpos);
-
-/*
- * The FSCPOS registers
- */
-
-/* chip identification */
-#define FSCPOS_REG_IDENT_0             0x00
-#define FSCPOS_REG_IDENT_1             0x01
-#define FSCPOS_REG_IDENT_2             0x02
-#define FSCPOS_REG_REVISION            0x03
-
-/* global control and status */
-#define FSCPOS_REG_EVENT_STATE         0x04
-#define FSCPOS_REG_CONTROL             0x05
-
-/* watchdog */
-#define FSCPOS_REG_WDOG_PRESET         0x28
-#define FSCPOS_REG_WDOG_STATE          0x23
-#define FSCPOS_REG_WDOG_CONTROL                0x21
-
-/* voltages */
-#define FSCPOS_REG_VOLT_12             0x45
-#define FSCPOS_REG_VOLT_5              0x42
-#define FSCPOS_REG_VOLT_BATT           0x48
-
-/* fans - the chip does not support minimum speed for fan2 */
-static u8 FSCPOS_REG_PWM[] = { 0x55, 0x65 };
-static u8 FSCPOS_REG_FAN_ACT[] = { 0x0e, 0x6b, 0xab };
-static u8 FSCPOS_REG_FAN_STATE[] = { 0x0d, 0x62, 0xa2 };
-static u8 FSCPOS_REG_FAN_RIPPLE[] = { 0x0f, 0x6f, 0xaf };
-
-/* temperatures */
-static u8 FSCPOS_REG_TEMP_ACT[] = { 0x64, 0x32, 0x35 };
-static u8 FSCPOS_REG_TEMP_STATE[] = { 0x71, 0x81, 0x91 };
-
-/*
- * Functions declaration
- */
-static int fscpos_probe(struct i2c_client *client,
-                       const struct i2c_device_id *id);
-static int fscpos_detect(struct i2c_client *client, int kind,
-                        struct i2c_board_info *info);
-static int fscpos_remove(struct i2c_client *client);
-
-static int fscpos_read_value(struct i2c_client *client, u8 reg);
-static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value);
-static struct fscpos_data *fscpos_update_device(struct device *dev);
-static void fscpos_init_client(struct i2c_client *client);
-
-static void reset_fan_alarm(struct i2c_client *client, int nr);
-
-/*
- * Driver data (common to all clients)
- */
-static const struct i2c_device_id fscpos_id[] = {
-       { "fscpos", fscpos },
-       { }
-};
-
-static struct i2c_driver fscpos_driver = {
-       .class          = I2C_CLASS_HWMON,
-       .driver = {
-               .name   = "fscpos",
-       },
-       .probe          = fscpos_probe,
-       .remove         = fscpos_remove,
-       .id_table       = fscpos_id,
-       .detect         = fscpos_detect,
-       .address_data   = &addr_data,
-};
-
-/*
- * Client data (each client gets its own)
- */
-struct fscpos_data {
-       struct device *hwmon_dev;
-       struct mutex update_lock;
-       char valid;             /* 0 until following fields are valid */
-       unsigned long last_updated;     /* In jiffies */
-
-       /* register values */
-       u8 revision;            /* revision of chip */
-       u8 global_event;        /* global event status */
-       u8 global_control;      /* global control register */
-       u8 wdog_control;        /* watchdog control */
-       u8 wdog_state;          /* watchdog status */
-       u8 wdog_preset;         /* watchdog preset */
-       u8 volt[3];             /* 12, 5, battery current */
-       u8 temp_act[3];         /* temperature */
-       u8 temp_status[3];      /* status of sensor */
-       u8 fan_act[3];          /* fans revolutions per second */
-       u8 fan_status[3];       /* fan status */
-       u8 pwm[2];              /* fan min value for rps */
-       u8 fan_ripple[3];       /* divider for rps */
-};
-
-/* Temperature */
-#define TEMP_FROM_REG(val)     (((val) - 128) * 1000)
-
-static ssize_t show_temp_input(struct fscpos_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_act[nr - 1]));
-}
-
-static ssize_t show_temp_status(struct fscpos_data *data, char *buf, int nr)
-{
-       /* bits 2..7 reserved => mask with 0x03 */
-       return sprintf(buf, "%u\n", data->temp_status[nr - 1] & 0x03);
-}
-
-static ssize_t show_temp_reset(struct fscpos_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "1\n");
-}
-
-static ssize_t set_temp_reset(struct i2c_client *client, struct fscpos_data
-                       *data, const char *buf, size_t count, int nr, int reg)
-{
-       unsigned long v = simple_strtoul(buf, NULL, 10);
-       if (v != 1) {
-               dev_err(&client->dev, "temp_reset value %ld not supported. "
-                                       "Use 1 to reset the alarm!\n", v);
-               return -EINVAL;
-       }
-
-       dev_info(&client->dev, "You used the temp_reset feature which has not "
-                               "been proplerly tested. Please report your "
-                               "experience to the module author.\n");
-
-       /* Supported value: 2 (clears the status) */
-       fscpos_write_value(client, FSCPOS_REG_TEMP_STATE[nr - 1], 2);
-       return count;
-}
-
-/* Fans */
-#define RPM_FROM_REG(val)      ((val) * 60)
-
-static ssize_t show_fan_status(struct fscpos_data *data, char *buf, int nr)
-{
-       /* bits 0..1, 3..7 reserved => mask with 0x04 */
-       return sprintf(buf, "%u\n", data->fan_status[nr - 1] & 0x04);
-}
-
-static ssize_t show_fan_input(struct fscpos_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%u\n", RPM_FROM_REG(data->fan_act[nr - 1]));
-}
-
-static ssize_t show_fan_ripple(struct fscpos_data *data, char *buf, int nr)
-{
-       /* bits 2..7 reserved => mask with 0x03 */
-       return sprintf(buf, "%u\n", data->fan_ripple[nr - 1] & 0x03);
-}
-
-static ssize_t set_fan_ripple(struct i2c_client *client, struct fscpos_data
-                       *data, const char *buf, size_t count, int nr, int reg)
-{
-       /* supported values: 2, 4, 8 */
-       unsigned long v = simple_strtoul(buf, NULL, 10);
-
-       switch (v) {
-               case 2: v = 1; break;
-               case 4: v = 2; break;
-               case 8: v = 3; break;
-       default:
-               dev_err(&client->dev, "fan_ripple value %ld not supported. "
-                                       "Must be one of 2, 4 or 8!\n", v);
-               return -EINVAL;
-       }
-       
-       mutex_lock(&data->update_lock);
-       /* bits 2..7 reserved => mask with 0x03 */
-       data->fan_ripple[nr - 1] &= ~0x03;
-       data->fan_ripple[nr - 1] |= v;
-       
-       fscpos_write_value(client, reg, data->fan_ripple[nr - 1]);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_pwm(struct fscpos_data *data, char *buf, int nr)
-{
-       return sprintf(buf, "%u\n", data->pwm[nr - 1]);
-}
-
-static ssize_t set_pwm(struct i2c_client *client, struct fscpos_data *data,
-                               const char *buf, size_t count, int nr, int reg)
-{
-       unsigned long v = simple_strtoul(buf, NULL, 10);
-
-       /* Range: 0..255 */
-       if (v < 0) v = 0;
-       if (v > 255) v = 255;
-
-       mutex_lock(&data->update_lock);
-       data->pwm[nr - 1] = v;
-       fscpos_write_value(client, reg, data->pwm[nr - 1]);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static void reset_fan_alarm(struct i2c_client *client, int nr)
-{
-       fscpos_write_value(client, FSCPOS_REG_FAN_STATE[nr], 4);
-}
-
-/* Volts */
-#define VOLT_FROM_REG(val, mult)       ((val) * (mult) / 255)
-
-static ssize_t show_volt_12(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       struct fscpos_data *data = fscpos_update_device(dev);
-       return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[0], 14200));
-}
-
-static ssize_t show_volt_5(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       struct fscpos_data *data = fscpos_update_device(dev);
-       return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[1], 6600));
-}
-
-static ssize_t show_volt_batt(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       struct fscpos_data *data = fscpos_update_device(dev);
-       return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[2], 3300));
-}
-
-/* Watchdog */
-static ssize_t show_wdog_control(struct fscpos_data *data, char *buf)
-{
-       /* bits 0..3 reserved, bit 6 write only => mask with 0xb0 */
-       return sprintf(buf, "%u\n", data->wdog_control & 0xb0);
-}
-
-static ssize_t set_wdog_control(struct i2c_client *client, struct fscpos_data
-                               *data, const char *buf, size_t count, int reg)
-{
-       /* bits 0..3 reserved => mask with 0xf0 */
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0xf0;
-
-       mutex_lock(&data->update_lock);
-       data->wdog_control &= ~0xf0;
-       data->wdog_control |= v;
-       fscpos_write_value(client, reg, data->wdog_control);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_wdog_state(struct fscpos_data *data, char *buf)
-{
-       /* bits 0, 2..7 reserved => mask with 0x02 */
-       return sprintf(buf, "%u\n", data->wdog_state & 0x02);
-}
-
-static ssize_t set_wdog_state(struct i2c_client *client, struct fscpos_data
-                               *data, const char *buf, size_t count, int reg)
-{
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
-       /* Valid values: 2 (clear) */
-       if (v != 2) {
-               dev_err(&client->dev, "wdog_state value %ld not supported. "
-                                       "Must be 2 to clear the state!\n", v);
-               return -EINVAL;
-       }
-
-       mutex_lock(&data->update_lock);
-       data->wdog_state &= ~v;
-       fscpos_write_value(client, reg, v);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static ssize_t show_wdog_preset(struct fscpos_data *data, char *buf)
-{
-       return sprintf(buf, "%u\n", data->wdog_preset);
-}
-
-static ssize_t set_wdog_preset(struct i2c_client *client, struct fscpos_data
-                               *data, const char *buf, size_t count, int reg)
-{
-       unsigned long v = simple_strtoul(buf, NULL, 10) & 0xff;
-
-       mutex_lock(&data->update_lock);
-       data->wdog_preset = v;
-       fscpos_write_value(client, reg, data->wdog_preset);
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-/* Event */
-static ssize_t show_event(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       /* bits 5..7 reserved => mask with 0x1f */
-       struct fscpos_data *data = fscpos_update_device(dev);
-       return sprintf(buf, "%u\n", data->global_event & 0x9b);
-}
-
-/*
- * Sysfs stuff
- */
-#define create_getter(kind, sub) \
-       static ssize_t sysfs_show_##kind##sub(struct device *dev, struct device_attribute *attr, char *buf) \
-       { \
-               struct fscpos_data *data = fscpos_update_device(dev); \
-               return show_##kind##sub(data, buf); \
-       }
-
-#define create_getter_n(kind, offset, sub) \
-       static ssize_t sysfs_show_##kind##offset##sub(struct device *dev, struct device_attribute *attr, char\
-                                                                       *buf) \
-       { \
-               struct fscpos_data *data = fscpos_update_device(dev); \
-               return show_##kind##sub(data, buf, offset); \
-       }
-
-#define create_setter(kind, sub, reg) \
-       static ssize_t sysfs_set_##kind##sub (struct device *dev, struct device_attribute *attr, const char \
-                                                       *buf, size_t count) \
-       { \
-               struct i2c_client *client = to_i2c_client(dev); \
-               struct fscpos_data *data = i2c_get_clientdata(client); \
-               return set_##kind##sub(client, data, buf, count, reg); \
-       }
-
-#define create_setter_n(kind, offset, sub, reg) \
-       static ssize_t sysfs_set_##kind##offset##sub (struct device *dev, struct device_attribute *attr, \
-                                       const char *buf, size_t count) \
-       { \
-               struct i2c_client *client = to_i2c_client(dev); \
-               struct fscpos_data *data = i2c_get_clientdata(client); \
-               return set_##kind##sub(client, data, buf, count, offset, reg);\
-       }
-
-#define create_sysfs_device_ro(kind, sub, offset) \
-       static DEVICE_ATTR(kind##offset##sub, S_IRUGO, \
-                                       sysfs_show_##kind##offset##sub, NULL);
-
-#define create_sysfs_device_rw(kind, sub, offset) \
-       static DEVICE_ATTR(kind##offset##sub, S_IRUGO | S_IWUSR, \
-               sysfs_show_##kind##offset##sub, sysfs_set_##kind##offset##sub);
-
-#define sysfs_ro_n(kind, sub, offset) \
-       create_getter_n(kind, offset, sub); \
-       create_sysfs_device_ro(kind, sub, offset);
-
-#define sysfs_rw_n(kind, sub, offset, reg) \
-       create_getter_n(kind, offset, sub); \
-       create_setter_n(kind, offset, sub, reg); \
-       create_sysfs_device_rw(kind, sub, offset);
-
-#define sysfs_rw(kind, sub, reg) \
-       create_getter(kind, sub); \
-       create_setter(kind, sub, reg); \
-       create_sysfs_device_rw(kind, sub,);
-
-#define sysfs_fan_with_min(offset, reg_status, reg_ripple, reg_min) \
-       sysfs_fan(offset, reg_status, reg_ripple); \
-       sysfs_rw_n(pwm,, offset, reg_min);
-
-#define sysfs_fan(offset, reg_status, reg_ripple) \
-       sysfs_ro_n(fan, _input, offset); \
-       sysfs_ro_n(fan, _status, offset); \
-       sysfs_rw_n(fan, _ripple, offset, reg_ripple);
-
-#define sysfs_temp(offset, reg_status) \
-       sysfs_ro_n(temp, _input, offset); \
-       sysfs_ro_n(temp, _status, offset); \
-       sysfs_rw_n(temp, _reset, offset, reg_status);
-
-#define sysfs_watchdog(reg_wdog_preset, reg_wdog_state, reg_wdog_control) \
-       sysfs_rw(wdog, _control, reg_wdog_control); \
-       sysfs_rw(wdog, _preset, reg_wdog_preset); \
-       sysfs_rw(wdog, _state, reg_wdog_state);
-
-sysfs_fan_with_min(1, FSCPOS_REG_FAN_STATE[0], FSCPOS_REG_FAN_RIPPLE[0],
-                                                       FSCPOS_REG_PWM[0]);
-sysfs_fan_with_min(2, FSCPOS_REG_FAN_STATE[1], FSCPOS_REG_FAN_RIPPLE[1],
-                                                       FSCPOS_REG_PWM[1]);
-sysfs_fan(3, FSCPOS_REG_FAN_STATE[2], FSCPOS_REG_FAN_RIPPLE[2]);
-
-sysfs_temp(1, FSCPOS_REG_TEMP_STATE[0]);
-sysfs_temp(2, FSCPOS_REG_TEMP_STATE[1]);
-sysfs_temp(3, FSCPOS_REG_TEMP_STATE[2]);
-
-sysfs_watchdog(FSCPOS_REG_WDOG_PRESET, FSCPOS_REG_WDOG_STATE,
-                                               FSCPOS_REG_WDOG_CONTROL);
-
-static DEVICE_ATTR(event, S_IRUGO, show_event, NULL);
-static DEVICE_ATTR(in0_input, S_IRUGO, show_volt_12, NULL);
-static DEVICE_ATTR(in1_input, S_IRUGO, show_volt_5, NULL);
-static DEVICE_ATTR(in2_input, S_IRUGO, show_volt_batt, NULL);
-
-static struct attribute *fscpos_attributes[] = {
-       &dev_attr_event.attr,
-       &dev_attr_in0_input.attr,
-       &dev_attr_in1_input.attr,
-       &dev_attr_in2_input.attr,
-
-       &dev_attr_wdog_control.attr,
-       &dev_attr_wdog_preset.attr,
-       &dev_attr_wdog_state.attr,
-
-       &dev_attr_temp1_input.attr,
-       &dev_attr_temp1_status.attr,
-       &dev_attr_temp1_reset.attr,
-       &dev_attr_temp2_input.attr,
-       &dev_attr_temp2_status.attr,
-       &dev_attr_temp2_reset.attr,
-       &dev_attr_temp3_input.attr,
-       &dev_attr_temp3_status.attr,
-       &dev_attr_temp3_reset.attr,
-
-       &dev_attr_fan1_input.attr,
-       &dev_attr_fan1_status.attr,
-       &dev_attr_fan1_ripple.attr,
-       &dev_attr_pwm1.attr,
-       &dev_attr_fan2_input.attr,
-       &dev_attr_fan2_status.attr,
-       &dev_attr_fan2_ripple.attr,
-       &dev_attr_pwm2.attr,
-       &dev_attr_fan3_input.attr,
-       &dev_attr_fan3_status.attr,
-       &dev_attr_fan3_ripple.attr,
-       NULL
-};
-
-static const struct attribute_group fscpos_group = {
-       .attrs = fscpos_attributes,
-};
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int fscpos_detect(struct i2c_client *new_client, int kind,
-                        struct i2c_board_info *info)
-{
-       struct i2c_adapter *adapter = new_client->adapter;
-
-       if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
-               return -ENODEV;
-
-       /* Do the remaining detection unless force or force_fscpos parameter */
-       if (kind < 0) {
-               if ((fscpos_read_value(new_client, FSCPOS_REG_IDENT_0)
-                       != 0x50) /* 'P' */
-               || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_1)
-                       != 0x45) /* 'E' */
-               || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_2)
-                       != 0x47))/* 'G' */
-                       return -ENODEV;
-       }
-
-       strlcpy(info->type, "fscpos", I2C_NAME_SIZE);
-
-       return 0;
-}
-
-static int fscpos_probe(struct i2c_client *new_client,
-                       const struct i2c_device_id *id)
-{
-       struct fscpos_data *data;
-       int err;
-
-       data = kzalloc(sizeof(struct fscpos_data), GFP_KERNEL);
-       if (!data) {
-               err = -ENOMEM;
-               goto exit;
-       }
-
-       i2c_set_clientdata(new_client, data);
-       data->valid = 0;
-       mutex_init(&data->update_lock);
-
-       /* Inizialize the fscpos chip */
-       fscpos_init_client(new_client);
-
-       /* Announce that the chip was found */
-       dev_info(&new_client->dev, "Found fscpos chip, rev %u\n", data->revision);
-
-       /* Register sysfs hooks */
-       if ((err = sysfs_create_group(&new_client->dev.kobj, &fscpos_group)))
-               goto exit_free;
-
-       data->hwmon_dev = hwmon_device_register(&new_client->dev);
-       if (IS_ERR(data->hwmon_dev)) {
-               err = PTR_ERR(data->hwmon_dev);
-               goto exit_remove_files;
-       }
-
-       return 0;
-
-exit_remove_files:
-       sysfs_remove_group(&new_client->dev.kobj, &fscpos_group);
-exit_free:
-       kfree(data);
-exit:
-       return err;
-}
-
-static int fscpos_remove(struct i2c_client *client)
-{
-       struct fscpos_data *data = i2c_get_clientdata(client);
-
-       hwmon_device_unregister(data->hwmon_dev);
-       sysfs_remove_group(&client->dev.kobj, &fscpos_group);
-
-       kfree(data);
-       return 0;
-}
-
-static int fscpos_read_value(struct i2c_client *client, u8 reg)
-{
-       dev_dbg(&client->dev, "Read reg 0x%02x\n", reg);
-       return i2c_smbus_read_byte_data(client, reg);
-}
-
-static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value)
-{
-       dev_dbg(&client->dev, "Write reg 0x%02x, val 0x%02x\n", reg, value);
-       return i2c_smbus_write_byte_data(client, reg, value);
-}
-
-/* Called when we have found a new FSCPOS chip */
-static void fscpos_init_client(struct i2c_client *client)
-{
-       struct fscpos_data *data = i2c_get_clientdata(client);
-
-       /* read revision from chip */
-       data->revision = fscpos_read_value(client, FSCPOS_REG_REVISION);
-}
-
-static struct fscpos_data *fscpos_update_device(struct device *dev)
-{
-       struct i2c_client *client = to_i2c_client(dev);
-       struct fscpos_data *data = i2c_get_clientdata(client);
-
-       mutex_lock(&data->update_lock);
-
-       if (time_after(jiffies, data->last_updated + 2 * HZ) || !data->valid) {
-               int i;
-
-               dev_dbg(&client->dev, "Starting fscpos update\n");
-
-               for (i = 0; i < 3; i++) {
-                       data->temp_act[i] = fscpos_read_value(client,
-                                               FSCPOS_REG_TEMP_ACT[i]);
-                       data->temp_status[i] = fscpos_read_value(client,
-                                               FSCPOS_REG_TEMP_STATE[i]);
-                       data->fan_act[i] = fscpos_read_value(client,
-                                               FSCPOS_REG_FAN_ACT[i]);
-                       data->fan_status[i] = fscpos_read_value(client,
-                                               FSCPOS_REG_FAN_STATE[i]);
-                       data->fan_ripple[i] = fscpos_read_value(client,
-                                               FSCPOS_REG_FAN_RIPPLE[i]);
-                       if (i < 2) {
-                               /* fan2_min is not supported by the chip */
-                               data->pwm[i] = fscpos_read_value(client,
-                                                       FSCPOS_REG_PWM[i]);
-                       }
-                       /* reset fan status if speed is back to > 0 */
-                       if (data->fan_status[i] != 0 && data->fan_act[i] > 0) {
-                               reset_fan_alarm(client, i);
-                       }
-               }
-
-               data->volt[0] = fscpos_read_value(client, FSCPOS_REG_VOLT_12);
-               data->volt[1] = fscpos_read_value(client, FSCPOS_REG_VOLT_5);
-               data->volt[2] = fscpos_read_value(client, FSCPOS_REG_VOLT_BATT);
-
-               data->wdog_preset = fscpos_read_value(client,
-                                                       FSCPOS_REG_WDOG_PRESET);
-               data->wdog_state = fscpos_read_value(client,
-                                                       FSCPOS_REG_WDOG_STATE);
-               data->wdog_control = fscpos_read_value(client,
-                                               FSCPOS_REG_WDOG_CONTROL);
-
-               data->global_event = fscpos_read_value(client,
-                                               FSCPOS_REG_EVENT_STATE);
-
-               data->last_updated = jiffies;
-               data->valid = 1;
-       }
-       mutex_unlock(&data->update_lock);
-       return data;
-}
-
-static int __init sm_fscpos_init(void)
-{
-       return i2c_add_driver(&fscpos_driver);
-}
-
-static void __exit sm_fscpos_exit(void)
-{
-       i2c_del_driver(&fscpos_driver);
-}
-
-MODULE_AUTHOR("Stefan Ott <stefan@desire.ch> based on work from Hermann Jung "
-                               "<hej@odn.de>, Frodo Looijaard <frodol@dds.nl>"
-                               " and Philip Edelbrock <phil@netroedge.com>");
-MODULE_DESCRIPTION("fujitsu siemens poseidon chip driver");
-MODULE_LICENSE("GPL");
-
-module_init(sm_fscpos_init);
-module_exit(sm_fscpos_exit);
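
As a worked example of the scaling in the deleted fscpos driver: VOLT_FROM_REG(val, mult) computes val * mult / 255 in millivolts, so a raw reading of 217 on the 12 V input (mult = 14200) reports 217 * 14200 / 255 = 12083 mV, about 12.08 V, and full scale (255) corresponds to 14.2 V. TEMP_FROM_REG likewise maps a raw byte to millidegrees with a 128 offset: a reading of 170 reports (170 - 128) * 1000 = 42000, i.e. 42 C.
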
index 9386e2a39211f75e9a610e899fc43ff755c9dc25..6c9a04136e0aefb9a1b4430146b1453ed9acbe3f 100644 (file)
@@ -259,7 +259,7 @@ static int ltc4215_probe(struct i2c_client *client,
        mutex_init(&data->update_lock);
 
        /* Initialize the LTC4215 chip */
-       /* TODO */
+       i2c_smbus_write_byte_data(client, LTC4215_FAULT, 0x00);
 
        /* Register sysfs hooks */
        ret = sysfs_create_group(&client->dev.kobj, &ltc4215_group);
index 034b2c5158486dd0c3ccc5482b82de02098961b1..e389643336126897d9abd8adf08cfe3464425712 100644 (file)
@@ -382,7 +382,8 @@ static int ltc4245_probe(struct i2c_client *client,
        mutex_init(&data->update_lock);
 
        /* Initialize the LTC4245 chip */
-       /* TODO */
+       i2c_smbus_write_byte_data(client, LTC4245_FAULT1, 0x00);
+       i2c_smbus_write_byte_data(client, LTC4245_FAULT2, 0x00);
 
        /* Register sysfs hooks */
        ret = sysfs_create_group(&client->dev.kobj, &ltc4245_group);
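
Both LTC hunks above replace the initialization TODO with SMBus writes that clear the chips' latched fault registers at probe time, so faults latched before the driver loaded are not reported as current; the pattern is one i2c_smbus_write_byte_data(client, reg, 0x00) per fault register, with LTC4215_FAULT and LTC4245_FAULT1/2 register constants defined elsewhere in each driver.
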
index 949c97ff57e35bec6917c4aab4aca2ebaa165c32..1f20a042a4f508197ff45ebec146a9ff2daeeccd 100644 (file)
@@ -29,8 +29,8 @@
 
 #include <asm/idle.h>
 
-#include "../dma/ioatdma_hw.h"
-#include "../dma/ioatdma_registers.h"
+#include "../dma/ioat/hw.h"
+#include "../dma/ioat/registers.h"
 
 #define I7300_IDLE_DRIVER_VERSION      "1.55"
 #define I7300_PRINT                    "i7300_idle:"
@@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void)
                udelay(10);
 
                sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-                       IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+                       IOAT_CHANSTS_STATUS;
 
-               if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE)
+               if (sts != IOAT_CHANSTS_ACTIVE)
                        break;
 
        }
@@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl,
        udelay(1000);
 
        chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-                       IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+                       IOAT_CHANSTS_STATUS;
 
-       if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) {
+       if (chan_sts != IOAT_CHANSTS_DONE) {
                /* Not complete, reset the channel */
                writeb(IOAT_CHANCMD_RESET,
                       ioat_chanbase + IOAT1_CHANCMD_OFFSET);
@@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void)
                       ioat_chanbase + IOAT1_CHANCMD_OFFSET);
 
                chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-                       IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+                       IOAT_CHANSTS_STATUS;
 
-               if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
+               if (chan_sts != IOAT_CHANSTS_ACTIVE) {
                        writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
                        break;
                }
@@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void)
        }
 
        chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-                       IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+                       IOAT_CHANSTS_STATUS;
 
        /*
         * We tried to reset multiple times. If IO A/T channel is still active
         * flag an error and return without cleanup. Memory leak is better
         * than random corruption in that extreme error situation.
         */
-       if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
+       if (chan_sts == IOAT_CHANSTS_ACTIVE) {
                printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
                        " Not freeing resources\n");
                return;
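
All of the renames above sit in the same bounded-poll pattern on the IOAT channel status register; schematically (a sketch only, with a made-up MAX_RETRIES bound):

	int i;
	u64 sts;

	for (i = 0; i < MAX_RETRIES; i++) {
		sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
			IOAT_CHANSTS_STATUS;
		if (sts != IOAT_CHANSTS_ACTIVE)
			break;		/* channel has stopped */
		udelay(10);
	}
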
index 556539d617a43295a8e4db43a3df6230da19aac5..e828aab7daceada9eb8858fb19fa3fc755baa7ea 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/types.h>
 #include <linux/input.h>
 #include <linux/module.h>
 #include <linux/random.h>
@@ -514,7 +515,7 @@ static void input_disconnect_device(struct input_dev *dev)
         * that there are no threads in the middle of input_open_device()
         */
        mutex_lock(&dev->mutex);
-       dev->going_away = 1;
+       dev->going_away = true;
        mutex_unlock(&dev->mutex);
 
        spin_lock_irq(&dev->event_lock);
@@ -1259,10 +1260,71 @@ static int input_dev_uevent(struct device *device, struct kobj_uevent_env *env)
        return 0;
 }
 
+#define INPUT_DO_TOGGLE(dev, type, bits, on)                   \
+       do {                                                    \
+               int i;                                          \
+               if (!test_bit(EV_##type, dev->evbit))           \
+                       break;                                  \
+               for (i = 0; i < type##_MAX; i++) {              \
+                       if (!test_bit(i, dev->bits##bit) ||     \
+                           !test_bit(i, dev->bits))            \
+                               continue;                       \
+                       dev->event(dev, EV_##type, i, on);      \
+               }                                               \
+       } while (0)
+
+static void input_dev_reset(struct input_dev *dev, bool activate)
+{
+       if (!dev->event)
+               return;
+
+       INPUT_DO_TOGGLE(dev, LED, led, activate);
+       INPUT_DO_TOGGLE(dev, SND, snd, activate);
+
+       if (activate && test_bit(EV_REP, dev->evbit)) {
+               dev->event(dev, EV_REP, REP_PERIOD, dev->rep[REP_PERIOD]);
+               dev->event(dev, EV_REP, REP_DELAY, dev->rep[REP_DELAY]);
+       }
+}
+
+#ifdef CONFIG_PM
+static int input_dev_suspend(struct device *dev)
+{
+       struct input_dev *input_dev = to_input_dev(dev);
+
+       mutex_lock(&input_dev->mutex);
+       input_dev_reset(input_dev, false);
+       mutex_unlock(&input_dev->mutex);
+
+       return 0;
+}
+
+static int input_dev_resume(struct device *dev)
+{
+       struct input_dev *input_dev = to_input_dev(dev);
+
+       mutex_lock(&input_dev->mutex);
+       input_dev_reset(input_dev, true);
+       mutex_unlock(&input_dev->mutex);
+
+       return 0;
+}
+
+static const struct dev_pm_ops input_dev_pm_ops = {
+       .suspend        = input_dev_suspend,
+       .resume         = input_dev_resume,
+       .poweroff       = input_dev_suspend,
+       .restore        = input_dev_resume,
+};
+#endif /* CONFIG_PM */
+
 static struct device_type input_dev_type = {
        .groups         = input_dev_attr_groups,
        .release        = input_dev_release,
        .uevent         = input_dev_uevent,
+#ifdef CONFIG_PM
+       .pm             = &input_dev_pm_ops,
+#endif
 };
 
 static char *input_devnode(struct device *dev, mode_t *mode)
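
For clarity, INPUT_DO_TOGGLE(dev, LED, led, on) in the hunk above expands by hand to roughly the following, replaying every currently asserted LED state through the device's event callback:

	if (test_bit(EV_LED, dev->evbit)) {
		int i;

		for (i = 0; i < LED_MAX; i++) {
			if (!test_bit(i, dev->ledbit) ||
			    !test_bit(i, dev->led))
				continue;
			dev->event(dev, EV_LED, i, on);
		}
	}

input_dev_reset() uses this to quiesce LEDs and sounds on suspend and to restore them (plus the repeat rate) on resume.
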
index 3525c19be4286e14f71568656142ff7215fc8dbc..ee98b1bc5d890c28de0f50aea1db487535fedee8 100644 (file)
@@ -24,6 +24,16 @@ config KEYBOARD_AAED2000
          To compile this driver as a module, choose M here: the
          module will be called aaed2000_kbd.
 
+config KEYBOARD_ADP5588
+       tristate "ADP5588 I2C QWERTY Keypad and IO Expander"
+       depends on I2C
+       help
+         Say Y here if you want to use an ADP5588 attached to your
+         system I2C bus.
+
+         To compile this driver as a module, choose M here: the
+         module will be called adp5588-keys.
+
 config KEYBOARD_AMIGA
        tristate "Amiga keyboard"
        depends on AMIGA
@@ -104,6 +114,16 @@ config KEYBOARD_ATKBD_RDI_KEYCODES
          right-hand column will be interpreted as the key shown in the
          left-hand column.
 
+config QT2160
+       tristate "Atmel AT42QT2160 Touch Sensor Chip"
+       depends on I2C && EXPERIMENTAL
+       help
+         If you say yes here you get support for the Atmel AT42QT2160
+         Touch Sensor chip as a keyboard input.
+
+         This driver can also be built as a module. If so, the module
+         will be called qt2160.
+
 config KEYBOARD_BFIN
        tristate "Blackfin BF54x keypad support"
        depends on (BF54x && !BF544)
@@ -251,6 +271,17 @@ config KEYBOARD_MAPLE
          To compile this driver as a module, choose M here: the
          module will be called maple_keyb.
 
+config KEYBOARD_MAX7359
+       tristate "Maxim MAX7359 Key Switch Controller"
+       depends on I2C
+       help
+         If you say yes here you get support for the Maxim MAX7359 Key
+         Switch Controller chip. This provides microprocessors with
+         management of up to 64 key switches.
+
+         To compile this driver as a module, choose M here: the
+         module will be called max7359_keypad.
+
 config KEYBOARD_NEWTON
        tristate "Newton keyboard"
        select SERIO
@@ -260,6 +291,15 @@ config KEYBOARD_NEWTON
          To compile this driver as a module, choose M here: the
          module will be called newtonkbd.
 
+config KEYBOARD_OPENCORES
+       tristate "OpenCores Keyboard Controller"
+       help
+         Say Y here if you want to use the OpenCores Keyboard Controller
+         http://www.opencores.org/project,keyboardcontroller
+
+         To compile this driver as a module, choose M here: the
+         module will be called opencores-kbd.
+
 config KEYBOARD_PXA27x
        tristate "PXA27x/PXA3xx keypad support"
        depends on PXA27x || PXA3xx
index 8a7a22b302666618c4472aa11b8d91b395e39bd6..babad5e58b77d323f357618da87accd4f97dcce9 100644 (file)
@@ -5,6 +5,7 @@
 # Each configuration option enables a list of files.
 
 obj-$(CONFIG_KEYBOARD_AAED2000)                += aaed2000_kbd.o
+obj-$(CONFIG_KEYBOARD_ADP5588)         += adp5588-keys.o
 obj-$(CONFIG_KEYBOARD_AMIGA)           += amikbd.o
 obj-$(CONFIG_KEYBOARD_ATARI)           += atakbd.o
 obj-$(CONFIG_KEYBOARD_ATKBD)           += atkbd.o
@@ -21,10 +22,13 @@ obj-$(CONFIG_KEYBOARD_LM8323)               += lm8323.o
 obj-$(CONFIG_KEYBOARD_LOCOMO)          += locomokbd.o
 obj-$(CONFIG_KEYBOARD_MAPLE)           += maple_keyb.o
 obj-$(CONFIG_KEYBOARD_MATRIX)          += matrix_keypad.o
+obj-$(CONFIG_KEYBOARD_MAX7359)         += max7359_keypad.o
 obj-$(CONFIG_KEYBOARD_NEWTON)          += newtonkbd.o
 obj-$(CONFIG_KEYBOARD_OMAP)            += omap-keypad.o
+obj-$(CONFIG_KEYBOARD_OPENCORES)       += opencores-kbd.o
 obj-$(CONFIG_KEYBOARD_PXA27x)          += pxa27x_keypad.o
 obj-$(CONFIG_KEYBOARD_PXA930_ROTARY)   += pxa930_rotary.o
+obj-$(CONFIG_KEYBOARD_QT2160)          += qt2160.o
 obj-$(CONFIG_KEYBOARD_SH_KEYSC)                += sh_keysc.o
 obj-$(CONFIG_KEYBOARD_SPITZ)           += spitzkbd.o
 obj-$(CONFIG_KEYBOARD_STOWAWAY)                += stowaway.o
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
new file mode 100644 (file)
index 0000000..d48c808
--- /dev/null
@@ -0,0 +1,361 @@
+/*
+ * File: drivers/input/keyboard/adp5588-keys.c
+ * Description:  keypad driver for ADP5588 I2C QWERTY Keypad and IO Expander
+ * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (C) 2008-2009 Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/input.h>
+#include <linux/i2c.h>
+
+#include <linux/i2c/adp5588.h>
+
+/* Configuration Register 1 */
+#define AUTO_INC       (1 << 7)
+#define GPIEM_CFG      (1 << 6)
+#define OVR_FLOW_M     (1 << 5)
+#define INT_CFG                (1 << 4)
+#define OVR_FLOW_IEN   (1 << 3)
+#define K_LCK_IM       (1 << 2)
+#define GPI_IEN                (1 << 1)
+#define KE_IEN         (1 << 0)
+
+/* Interrupt Status Register */
+#define CMP2_INT       (1 << 5)
+#define CMP1_INT       (1 << 4)
+#define OVR_FLOW_INT   (1 << 3)
+#define K_LCK_INT      (1 << 2)
+#define GPI_INT                (1 << 1)
+#define KE_INT         (1 << 0)
+
+/* Key Lock and Event Counter Register */
+#define K_LCK_EN       (1 << 6)
+#define LCK21          0x30
+#define KEC            0xF
+
+/* Key Event Register xy */
+#define KEY_EV_PRESSED         (1 << 7)
+#define KEY_EV_MASK            (0x7F)
+
+#define KP_SEL(x)              (0xFFFF >> (16 - x))    /* 2^x-1 */
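+/* e.g. KP_SEL(3) = 0xFFFF >> 13 = 0x0007, selecting rows/columns 0..2 */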
+
+#define KEYP_MAX_EVENT         10
+
+/*
+ * Early, pre-4.0 silicon requires the readout to be delayed by at
+ * least 25 ms, since the Event Counter Register is updated only 25 ms
+ * after the interrupt is asserted.
+ */
+#define WA_DELAYED_READOUT_REVID(rev)          ((rev) < 4)
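+/* (for such parts the probe below programs a 30 ms work delay) */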
+
+struct adp5588_kpad {
+       struct i2c_client *client;
+       struct input_dev *input;
+       struct delayed_work work;
+       unsigned long delay;
+       unsigned short keycode[ADP5588_KEYMAPSIZE];
+};
+
+static int adp5588_read(struct i2c_client *client, u8 reg)
+{
+       int ret = i2c_smbus_read_byte_data(client, reg);
+
+       if (ret < 0)
+               dev_err(&client->dev, "Read Error\n");
+
+       return ret;
+}
+
+static int adp5588_write(struct i2c_client *client, u8 reg, u8 val)
+{
+       return i2c_smbus_write_byte_data(client, reg, val);
+}
+
+static void adp5588_work(struct work_struct *work)
+{
+       struct adp5588_kpad *kpad = container_of(work,
+                                               struct adp5588_kpad, work.work);
+       struct i2c_client *client = kpad->client;
+       int i, key, status, ev_cnt;
+
+       status = adp5588_read(client, INT_STAT);
+
+       if (status & OVR_FLOW_INT)      /* Unlikely and should never happen */
+               dev_err(&client->dev, "Event Overflow Error\n");
+
+       if (status & KE_INT) {
+               ev_cnt = adp5588_read(client, KEY_LCK_EC_STAT) & KEC;
+               if (ev_cnt) {
+                       for (i = 0; i < ev_cnt; i++) {
+                               key = adp5588_read(client, Key_EVENTA + i);
+                               input_report_key(kpad->input,
+                                       kpad->keycode[(key & KEY_EV_MASK) - 1],
+                                       key & KEY_EV_PRESSED);
+                       }
+                       input_sync(kpad->input);
+               }
+       }
+       adp5588_write(client, INT_STAT, status); /* Status is W1C */
+}
+
+static irqreturn_t adp5588_irq(int irq, void *handle)
+{
+       struct adp5588_kpad *kpad = handle;
+
+       /*
+        * Use keventd context to read the event FIFO registers.
+        * Schedule the readout at least 25 ms after notification
+        * for REVID < 4.
+        */
+
+       schedule_delayed_work(&kpad->work, kpad->delay);
+
+       return IRQ_HANDLED;
+}
+
+static int __devinit adp5588_setup(struct i2c_client *client)
+{
+       struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+       int i, ret;
+
+       ret = adp5588_write(client, KP_GPIO1, KP_SEL(pdata->rows));
+       ret |= adp5588_write(client, KP_GPIO2, KP_SEL(pdata->cols) & 0xFF);
+       ret |= adp5588_write(client, KP_GPIO3, KP_SEL(pdata->cols) >> 8);
+
+       if (pdata->en_keylock) {
+               ret |= adp5588_write(client, UNLOCK1, pdata->unlock_key1);
+               ret |= adp5588_write(client, UNLOCK2, pdata->unlock_key2);
+               ret |= adp5588_write(client, KEY_LCK_EC_STAT, K_LCK_EN);
+       }
+
+       for (i = 0; i < KEYP_MAX_EVENT; i++)
+               ret |= adp5588_read(client, Key_EVENTA);
+
+       ret |= adp5588_write(client, INT_STAT, CMP2_INT | CMP1_INT |
+                                       OVR_FLOW_INT | K_LCK_INT |
+                                       GPI_INT | KE_INT); /* Status is W1C */
+
+       ret |= adp5588_write(client, CFG, INT_CFG | OVR_FLOW_IEN | KE_IEN);
+
+       if (ret < 0) {
+               dev_err(&client->dev, "Write Error\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static int __devinit adp5588_probe(struct i2c_client *client,
+                                       const struct i2c_device_id *id)
+{
+       struct adp5588_kpad *kpad;
+       struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+       struct input_dev *input;
+       unsigned int revid;
+       int ret, i;
+       int error;
+
+       if (!i2c_check_functionality(client->adapter,
+                                       I2C_FUNC_SMBUS_BYTE_DATA)) {
+               dev_err(&client->dev, "SMBUS Byte Data not Supported\n");
+               return -EIO;
+       }
+
+       if (!pdata) {
+               dev_err(&client->dev, "no platform data?\n");
+               return -EINVAL;
+       }
+
+       if (!pdata->rows || !pdata->cols || !pdata->keymap) {
+               dev_err(&client->dev, "no rows, cols or keymap from pdata\n");
+               return -EINVAL;
+       }
+
+       if (pdata->keymapsize != ADP5588_KEYMAPSIZE) {
+               dev_err(&client->dev, "invalid keymapsize\n");
+               return -EINVAL;
+       }
+
+       if (!client->irq) {
+               dev_err(&client->dev, "no IRQ?\n");
+               return -EINVAL;
+       }
+
+       kpad = kzalloc(sizeof(*kpad), GFP_KERNEL);
+       input = input_allocate_device();
+       if (!kpad || !input) {
+               error = -ENOMEM;
+               goto err_free_mem;
+       }
+
+       kpad->client = client;
+       kpad->input = input;
+       INIT_DELAYED_WORK(&kpad->work, adp5588_work);
+
+       ret = adp5588_read(client, DEV_ID);
+       if (ret < 0) {
+               error = ret;
+               goto err_free_mem;
+       }
+
+       revid = (u8) ret & ADP5588_DEVICE_ID_MASK;
+       if (WA_DELAYED_READOUT_REVID(revid))
+               kpad->delay = msecs_to_jiffies(30);
+
+       input->name = client->name;
+       input->phys = "adp5588-keys/input0";
+       input->dev.parent = &client->dev;
+
+       input_set_drvdata(input, kpad);
+
+       input->id.bustype = BUS_I2C;
+       input->id.vendor = 0x0001;
+       input->id.product = 0x0001;
+       input->id.version = revid;
+
+       input->keycodesize = sizeof(kpad->keycode[0]);
+       input->keycodemax = pdata->keymapsize;
+       input->keycode = kpad->keycode;
+
+       memcpy(kpad->keycode, pdata->keymap,
+               pdata->keymapsize * input->keycodesize);
+
+       /* setup input device */
+       __set_bit(EV_KEY, input->evbit);
+
+       if (pdata->repeat)
+               __set_bit(EV_REP, input->evbit);
+
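+       /* mask with KEY_MAX so an out-of-range keycode cannot overflow keybit */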
+       for (i = 0; i < input->keycodemax; i++)
+               __set_bit(kpad->keycode[i] & KEY_MAX, input->keybit);
+       __clear_bit(KEY_RESERVED, input->keybit);
+
+       error = input_register_device(input);
+       if (error) {
+               dev_err(&client->dev, "unable to register input device\n");
+               goto err_free_mem;
+       }
+
+       error = request_irq(client->irq, adp5588_irq,
+                           IRQF_TRIGGER_FALLING | IRQF_DISABLED,
+                           client->dev.driver->name, kpad);
+       if (error) {
+               dev_err(&client->dev, "irq %d busy?\n", client->irq);
+               goto err_unreg_dev;
+       }
+
+       error = adp5588_setup(client);
+       if (error)
+               goto err_free_irq;
+
+       device_init_wakeup(&client->dev, 1);
+       i2c_set_clientdata(client, kpad);
+
+       dev_info(&client->dev, "Rev.%d keypad, irq %d\n", revid, client->irq);
+       return 0;
+
+ err_free_irq:
+       free_irq(client->irq, kpad);
+ err_unreg_dev:
+       input_unregister_device(input);
+       input = NULL;
+ err_free_mem:
+       input_free_device(input);
+       kfree(kpad);
+
+       return error;
+}
+
+static int __devexit adp5588_remove(struct i2c_client *client)
+{
+       struct adp5588_kpad *kpad = i2c_get_clientdata(client);
+
+       adp5588_write(client, CFG, 0);
+       free_irq(client->irq, kpad);
+       cancel_delayed_work_sync(&kpad->work);
+       input_unregister_device(kpad->input);
+       i2c_set_clientdata(client, NULL);
+       kfree(kpad);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM
+static int adp5588_suspend(struct device *dev)
+{
+       struct adp5588_kpad *kpad = dev_get_drvdata(dev);
+       struct i2c_client *client = kpad->client;
+
+       disable_irq(client->irq);
+       cancel_delayed_work_sync(&kpad->work);
+
+       if (device_may_wakeup(&client->dev))
+               enable_irq_wake(client->irq);
+
+       return 0;
+}
+
+static int adp5588_resume(struct device *dev)
+{
+       struct adp5588_kpad *kpad = dev_get_drvdata(dev);
+       struct i2c_client *client = kpad->client;
+
+       if (device_may_wakeup(&client->dev))
+               disable_irq_wake(client->irq);
+
+       enable_irq(client->irq);
+
+       return 0;
+}
+
+static struct dev_pm_ops adp5588_dev_pm_ops = {
+       .suspend = adp5588_suspend,
+       .resume  = adp5588_resume,
+};
+#endif
+
+static const struct i2c_device_id adp5588_id[] = {
+       { KBUILD_MODNAME, 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, adp5588_id);
+
+static struct i2c_driver adp5588_driver = {
+       .driver = {
+               .name = KBUILD_MODNAME,
+#ifdef CONFIG_PM
+               .pm   = &adp5588_dev_pm_ops,
+#endif
+       },
+       .probe    = adp5588_probe,
+       .remove   = __devexit_p(adp5588_remove),
+       .id_table = adp5588_id,
+};
+
+static int __init adp5588_init(void)
+{
+       return i2c_add_driver(&adp5588_driver);
+}
+module_init(adp5588_init);
+
+static void __exit adp5588_exit(void)
+{
+       i2c_del_driver(&adp5588_driver);
+}
+module_exit(adp5588_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("ADP5588 Keypad driver");
+MODULE_ALIAS("platform:adp5588-keys");
index adb09e2ba3944d8b0649186a14a8221673811429..4709e15af6070522dcf03516acb058bd024b07d7 100644 (file)
@@ -773,23 +773,6 @@ static int atkbd_select_set(struct atkbd *atkbd, int target_set, int allow_extra
 static int atkbd_activate(struct atkbd *atkbd)
 {
        struct ps2dev *ps2dev = &atkbd->ps2dev;
-       unsigned char param[1];
-
-/*
- * Set the LEDs to a defined state.
- */
-
-       param[0] = 0;
-       if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS))
-               return -1;
-
-/*
- * Set autorepeat to fastest possible.
- */
-
-       param[0] = 0;
-       if (ps2_command(ps2dev, param, ATKBD_CMD_SETREP))
-               return -1;
 
 /*
  * Enable the keyboard to receive keystrokes.
@@ -1158,14 +1141,6 @@ static int atkbd_reconnect(struct serio *serio)
                        return -1;
 
                atkbd_activate(atkbd);
-
-/*
- * Restore repeat rate and LEDs (that were reset by atkbd_activate)
- * to pre-resume state
- */
-               if (!atkbd->softrepeat)
-                       atkbd_set_repeat_rate(atkbd);
-               atkbd_set_leds(atkbd);
        }
 
        atkbd_enable(atkbd);
diff --git a/drivers/input/keyboard/max7359_keypad.c b/drivers/input/keyboard/max7359_keypad.c
new file mode 100644 (file)
index 0000000..3b5b948
--- /dev/null
@@ -0,0 +1,330 @@
+/*
+ * max7359_keypad.c - MAX7359 Key Switch Controller Driver
+ *
+ * Copyright (C) 2009 Samsung Electronics
+ * Kim Kyuwon <q1.kim@samsung.com>
+ *
+ * Based on pxa27x_keypad.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Datasheet: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/5456
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/input/matrix_keypad.h>
+
+#define MAX7359_MAX_KEY_ROWS   8
+#define MAX7359_MAX_KEY_COLS   8
+#define MAX7359_MAX_KEY_NUM    (MAX7359_MAX_KEY_ROWS * MAX7359_MAX_KEY_COLS)
+#define MAX7359_ROW_SHIFT      3
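+/*
+ * MATRIX_SCAN_CODE(row, col, shift) is (row << shift) + col, so e.g.
+ * row 2 / col 5 maps to scancode (2 << 3) + 5 = 21.
+ */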
+
+/*
+ * MAX7359 registers
+ */
+#define MAX7359_REG_KEYFIFO    0x00
+#define MAX7359_REG_CONFIG     0x01
+#define MAX7359_REG_DEBOUNCE   0x02
+#define MAX7359_REG_INTERRUPT  0x03
+#define MAX7359_REG_PORTS      0x04
+#define MAX7359_REG_KEYREP     0x05
+#define MAX7359_REG_SLEEP      0x06
+
+/*
+ * Configuration register bits
+ */
+#define MAX7359_CFG_SLEEP      (1 << 7)
+#define MAX7359_CFG_INTERRUPT  (1 << 5)
+#define MAX7359_CFG_KEY_RELEASE        (1 << 3)
+#define MAX7359_CFG_WAKEUP     (1 << 1)
+#define MAX7359_CFG_TIMEOUT    (1 << 0)
+
+/*
+ * Autosleep register values (ms)
+ */
+#define MAX7359_AUTOSLEEP_8192 0x01
+#define MAX7359_AUTOSLEEP_4096 0x02
+#define MAX7359_AUTOSLEEP_2048 0x03
+#define MAX7359_AUTOSLEEP_1024 0x04
+#define MAX7359_AUTOSLEEP_512  0x05
+#define MAX7359_AUTOSLEEP_256  0x06
+
+struct max7359_keypad {
+       /* matrix key code map */
+       unsigned short keycodes[MAX7359_MAX_KEY_NUM];
+
+       struct input_dev *input_dev;
+       struct i2c_client *client;
+};
+
+static int max7359_write_reg(struct i2c_client *client, u8 reg, u8 val)
+{
+       int ret = i2c_smbus_write_byte_data(client, reg, val);
+
+       if (ret < 0)
+               dev_err(&client->dev, "%s: reg 0x%x, val 0x%x, err %d\n",
+                       __func__, reg, val, ret);
+       return ret;
+}
+
+static int max7359_read_reg(struct i2c_client *client, int reg)
+{
+       int ret = i2c_smbus_read_byte_data(client, reg);
+
+       if (ret < 0)
+               dev_err(&client->dev, "%s: reg 0x%x, err %d\n",
+                       __func__, reg, ret);
+       return ret;
+}
+
+static void max7359_build_keycode(struct max7359_keypad *keypad,
+                               const struct matrix_keymap_data *keymap_data)
+{
+       struct input_dev *input_dev = keypad->input_dev;
+       int i;
+
+       for (i = 0; i < keymap_data->keymap_size; i++) {
+               unsigned int key = keymap_data->keymap[i];
+               unsigned int row = KEY_ROW(key);
+               unsigned int col = KEY_COL(key);
+               unsigned int scancode = MATRIX_SCAN_CODE(row, col,
+                                               MAX7359_ROW_SHIFT);
+               unsigned short keycode = KEY_VAL(key);
+
+               keypad->keycodes[scancode] = keycode;
+               __set_bit(keycode, input_dev->keybit);
+       }
+       __clear_bit(KEY_RESERVED, input_dev->keybit);
+}
+
+/* runs in an IRQ thread -- can (and will!) sleep */
+static irqreturn_t max7359_interrupt(int irq, void *dev_id)
+{
+       struct max7359_keypad *keypad = dev_id;
+       struct input_dev *input_dev = keypad->input_dev;
+       int val, row, col, release, code;
+
+       val = max7359_read_reg(keypad->client, MAX7359_REG_KEYFIFO);
+       row = val & 0x7;
+       col = (val >> 3) & 0x7;
+       release = val & 0x40;
+
+       code = MATRIX_SCAN_CODE(row, col, MAX7359_ROW_SHIFT);
+
+       dev_dbg(&keypad->client->dev,
+               "key[%d:%d] %s\n", row, col, release ? "release" : "press");
+
+       input_event(input_dev, EV_MSC, MSC_SCAN, code);
+       input_report_key(input_dev, keypad->keycodes[code], !release);
+       input_sync(input_dev);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Let MAX7359 fall into a deep sleep:
+ * If no key is pressed for 8192 ms, the chip enters sleep mode; any
+ * key press returns the MAX7359 to normal operating mode.
+ */
+static inline void max7359_fall_deepsleep(struct i2c_client *client)
+{
+       max7359_write_reg(client, MAX7359_REG_SLEEP, MAX7359_AUTOSLEEP_8192);
+}
+
+/*
+ * Let MAX7359 take a catnap:
+ * Autosleep after just 256 ms.
+ */
+static inline void max7359_take_catnap(struct i2c_client *client)
+{
+       max7359_write_reg(client, MAX7359_REG_SLEEP, MAX7359_AUTOSLEEP_256);
+}
+
+static int max7359_open(struct input_dev *dev)
+{
+       struct max7359_keypad *keypad = input_get_drvdata(dev);
+
+       max7359_take_catnap(keypad->client);
+
+       return 0;
+}
+
+static void max7359_close(struct input_dev *dev)
+{
+       struct max7359_keypad *keypad = input_get_drvdata(dev);
+
+       max7359_fall_deepsleep(keypad->client);
+}
+
+static void max7359_initialize(struct i2c_client *client)
+{
+       max7359_write_reg(client, MAX7359_REG_CONFIG,
+               MAX7359_CFG_INTERRUPT | /* Irq clears after host read */
+               MAX7359_CFG_KEY_RELEASE | /* Key release enable */
+               MAX7359_CFG_WAKEUP); /* Key press wakeup enable */
+
+       /* Full key-scan functionality */
+       max7359_write_reg(client, MAX7359_REG_DEBOUNCE, 0x1F);
+
+       /* nINT asserts after every debounce cycle */
+       max7359_write_reg(client, MAX7359_REG_INTERRUPT, 0x01);
+
+       max7359_fall_deepsleep(client);
+}
+
+static int __devinit max7359_probe(struct i2c_client *client,
+                                       const struct i2c_device_id *id)
+{
+       const struct matrix_keymap_data *keymap_data = client->dev.platform_data;
+       struct max7359_keypad *keypad;
+       struct input_dev *input_dev;
+       int ret;
+       int error;
+
+       if (!client->irq) {
+               dev_err(&client->dev, "The irq number should not be zero\n");
+               return -EINVAL;
+       }
+
+       /* Detect MAX7359: The initial Keys FIFO value is '0x3F' */
+       ret = max7359_read_reg(client, MAX7359_REG_KEYFIFO);
+       if (ret < 0) {
+               dev_err(&client->dev, "failed to detect device\n");
+               return -ENODEV;
+       }
+
+       dev_dbg(&client->dev, "keys FIFO is 0x%02x\n", ret);
+
+       keypad = kzalloc(sizeof(struct max7359_keypad), GFP_KERNEL);
+       input_dev = input_allocate_device();
+       if (!keypad || !input_dev) {
+               dev_err(&client->dev, "failed to allocate memory\n");
+               error = -ENOMEM;
+               goto failed_free_mem;
+       }
+
+       keypad->client = client;
+       keypad->input_dev = input_dev;
+
+       input_dev->name = client->name;
+       input_dev->id.bustype = BUS_I2C;
+       input_dev->open = max7359_open;
+       input_dev->close = max7359_close;
+       input_dev->dev.parent = &client->dev;
+
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+       input_dev->keycodesize = sizeof(keypad->keycodes[0]);
+       input_dev->keycodemax = ARRAY_SIZE(keypad->keycodes);
+       input_dev->keycode = keypad->keycodes;
+
+       input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+       input_set_drvdata(input_dev, keypad);
+
+       max7359_build_keycode(keypad, keymap_data);
+
+       error = request_threaded_irq(client->irq, NULL, max7359_interrupt,
+                                    IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                    client->name, keypad);
+       if (error) {
+               dev_err(&client->dev, "failed to register interrupt\n");
+               goto failed_free_mem;
+       }
+
+       /* Register the input device */
+       error = input_register_device(input_dev);
+       if (error) {
+               dev_err(&client->dev, "failed to register input device\n");
+               goto failed_free_irq;
+       }
+
+       /* Initialize MAX7359 */
+       max7359_initialize(client);
+
+       i2c_set_clientdata(client, keypad);
+       device_init_wakeup(&client->dev, 1);
+
+       return 0;
+
+failed_free_irq:
+       free_irq(client->irq, keypad);
+failed_free_mem:
+       input_free_device(input_dev);
+       kfree(keypad);
+       return error;
+}
+
+static int __devexit max7359_remove(struct i2c_client *client)
+{
+       struct max7359_keypad *keypad = i2c_get_clientdata(client);
+
+       free_irq(client->irq, keypad);
+       input_unregister_device(keypad->input_dev);
+       i2c_set_clientdata(client, NULL);
+       kfree(keypad);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM
+static int max7359_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+       max7359_fall_deepsleep(client);
+
+       if (device_may_wakeup(&client->dev))
+               enable_irq_wake(client->irq);
+
+       return 0;
+}
+
+static int max7359_resume(struct i2c_client *client)
+{
+       if (device_may_wakeup(&client->dev))
+               disable_irq_wake(client->irq);
+
+       /* Restore the default setting */
+       max7359_take_catnap(client);
+
+       return 0;
+}
+#else
+#define max7359_suspend        NULL
+#define max7359_resume NULL
+#endif
+
+static const struct i2c_device_id max7359_ids[] = {
+       { "max7359", 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, max7359_ids);
+
+static struct i2c_driver max7359_i2c_driver = {
+       .driver = {
+               .name = "max7359",
+       },
+       .probe          = max7359_probe,
+       .remove         = __devexit_p(max7359_remove),
+       .suspend        = max7359_suspend,
+       .resume         = max7359_resume,
+       .id_table       = max7359_ids,
+};
+
+static int __init max7359_init(void)
+{
+       return i2c_add_driver(&max7359_i2c_driver);
+}
+module_init(max7359_init);
+
+static void __exit max7359_exit(void)
+{
+       i2c_del_driver(&max7359_i2c_driver);
+}
+module_exit(max7359_exit);
+
+MODULE_AUTHOR("Kim Kyuwon <q1.kim@samsung.com>");
+MODULE_DESCRIPTION("MAX7359 Key Switch Controller Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/input/keyboard/opencores-kbd.c b/drivers/input/keyboard/opencores-kbd.c
new file mode 100644 (file)
index 0000000..78cccdd
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * OpenCores Keyboard Controller Driver
+ * http://www.opencores.org/project,keyboardcontroller
+ *
+ * Copyright 2007-2009 HV Sistemas S.L.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+struct opencores_kbd {
+       struct input_dev *input;
+       struct resource *addr_res;
+       void __iomem *addr;
+       int irq;
+       unsigned short keycodes[128];
+};
+
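+/*
+ * The controller delivers one byte per key event: bits 6:0 carry the
+ * scancode and bit 7 is set on release, as decoded in the ISR below.
+ */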
+static irqreturn_t opencores_kbd_isr(int irq, void *dev_id)
+{
+       struct opencores_kbd *opencores_kbd = dev_id;
+       struct input_dev *input = opencores_kbd->input;
+       unsigned char c;
+
+       c = readb(opencores_kbd->addr);
+       input_report_key(input, c & 0x7f, c & 0x80 ? 0 : 1);
+       input_sync(input);
+
+       return IRQ_HANDLED;
+}
+
+static int __devinit opencores_kbd_probe(struct platform_device *pdev)
+{
+       struct input_dev *input;
+       struct opencores_kbd *opencores_kbd;
+       struct resource *res;
+       int irq, i, error;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(&pdev->dev, "missing board memory resource\n");
+               return -EINVAL;
+       }
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0) {
+               dev_err(&pdev->dev, "missing board IRQ resource\n");
+               return -EINVAL;
+       }
+
+       opencores_kbd = kzalloc(sizeof(*opencores_kbd), GFP_KERNEL);
+       input = input_allocate_device();
+       if (!opencores_kbd || !input) {
+               dev_err(&pdev->dev, "failed to allocate device structures\n");
+               error = -ENOMEM;
+               goto err_free_mem;
+       }
+
+       opencores_kbd->addr_res = res;
+       res = request_mem_region(res->start, resource_size(res), pdev->name);
+       if (!res) {
+               dev_err(&pdev->dev, "failed to request I/O memory\n");
+               error = -EBUSY;
+               goto err_free_mem;
+       }
+
+       opencores_kbd->addr = ioremap(res->start, resource_size(res));
+       if (!opencores_kbd->addr) {
+               dev_err(&pdev->dev, "failed to remap I/O memory\n");
+               error = -ENXIO;
+               goto err_rel_mem;
+       }
+
+       opencores_kbd->input = input;
+       opencores_kbd->irq = irq;
+
+       input->name = pdev->name;
+       input->phys = "opencores-kbd/input0";
+       input->dev.parent = &pdev->dev;
+
+       input_set_drvdata(input, opencores_kbd);
+
+       input->id.bustype = BUS_HOST;
+       input->id.vendor = 0x0001;
+       input->id.product = 0x0001;
+       input->id.version = 0x0100;
+
+       input->keycode = opencores_kbd->keycodes;
+       input->keycodesize = sizeof(opencores_kbd->keycodes[0]);
+       input->keycodemax = ARRAY_SIZE(opencores_kbd->keycodes);
+
+       __set_bit(EV_KEY, input->evbit);
+
+       for (i = 0; i < ARRAY_SIZE(opencores_kbd->keycodes); i++) {
+               /*
+                * The OpenCores controller's scancodes happen to
+                * match the kernel's KEY_* definitions.
+                */
+               opencores_kbd->keycodes[i] = i;
+               __set_bit(opencores_kbd->keycodes[i], input->keybit);
+       }
+       __clear_bit(KEY_RESERVED, input->keybit);
+
+       error = request_irq(irq, &opencores_kbd_isr,
+                           IRQF_TRIGGER_RISING, pdev->name, opencores_kbd);
+       if (error) {
+               dev_err(&pdev->dev, "unable to claim irq %d\n", irq);
+               goto err_unmap_mem;
+       }
+
+       error = input_register_device(input);
+       if (error) {
+               dev_err(&pdev->dev, "unable to register input device\n");
+               goto err_free_irq;
+       }
+
+       platform_set_drvdata(pdev, opencores_kbd);
+
+       return 0;
+
+ err_free_irq:
+       free_irq(irq, opencores_kbd);
+ err_unmap_mem:
+       iounmap(opencores_kbd->addr);
+ err_rel_mem:
+       release_mem_region(res->start, resource_size(res));
+ err_free_mem:
+       input_free_device(input);
+       kfree(opencores_kbd);
+
+       return error;
+}
+
+static int __devexit opencores_kbd_remove(struct platform_device *pdev)
+{
+       struct opencores_kbd *opencores_kbd = platform_get_drvdata(pdev);
+
+       free_irq(opencores_kbd->irq, opencores_kbd);
+
+       iounmap(opencores_kbd->addr);
+       release_mem_region(opencores_kbd->addr_res->start,
+               resource_size(opencores_kbd->addr_res));
+       input_unregister_device(opencores_kbd->input);
+       kfree(opencores_kbd);
+
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+static struct platform_driver opencores_kbd_device_driver = {
+       .probe    = opencores_kbd_probe,
+       .remove   = __devexit_p(opencores_kbd_remove),
+       .driver   = {
+               .name = "opencores-kbd",
+       },
+};
+
+static int __init opencores_kbd_init(void)
+{
+       return platform_driver_register(&opencores_kbd_device_driver);
+}
+module_init(opencores_kbd_init);
+
+static void __exit opencores_kbd_exit(void)
+{
+       platform_driver_unregister(&opencores_kbd_device_driver);
+}
+module_exit(opencores_kbd_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Javier Herrero <jherrero@hvsistemas.es>");
+MODULE_DESCRIPTION("Keyboard driver for OpenCores Keyboard Controller");
diff --git a/drivers/input/keyboard/qt2160.c b/drivers/input/keyboard/qt2160.c
new file mode 100644 (file)
index 0000000..191cc51
--- /dev/null
@@ -0,0 +1,397 @@
+/*
+ *  qt2160.c - Atmel AT42QT2160 Touch Sense Controller
+ *
+ *  Copyright (C) 2009 Raphael Derosso Pereira <raphaelpereira@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+
+#define QT2160_VALID_CHIPID  0x11
+
+#define QT2160_CMD_CHIPID     0
+#define QT2160_CMD_CODEVER    1
+#define QT2160_CMD_GSTAT      2
+#define QT2160_CMD_KEYS3      3
+#define QT2160_CMD_KEYS4      4
+#define QT2160_CMD_SLIDE      5
+#define QT2160_CMD_GPIOS      6
+#define QT2160_CMD_SUBVER     7
+#define QT2160_CMD_CALIBRATE  10
+
+#define QT2160_CYCLE_INTERVAL  (2*HZ)
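+/* period of the watchdog-style matrix re-read in qt2160_worker() */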
+
+static unsigned char qt2160_key2code[] = {
+       KEY_0, KEY_1, KEY_2, KEY_3,
+       KEY_4, KEY_5, KEY_6, KEY_7,
+       KEY_8, KEY_9, KEY_A, KEY_B,
+       KEY_C, KEY_D, KEY_E, KEY_F,
+};
+
+struct qt2160_data {
+       struct i2c_client *client;
+       struct input_dev *input;
+       struct delayed_work dwork;
+       spinlock_t lock;        /* Protects canceling/rescheduling of dwork */
+       unsigned short keycodes[ARRAY_SIZE(qt2160_key2code)];
+       u16 key_matrix;
+};
+
+static int qt2160_read_block(struct i2c_client *client,
+                            u8 inireg, u8 *buffer, unsigned int count)
+{
+       int error, idx = 0;
+
+       /*
+        * Can't use an SMBus block data read here. Check for plain I2C
+        * functionality to speed things up whenever possible; otherwise
+        * we are forced to read the registers one byte at a time.
+        */
+       if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+
+               error = i2c_smbus_write_byte(client, inireg + idx);
+               if (error) {
+                       dev_err(&client->dev,
+                               "couldn't send request. Returned %d\n", error);
+                       return error;
+               }
+
+               error = i2c_master_recv(client, buffer, count);
+               if (error != count) {
+                       dev_err(&client->dev,
+                               "couldn't read registers. Returned %d bytes\n", error);
+                       return error;
+               }
+       } else {
+
+               while (count--) {
+                       int data;
+
+                       error = i2c_smbus_write_byte(client, inireg + idx);
+                       if (error) {
+                               dev_err(&client->dev,
+                                       "couldn't send request. Returned %d\n", error);
+                               return error;
+                       }
+
+                       data = i2c_smbus_read_byte(client);
+                       if (data < 0) {
+                               dev_err(&client->dev,
+                                       "couldn't read register. Returned %d\n", data);
+                               return data;
+                       }
+
+                       buffer[idx++] = data;
+               }
+       }
+
+       return 0;
+}
+
+static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
+{
+       struct i2c_client *client = qt2160->client;
+       struct input_dev *input = qt2160->input;
+       u8 regs[6];
+       u16 old_matrix, new_matrix;
+       int ret, i, mask;
+
+       dev_dbg(&client->dev, "requesting keys...\n");
+
+       /*
+        * Read all registers from General Status Register
+        * to GPIOs register
+        */
+       ret = qt2160_read_block(client, QT2160_CMD_GSTAT, regs, 6);
+       if (ret) {
+               dev_err(&client->dev,
+                       "could not perform chip read.\n");
+               return ret;
+       }
+
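+       /*
+        * The block read starts at the General Status register, so
+        * regs[1] and regs[2] hold the KEYS3/KEYS4 key state bytes.
+        */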
+       old_matrix = qt2160->key_matrix;
+       qt2160->key_matrix = new_matrix = (regs[2] << 8) | regs[1];
+
+       mask = 0x01;
+       for (i = 0; i < 16; ++i, mask <<= 1) {
+               int keyval = new_matrix & mask;
+
+               if ((old_matrix & mask) != keyval) {
+                       input_report_key(input, qt2160->keycodes[i], keyval);
+                       dev_dbg(&client->dev, "key %d %s\n",
+                               i, keyval ? "pressed" : "released");
+               }
+       }
+
+       input_sync(input);
+
+       return 0;
+}
+
+static irqreturn_t qt2160_irq(int irq, void *_qt2160)
+{
+       struct qt2160_data *qt2160 = _qt2160;
+       unsigned long flags;
+
+       spin_lock_irqsave(&qt2160->lock, flags);
+
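+       /*
+        * Cancel first so the readout runs now rather than at an already
+        * scheduled later time; __cancel_delayed_work() is the IRQ-safe,
+        * non-syncing variant, so it is fine under the spinlock here.
+        */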
+       __cancel_delayed_work(&qt2160->dwork);
+       schedule_delayed_work(&qt2160->dwork, 0);
+
+       spin_unlock_irqrestore(&qt2160->lock, flags);
+
+       return IRQ_HANDLED;
+}
+
+static void qt2160_schedule_read(struct qt2160_data *qt2160)
+{
+       spin_lock_irq(&qt2160->lock);
+       schedule_delayed_work(&qt2160->dwork, QT2160_CYCLE_INTERVAL);
+       spin_unlock_irq(&qt2160->lock);
+}
+
+static void qt2160_worker(struct work_struct *work)
+{
+       struct qt2160_data *qt2160 =
+               container_of(work, struct qt2160_data, dwork.work);
+
+       dev_dbg(&qt2160->client->dev, "worker\n");
+
+       qt2160_get_key_matrix(qt2160);
+
+       /* Avoid device lock-up by re-reading the matrix every so often */
+       qt2160_schedule_read(qt2160);
+}
+
+static int __devinit qt2160_read(struct i2c_client *client, u8 reg)
+{
+       int ret;
+
+       ret = i2c_smbus_write_byte(client, reg);
+       if (ret) {
+               dev_err(&client->dev,
+                       "couldn't send request. Returned %d\n", ret);
+               return ret;
+       }
+
+       ret = i2c_smbus_read_byte(client);
+       if (ret < 0) {
+               dev_err(&client->dev,
+                       "couldn't read register. Returned %d\n", ret);
+               return ret;
+       }
+
+       return ret;
+}
+
+static int __devinit qt2160_write(struct i2c_client *client, u8 reg, u8 data)
+{
+       int error;
+
+       error = i2c_smbus_write_byte(client, reg);
+       if (error) {
+               dev_err(&client->dev,
+                       "couldn't send request. Returned %d\n", error);
+               return error;
+       }
+
+       error = i2c_smbus_write_byte(client, data);
+       if (error) {
+               dev_err(&client->dev,
+                       "couldn't write data. Returned %d\n", error);
+               return error;
+       }
+
+       return error;
+}
+
+static bool __devinit qt2160_identify(struct i2c_client *client)
+{
+       int id, ver, rev;
+
+       /* Read Chip ID to check if chip is valid */
+       id = qt2160_read(client, QT2160_CMD_CHIPID);
+       if (id != QT2160_VALID_CHIPID) {
+               dev_err(&client->dev, "ID %d not supported\n", id);
+               return false;
+       }
+
+       /* Read chip firmware version */
+       ver = qt2160_read(client, QT2160_CMD_CODEVER);
+       if (ver < 0) {
+               dev_err(&client->dev, "could not get firmware version\n");
+               return false;
+       }
+
+       /* Read chip firmware revision */
+       rev = qt2160_read(client, QT2160_CMD_SUBVER);
+       if (rev < 0) {
+               dev_err(&client->dev, "could not get firmware revision\n");
+               return false;
+       }
+
+       dev_info(&client->dev, "AT42QT2160 firmware version %d.%d.%d\n",
+                       ver >> 4, ver & 0xf, rev);
+
+       return true;
+}
+
+static int __devinit qt2160_probe(struct i2c_client *client,
+                                 const struct i2c_device_id *id)
+{
+       struct qt2160_data *qt2160;
+       struct input_dev *input;
+       int i;
+       int error;
+
+       /* Check functionality */
+       error = i2c_check_functionality(client->adapter,
+                       I2C_FUNC_SMBUS_BYTE);
+       if (!error) {
+               dev_err(&client->dev, "%s adapter not supported\n",
+                               dev_driver_string(&client->adapter->dev));
+               return -ENODEV;
+       }
+
+       if (!qt2160_identify(client))
+               return -ENODEV;
+
+       /* Chip is valid and active. Allocate structure */
+       qt2160 = kzalloc(sizeof(struct qt2160_data), GFP_KERNEL);
+       input = input_allocate_device();
+       if (!qt2160 || !input) {
+               dev_err(&client->dev, "insufficient memory\n");
+               error = -ENOMEM;
+               goto err_free_mem;
+       }
+
+       qt2160->client = client;
+       qt2160->input = input;
+       INIT_DELAYED_WORK(&qt2160->dwork, qt2160_worker);
+       spin_lock_init(&qt2160->lock);
+
+       input->name = "AT42QT2160 Touch Sense Keyboard";
+       input->id.bustype = BUS_I2C;
+
+       input->keycode = qt2160->keycodes;
+       input->keycodesize = sizeof(qt2160->keycodes[0]);
+       input->keycodemax = ARRAY_SIZE(qt2160_key2code);
+
+       __set_bit(EV_KEY, input->evbit);
+       __clear_bit(EV_REP, input->evbit);
+       for (i = 0; i < ARRAY_SIZE(qt2160_key2code); i++) {
+               qt2160->keycodes[i] = qt2160_key2code[i];
+               __set_bit(qt2160_key2code[i], input->keybit);
+       }
+       __clear_bit(KEY_RESERVED, input->keybit);
+
+       /* Calibrate device */
+       error = qt2160_write(client, QT2160_CMD_CALIBRATE, 1);
+       if (error) {
+               dev_err(&client->dev, "failed to calibrate device\n");
+               goto err_free_mem;
+       }
+
+       if (client->irq) {
+               error = request_irq(client->irq, qt2160_irq,
+                                   IRQF_TRIGGER_FALLING, "qt2160", qt2160);
+               if (error) {
+                       dev_err(&client->dev,
+                               "failed to allocate irq %d\n", client->irq);
+                       goto err_free_mem;
+               }
+       }
+
+       error = input_register_device(qt2160->input);
+       if (error) {
+               dev_err(&client->dev,
+                       "Failed to register input device\n");
+               goto err_free_irq;
+       }
+
+       i2c_set_clientdata(client, qt2160);
+       qt2160_schedule_read(qt2160);
+
+       return 0;
+
+err_free_irq:
+       if (client->irq)
+               free_irq(client->irq, qt2160);
+err_free_mem:
+       input_free_device(input);
+       kfree(qt2160);
+       return error;
+}
+
+static int __devexit qt2160_remove(struct i2c_client *client)
+{
+       struct qt2160_data *qt2160 = i2c_get_clientdata(client);
+
+       /* Release IRQ so no queue will be scheduled */
+       if (client->irq)
+               free_irq(client->irq, qt2160);
+
+       cancel_delayed_work_sync(&qt2160->dwork);
+
+       input_unregister_device(qt2160->input);
+       kfree(qt2160);
+
+       i2c_set_clientdata(client, NULL);
+       return 0;
+}
+
+static struct i2c_device_id qt2160_idtable[] = {
+       { "qt2160", 0, },
+       { }
+};
+
+MODULE_DEVICE_TABLE(i2c, qt2160_idtable);
+
+static struct i2c_driver qt2160_driver = {
+       .driver = {
+               .name   = "qt2160",
+               .owner  = THIS_MODULE,
+       },
+
+       .id_table       = qt2160_idtable,
+       .probe          = qt2160_probe,
+       .remove         = __devexit_p(qt2160_remove),
+};
+
+static int __init qt2160_init(void)
+{
+       return i2c_add_driver(&qt2160_driver);
+}
+module_init(qt2160_init);
+
+static void __exit qt2160_cleanup(void)
+{
+       i2c_del_driver(&qt2160_driver);
+}
+module_exit(qt2160_cleanup);
+
+MODULE_AUTHOR("Raphael Derosso Pereira <raphaelpereira@gmail.com>");
+MODULE_DESCRIPTION("Driver for AT42QT2160 Touch Sensor");
+MODULE_LICENSE("GPL");
index 76d6751f89a7607784a5293b566d4d3b2f87f36e..02f4f8f1db6f3af8a440167518a02a2ebd61e31a 100644 (file)
@@ -225,6 +225,7 @@ config INPUT_SGI_BTNS
 config INPUT_WINBOND_CIR
        tristate "Winbond IR remote control"
        depends on X86 && PNP
+       select NEW_LEDS
        select LEDS_CLASS
        select BITREVERSE
        help
index 0918acae584ac9844d2dd64c500f235b56f22b72..f2b67dc81d80b92ef186d698737190ff1b4f3fb1 100644 (file)
@@ -96,7 +96,13 @@ static struct {
        { 0x3169, KEY_PAUSE, },
 };
 
-/* runs in an IRQ thread -- can (and will!) sleep */
+/*
+ * Because we communicate with the MSP430 using I2C, and all I2C calls
+ * in Linux sleep, we use a threaded IRQ handler.  The IRQ itself is
+ * active low, but we go through the GPIO controller so we can trigger
+ * on falling edges and not worry about enabling/disabling the IRQ in
+ * the keypress handling path.
+ */
 static irqreturn_t dm355evm_keys_irq(int irq, void *_keys)
 {
        struct dm355evm_keys    *keys = _keys;
@@ -171,18 +177,6 @@ static irqreturn_t dm355evm_keys_irq(int irq, void *_keys)
        return IRQ_HANDLED;
 }
 
-/*
- * Because we communicate with the MSP430 using I2C, and all I2C calls
- * in Linux sleep, we use a threaded IRQ handler.  The IRQ itself is
- * active low, but we go through the GPIO controller so we can trigger
- * on falling edges and not worry about enabling/disabling the IRQ in
- * the keypress handling path.
- */
-static irqreturn_t dm355evm_keys_hardirq(int irq, void *_keys)
-{
-       return IRQ_WAKE_THREAD;
-}
-
 static int dm355evm_setkeycode(struct input_dev *dev, int index, int keycode)
 {
        u16             old_keycode;
@@ -257,10 +251,8 @@ static int __devinit dm355evm_keys_probe(struct platform_device *pdev)
 
        /* REVISIT:  flush the event queue? */
 
-       status = request_threaded_irq(keys->irq,
-                       dm355evm_keys_hardirq, dm355evm_keys_irq,
-                       IRQF_TRIGGER_FALLING,
-                       dev_name(&pdev->dev), keys);
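+       /*
+        * A NULL hard-IRQ handler makes the IRQ core supply a default
+        * one that simply returns IRQ_WAKE_THREAD -- exactly what the
+        * removed dm355evm_keys_hardirq() did.
+        */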
+       status = request_threaded_irq(keys->irq, NULL, dm355evm_keys_irq,
+                       IRQF_TRIGGER_FALLING, dev_name(&pdev->dev), keys);
        if (status < 0)
                goto fail1;
 
index 84e2fc04d11bbda7f68d93cf7f3f64c98da2e0c6..f84cbd97c8842ff12a406b61d543bfd7c036a75d 100644 (file)
@@ -92,7 +92,8 @@ static int fsp_reg_read(struct psmouse *psmouse, int reg_addr, int *reg_val)
         */
        ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE);
        psmouse_set_state(psmouse, PSMOUSE_CMD_MODE);
-       mutex_lock(&ps2dev->cmd_mutex);
+
+       ps2_begin_command(ps2dev);
 
        if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
                goto out;
@@ -126,7 +127,7 @@ static int fsp_reg_read(struct psmouse *psmouse, int reg_addr, int *reg_val)
        rc = 0;
 
  out:
-       mutex_unlock(&ps2dev->cmd_mutex);
+       ps2_end_command(ps2dev);
        ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE);
        psmouse_set_state(psmouse, PSMOUSE_ACTIVATED);
        dev_dbg(&ps2dev->serio->dev, "READ REG: 0x%02x is 0x%02x (rc = %d)\n",
@@ -140,7 +141,7 @@ static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val)
        unsigned char v;
        int rc = -1;
 
-       mutex_lock(&ps2dev->cmd_mutex);
+       ps2_begin_command(ps2dev);
 
        if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
                goto out;
@@ -179,7 +180,7 @@ static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val)
        rc = 0;
 
  out:
-       mutex_unlock(&ps2dev->cmd_mutex);
+       ps2_end_command(ps2dev);
        dev_dbg(&ps2dev->serio->dev, "WRITE REG: 0x%02x to 0x%02x (rc = %d)\n",
                reg_addr, reg_val, rc);
        return rc;
@@ -214,7 +215,8 @@ static int fsp_page_reg_read(struct psmouse *psmouse, int *reg_val)
 
        ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE);
        psmouse_set_state(psmouse, PSMOUSE_CMD_MODE);
-       mutex_lock(&ps2dev->cmd_mutex);
+
+       ps2_begin_command(ps2dev);
 
        if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
                goto out;
@@ -236,7 +238,7 @@ static int fsp_page_reg_read(struct psmouse *psmouse, int *reg_val)
        rc = 0;
 
  out:
-       mutex_unlock(&ps2dev->cmd_mutex);
+       ps2_end_command(ps2dev);
        ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE);
        psmouse_set_state(psmouse, PSMOUSE_ACTIVATED);
        dev_dbg(&ps2dev->serio->dev, "READ PAGE REG: 0x%02x (rc = %d)\n",
@@ -250,7 +252,7 @@ static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val)
        unsigned char v;
        int rc = -1;
 
-       mutex_lock(&ps2dev->cmd_mutex);
+       ps2_begin_command(ps2dev);
 
        if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
                goto out;
@@ -275,7 +277,7 @@ static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val)
        rc = 0;
 
  out:
-       mutex_unlock(&ps2dev->cmd_mutex);
+       ps2_end_command(ps2dev);
        dev_dbg(&ps2dev->serio->dev, "WRITE PAGE REG: to 0x%02x (rc = %d)\n",
                reg_val, rc);
        return rc;
index eac9fdde7ee9c13db9f5dc0c64aed2fcb82ee24b..7283c78044af3cd9a6a701bdb9f94684daf80e5f 100644 (file)
@@ -203,7 +203,7 @@ MODULE_PARM_DESC(no_filter, "No Filter. Default = 0 (off)");
  * and the irq configuration should be set to Falling Edge Trigger
  */
 /* Control IRQ / Polling option */
-static int polling_req;
+static bool polling_req;
 module_param(polling_req, bool, 0444);
 MODULE_PARM_DESC(polling_req, "Request Polling. Default = 0 (use irq)");
 
@@ -217,6 +217,7 @@ struct synaptics_i2c {
        struct i2c_client       *client;
        struct input_dev        *input;
        struct delayed_work     dwork;
+       spinlock_t              lock;
        int                     no_data_count;
        int                     no_decel_param;
        int                     reduce_report_param;
@@ -366,17 +367,28 @@ static bool synaptics_i2c_get_input(struct synaptics_i2c *touch)
        return xy_delta || gesture;
 }
 
-static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id)
+static void synaptics_i2c_reschedule_work(struct synaptics_i2c *touch,
+                                         unsigned long delay)
 {
-       struct synaptics_i2c *touch = dev_id;
+       unsigned long flags;
+
+       spin_lock_irqsave(&touch->lock, flags);
 
        /*
-        * We want to have the work run immediately but it might have
-        * already been scheduled with a delay, that's why we have to
-        * cancel it first.
+        * If the work is already scheduled, a subsequent schedule will
+        * not change the scheduled time, which is why we have to cancel
+        * it first. __cancel_delayed_work() is used because, unlike
+        * cancel_delayed_work(), it is safe to call from IRQ context
+        * and under our spinlock.
         */
-       cancel_delayed_work(&touch->dwork);
-       schedule_delayed_work(&touch->dwork, 0);
+       __cancel_delayed_work(&touch->dwork);
+       schedule_delayed_work(&touch->dwork, delay);
+
+       spin_unlock_irqrestore(&touch->lock, flags);
+}
+
+static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id)
+{
+       struct synaptics_i2c *touch = dev_id;
+
+       synaptics_i2c_reschedule_work(touch, 0);
 
        return IRQ_HANDLED;
 }
@@ -452,7 +464,7 @@ static void synaptics_i2c_work_handler(struct work_struct *work)
         * We poll the device once in THREAD_IRQ_SLEEP_SECS and
         * if error is detected, we try to reset and reconfigure the touchpad.
         */
-       schedule_delayed_work(&touch->dwork, delay);
+       synaptics_i2c_reschedule_work(touch, delay);
 }
 
 static int synaptics_i2c_open(struct input_dev *input)
@@ -465,8 +477,8 @@ static int synaptics_i2c_open(struct input_dev *input)
                return ret;
 
        if (polling_req)
-               schedule_delayed_work(&touch->dwork,
-                                      msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
+               synaptics_i2c_reschedule_work(touch,
+                               msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
 
        return 0;
 }
@@ -521,6 +533,7 @@ struct synaptics_i2c *synaptics_i2c_touch_create(struct i2c_client *client)
        touch->scan_rate_param = scan_rate;
        set_scan_rate(touch, scan_rate);
        INIT_DELAYED_WORK(&touch->dwork, synaptics_i2c_work_handler);
+       spin_lock_init(&touch->lock);
 
        return touch;
 }
@@ -535,14 +548,12 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client,
        if (!touch)
                return -ENOMEM;
 
-       i2c_set_clientdata(client, touch);
-
        ret = synaptics_i2c_reset_config(client);
        if (ret)
                goto err_mem_free;
 
        if (client->irq < 1)
-               polling_req = 1;
+               polling_req = true;
 
        touch->input = input_allocate_device();
        if (!touch->input) {
@@ -563,7 +574,7 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client,
                        dev_warn(&touch->client->dev,
                                  "IRQ request failed: %d, "
                                  "falling back to polling\n", ret);
-                       polling_req = 1;
+                       polling_req = true;
                        synaptics_i2c_reg_set(touch->client,
                                              INTERRUPT_EN_REG, 0);
                }
@@ -580,12 +591,14 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client,
                         "Input device register failed: %d\n", ret);
                goto err_input_free;
        }
+
+       i2c_set_clientdata(client, touch);
+
        return 0;
 
 err_input_free:
        input_free_device(touch->input);
 err_mem_free:
-       i2c_set_clientdata(client, NULL);
        kfree(touch);
 
        return ret;
@@ -596,7 +609,7 @@ static int __devexit synaptics_i2c_remove(struct i2c_client *client)
        struct synaptics_i2c *touch = i2c_get_clientdata(client);
 
        if (!polling_req)
-               free_irq(touch->client->irq, touch);
+               free_irq(client->irq, touch);
 
        input_unregister_device(touch->input);
        i2c_set_clientdata(client, NULL);
@@ -627,8 +640,8 @@ static int synaptics_i2c_resume(struct i2c_client *client)
        if (ret)
                return ret;
 
-       schedule_delayed_work(&touch->dwork,
-                              msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
+       synaptics_i2c_reschedule_work(touch,
+                               msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
 
        return 0;
 }
index eb3ff94af58c4cd6e87a41600c9a06cf886011a8..bc56e52b945f68c8e0fa604a7d88659dfa77fc23 100644 (file)
@@ -87,8 +87,22 @@ static bool i8042_bypass_aux_irq_test;
 
 #include "i8042.h"
 
+/*
+ * i8042_lock serializes access to the controller between
+ * i8042_command() and the interrupt handler.
+ */
 static DEFINE_SPINLOCK(i8042_lock);
 
+/*
+ * Writers to the AUX and KBD ports, as well as users issuing i8042_command()
+ * directly, should acquire i8042_mutex (by means of the i8042_lock_chip()
+ * and i8042_unlock_chip() helpers) to ensure that they do not disturb
+ * each other (unfortunately, in many i8042 implementations a write to
+ * one of the ports will immediately abort a command that is being
+ * processed by the other port).
+ */
+static DEFINE_MUTEX(i8042_mutex);
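+/*
+ * Typical external usage (a sketch, not part of this patch):
+ *
+ *     i8042_lock_chip();
+ *     ... access the controller, e.g. via i8042_command() ...
+ *     i8042_unlock_chip();
+ */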
+
 struct i8042_port {
        struct serio *serio;
        int irq;
@@ -113,6 +127,18 @@ static struct platform_device *i8042_platform_device;
 
 static irqreturn_t i8042_interrupt(int irq, void *dev_id);
 
+void i8042_lock_chip(void)
+{
+       mutex_lock(&i8042_mutex);
+}
+EXPORT_SYMBOL(i8042_lock_chip);
+
+void i8042_unlock_chip(void)
+{
+       mutex_unlock(&i8042_mutex);
+}
+EXPORT_SYMBOL(i8042_unlock_chip);
+
 /*
 * The i8042_wait_read() and i8042_wait_write() functions wait for the i8042 to
  * be ready for reading values from it / writing values to it.
@@ -1161,6 +1187,21 @@ static void __devexit i8042_unregister_ports(void)
        }
 }
 
+/*
+ * Checks whether port belongs to i8042 controller.
+ */
+bool i8042_check_port_owner(const struct serio *port)
+{
+       int i;
+
+       for (i = 0; i < I8042_NUM_PORTS; i++)
+               if (i8042_ports[i].serio == port)
+                       return true;
+
+       return false;
+}
+EXPORT_SYMBOL(i8042_check_port_owner);
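+/*
+ * libps2 uses this check in ps2_begin_command()/ps2_end_command() to
+ * decide whether the i8042 chip lock must be taken around a command.
+ */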
+
 static void i8042_free_irqs(void)
 {
        if (i8042_aux_irq_registered)
index 3a95b508bf27b93b014503f06687a7e9ff92b86b..769ba65a585a562a2f298df5f3e62403e316c721 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/input.h>
 #include <linux/serio.h>
+#include <linux/i8042.h>
 #include <linux/init.h>
 #include <linux/libps2.h>
 
@@ -54,6 +55,24 @@ int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout)
 }
 EXPORT_SYMBOL(ps2_sendbyte);
 
+void ps2_begin_command(struct ps2dev *ps2dev)
+{
+       mutex_lock(&ps2dev->cmd_mutex);
+
+       if (i8042_check_port_owner(ps2dev->serio))
+               i8042_lock_chip();
+}
+EXPORT_SYMBOL(ps2_begin_command);
+
+void ps2_end_command(struct ps2dev *ps2dev)
+{
+       if (i8042_check_port_owner(ps2dev->serio))
+               i8042_unlock_chip();
+
+       mutex_unlock(&ps2dev->cmd_mutex);
+}
+EXPORT_SYMBOL(ps2_end_command);
+
 /*
  * ps2_drain() waits for device to transmit requested number of bytes
  * and discards them.
@@ -66,7 +85,7 @@ void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout)
                maxbytes = sizeof(ps2dev->cmdbuf);
        }
 
-       mutex_lock(&ps2dev->cmd_mutex);
+       ps2_begin_command(ps2dev);
 
        serio_pause_rx(ps2dev->serio);
        ps2dev->flags = PS2_FLAG_CMD;
@@ -76,7 +95,8 @@ void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout)
        wait_event_timeout(ps2dev->wait,
                           !(ps2dev->flags & PS2_FLAG_CMD),
                           msecs_to_jiffies(timeout));
-       mutex_unlock(&ps2dev->cmd_mutex);
+
+       ps2_end_command(ps2dev);
 }
 EXPORT_SYMBOL(ps2_drain);
 
@@ -237,9 +257,9 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
 {
        int rc;
 
-       mutex_lock(&ps2dev->cmd_mutex);
+       ps2_begin_command(ps2dev);
        rc = __ps2_command(ps2dev, param, command);
-       mutex_unlock(&ps2dev->cmd_mutex);
+       ps2_end_command(ps2dev);
 
        return rc;
 }
index ab02d72afbf3b0f5df3b2eb7bbec895b202bad5e..8cc453c85ea704d42ee8411d232c31193e8212e1 100644 (file)
@@ -48,8 +48,8 @@ config TOUCHSCREEN_AD7879_I2C
        select TOUCHSCREEN_AD7879
        help
          Say Y here if you have a touchscreen interface using the
-         AD7879-1 controller, and your board-specific initialization
-         code includes that in its table of I2C devices.
+         AD7879-1/AD7889-1 controller, and your board-specific
+         initialization code includes that in its table of I2C devices.
 
          If unsure, say N (but it's safe to say "Y").
 
@@ -62,7 +62,7 @@ config TOUCHSCREEN_AD7879_SPI
        select TOUCHSCREEN_AD7879
        help
          Say Y here if you have a touchscreen interface using the
-         AD7879 controller, and your board-specific initialization
+         AD7879/AD7889 controller, and your board-specific initialization
          code includes that in its table of SPI devices.
 
          If unsure, say N (but it's safe to say "Y").
@@ -169,6 +169,17 @@ config TOUCHSCREEN_WACOM_W8001
          To compile this driver as a module, choose M here: the
          module will be called wacom_w8001.
 
+config TOUCHSCREEN_MCS5000
+       tristate "MELFAS MCS-5000 touchscreen"
+       depends on I2C
+       help
+         Say Y here if you have the MELFAS MCS-5000 touchscreen controller
+         chip in your system.
+
+         If unsure, say N.
+
+         To compile this driver as a module, choose M here: the
+         module will be called mcs5000_ts.
 
 config TOUCHSCREEN_MTOUCH
        tristate "MicroTouch serial touchscreens"
index 4599bf7ad8193ec0ce5f8b909a687d86e1887600..15fa62cffc77f576b8cb364e7712ea7e76b402c0 100644 (file)
@@ -17,6 +17,7 @@ obj-$(CONFIG_TOUCHSCREEN_EETI)                += eeti_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ELO)          += elo.o
 obj-$(CONFIG_TOUCHSCREEN_FUJITSU)      += fujitsu_ts.o
 obj-$(CONFIG_TOUCHSCREEN_INEXIO)       += inexio.o
+obj-$(CONFIG_TOUCHSCREEN_MCS5000)      += mcs5000_ts.o
 obj-$(CONFIG_TOUCHSCREEN_MIGOR)                += migor_ts.o
 obj-$(CONFIG_TOUCHSCREEN_MTOUCH)       += mtouch.o
 obj-$(CONFIG_TOUCHSCREEN_MK712)                += mk712.o
index 19b4db7e974d75c42eb3f7e9e21e8c19ac8d782e..f06332c9e21ba069bb177ff915897db2b9492e52 100644 (file)
@@ -1,7 +1,8 @@
 /*
- * Copyright (C) 2008 Michael Hennerich, Analog Devices Inc.
+ * Copyright (C) 2008-2009 Michael Hennerich, Analog Devices Inc.
  *
- * Description:        AD7879 based touchscreen, and GPIO driver (I2C/SPI Interface)
+ * Description:        AD7879/AD7889-based touchscreen and GPIO driver
+ *             (I2C/SPI Interface)
  *
  * Bugs:        Enter bugs at http://blackfin.uclinux.org/
  *
@@ -747,6 +748,7 @@ static int __devexit ad7879_remove(struct i2c_client *client)
 
 static const struct i2c_device_id ad7879_id[] = {
        { "ad7879", 0 },
+       { "ad7889", 0 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, ad7879_id);
diff --git a/drivers/input/touchscreen/mcs5000_ts.c b/drivers/input/touchscreen/mcs5000_ts.c
new file mode 100644 (file)
index 0000000..4c28b89
--- /dev/null
@@ -0,0 +1,318 @@
+/*
+ * mcs5000_ts.c - Touchscreen driver for MELFAS MCS-5000 controller
+ *
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * Based on wm97xx-core.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/i2c/mcs5000_ts.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/irq.h>
+
+/* Registers */
+#define MCS5000_TS_STATUS              0x00
+#define STATUS_OFFSET                  0
+#define STATUS_NO                      (0 << STATUS_OFFSET)
+#define STATUS_INIT                    (1 << STATUS_OFFSET)
+#define STATUS_SENSING                 (2 << STATUS_OFFSET)
+#define STATUS_COORD                   (3 << STATUS_OFFSET)
+#define STATUS_GESTURE                 (4 << STATUS_OFFSET)
+#define ERROR_OFFSET                   4
+#define ERROR_NO                       (0 << ERROR_OFFSET)
+#define ERROR_POWER_ON_RESET           (1 << ERROR_OFFSET)
+#define ERROR_INT_RESET                        (2 << ERROR_OFFSET)
+#define ERROR_EXT_RESET                        (3 << ERROR_OFFSET)
+#define ERROR_INVALID_REG_ADDRESS      (8 << ERROR_OFFSET)
+#define ERROR_INVALID_REG_VALUE                (9 << ERROR_OFFSET)
+
+#define MCS5000_TS_OP_MODE             0x01
+#define RESET_OFFSET                   0
+#define RESET_NO                       (0 << RESET_OFFSET)
+#define RESET_EXT_SOFT                 (1 << RESET_OFFSET)
+#define OP_MODE_OFFSET                 1
+#define OP_MODE_SLEEP                  (0 << OP_MODE_OFFSET)
+#define OP_MODE_ACTIVE                 (1 << OP_MODE_OFFSET)
+#define GESTURE_OFFSET                 4
+#define GESTURE_DISABLE                        (0 << GESTURE_OFFSET)
+#define GESTURE_ENABLE                 (1 << GESTURE_OFFSET)
+#define PROXIMITY_OFFSET               5
+#define PROXIMITY_DISABLE              (0 << PROXIMITY_OFFSET)
+#define PROXIMITY_ENABLE               (1 << PROXIMITY_OFFSET)
+#define SCAN_MODE_OFFSET               6
+#define SCAN_MODE_INTERRUPT            (0 << SCAN_MODE_OFFSET)
+#define SCAN_MODE_POLLING              (1 << SCAN_MODE_OFFSET)
+#define REPORT_RATE_OFFSET             7
+#define REPORT_RATE_40                 (0 << REPORT_RATE_OFFSET)
+#define REPORT_RATE_80                 (1 << REPORT_RATE_OFFSET)
+
+#define MCS5000_TS_SENS_CTL            0x02
+#define MCS5000_TS_FILTER_CTL          0x03
+#define PRI_FILTER_OFFSET              0
+#define SEC_FILTER_OFFSET              4
+
+#define MCS5000_TS_X_SIZE_UPPER                0x08
+#define MCS5000_TS_X_SIZE_LOWER                0x09
+#define MCS5000_TS_Y_SIZE_UPPER                0x0A
+#define MCS5000_TS_Y_SIZE_LOWER                0x0B
+
+#define MCS5000_TS_INPUT_INFO          0x10
+#define INPUT_TYPE_OFFSET              0
+#define INPUT_TYPE_NONTOUCH            (0 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_SINGLE              (1 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_DUAL                        (2 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_PALM                        (3 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_PROXIMITY           (7 << INPUT_TYPE_OFFSET)
+#define GESTURE_CODE_OFFSET            3
+#define GESTURE_CODE_NO                        (0 << GESTURE_CODE_OFFSET)
+
+#define MCS5000_TS_X_POS_UPPER         0x11
+#define MCS5000_TS_X_POS_LOWER         0x12
+#define MCS5000_TS_Y_POS_UPPER         0x13
+#define MCS5000_TS_Y_POS_LOWER         0x14
+#define MCS5000_TS_Z_POS               0x15
+#define MCS5000_TS_WIDTH               0x16
+#define MCS5000_TS_GESTURE_VAL         0x17
+#define MCS5000_TS_MODULE_REV          0x20
+#define MCS5000_TS_FIRMWARE_VER                0x21
+
+/* Touchscreen absolute values */
+#define MCS5000_MAX_XC                 0x3ff
+#define MCS5000_MAX_YC                 0x3ff
+
+enum mcs5000_ts_read_offset {
+       READ_INPUT_INFO,
+       READ_X_POS_UPPER,
+       READ_X_POS_LOWER,
+       READ_Y_POS_UPPER,
+       READ_Y_POS_LOWER,
+       READ_BLOCK_SIZE,
+};
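+
+/*
+ * The enum above doubles as the layout of the I2C block read done in the
+ * interrupt handler below: READ_BLOCK_SIZE is the number of bytes fetched
+ * in one go starting at the MCS5000_TS_INPUT_INFO register.
+ */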
+
+/* Each client has this additional data */
+struct mcs5000_ts_data {
+       struct i2c_client *client;
+       struct input_dev *input_dev;
+       const struct mcs5000_ts_platform_data *platform_data;
+};
+
+static irqreturn_t mcs5000_ts_interrupt(int irq, void *dev_id)
+{
+       struct mcs5000_ts_data *data = dev_id;
+       struct i2c_client *client = data->client;
+       u8 buffer[READ_BLOCK_SIZE];
+       int err;
+       int x;
+       int y;
+
+       err = i2c_smbus_read_i2c_block_data(client, MCS5000_TS_INPUT_INFO,
+                       READ_BLOCK_SIZE, buffer);
+       if (err < 0) {
+               dev_err(&client->dev, "%s: i2c block read failed: %d\n",
+                               __func__, err);
+               goto out;
+       }
+
+       switch (buffer[READ_INPUT_INFO]) {
+       case INPUT_TYPE_NONTOUCH:
+               input_report_key(data->input_dev, BTN_TOUCH, 0);
+               input_sync(data->input_dev);
+               break;
+
+       case INPUT_TYPE_SINGLE:
+               x = (buffer[READ_X_POS_UPPER] << 8) | buffer[READ_X_POS_LOWER];
+               y = (buffer[READ_Y_POS_UPPER] << 8) | buffer[READ_Y_POS_LOWER];
+
+               input_report_key(data->input_dev, BTN_TOUCH, 1);
+               input_report_abs(data->input_dev, ABS_X, x);
+               input_report_abs(data->input_dev, ABS_Y, y);
+               input_sync(data->input_dev);
+               break;
+
+       case INPUT_TYPE_DUAL:
+               /* TODO */
+               break;
+
+       case INPUT_TYPE_PALM:
+               /* TODO */
+               break;
+
+       case INPUT_TYPE_PROXIMITY:
+               /* TODO */
+               break;
+
+       default:
+               dev_err(&client->dev, "Unknown ts input type %d\n",
+                               buffer[READ_INPUT_INFO]);
+               break;
+       }
+
+ out:
+       return IRQ_HANDLED;
+}
+
+static void mcs5000_ts_phys_init(struct mcs5000_ts_data *data)
+{
+       const struct mcs5000_ts_platform_data *platform_data =
+               data->platform_data;
+       struct i2c_client *client = data->client;
+
+       /* Touch reset & sleep mode */
+       i2c_smbus_write_byte_data(client, MCS5000_TS_OP_MODE,
+                       RESET_EXT_SOFT | OP_MODE_SLEEP);
+
+       /* Touch size */
+       i2c_smbus_write_byte_data(client, MCS5000_TS_X_SIZE_UPPER,
+                       platform_data->x_size >> 8);
+       i2c_smbus_write_byte_data(client, MCS5000_TS_X_SIZE_LOWER,
+                       platform_data->x_size & 0xff);
+       i2c_smbus_write_byte_data(client, MCS5000_TS_Y_SIZE_UPPER,
+                       platform_data->y_size >> 8);
+       i2c_smbus_write_byte_data(client, MCS5000_TS_Y_SIZE_LOWER,
+                       platform_data->y_size & 0xff);
+
+       /* Touch active mode & 80 report rate */
+       i2c_smbus_write_byte_data(data->client, MCS5000_TS_OP_MODE,
+                       OP_MODE_ACTIVE | REPORT_RATE_80);
+}
+
+static int __devinit mcs5000_ts_probe(struct i2c_client *client,
+               const struct i2c_device_id *id)
+{
+       struct mcs5000_ts_data *data;
+       struct input_dev *input_dev;
+       int ret;
+
+       if (!client->dev.platform_data)
+               return -EINVAL;
+
+       data = kzalloc(sizeof(struct mcs5000_ts_data), GFP_KERNEL);
+       input_dev = input_allocate_device();
+       if (!data || !input_dev) {
+               dev_err(&client->dev, "Failed to allocate memory\n");
+               ret = -ENOMEM;
+               goto err_free_mem;
+       }
+
+       data->client = client;
+       data->input_dev = input_dev;
+       data->platform_data = client->dev.platform_data;
+
+       input_dev->name = "MELPAS MCS-5000 Touchscreen";
+       input_dev->id.bustype = BUS_I2C;
+       input_dev->dev.parent = &client->dev;
+
+       __set_bit(EV_ABS, input_dev->evbit);
+       __set_bit(EV_KEY, input_dev->evbit);
+       __set_bit(BTN_TOUCH, input_dev->keybit);
+       input_set_abs_params(input_dev, ABS_X, 0, MCS5000_MAX_XC, 0, 0);
+       input_set_abs_params(input_dev, ABS_Y, 0, MCS5000_MAX_YC, 0, 0);
+
+       input_set_drvdata(input_dev, data);
+
+       if (data->platform_data->cfg_pin)
+               data->platform_data->cfg_pin();
+
+       ret = request_threaded_irq(client->irq, NULL, mcs5000_ts_interrupt,
+                       IRQF_TRIGGER_LOW | IRQF_ONESHOT, "mcs5000_ts", data);
+
+       if (ret < 0) {
+               dev_err(&client->dev, "Failed to register interrupt\n");
+               goto err_free_mem;
+       }
+
+       ret = input_register_device(data->input_dev);
+       if (ret < 0)
+               goto err_free_irq;
+
+       mcs5000_ts_phys_init(data);
+       i2c_set_clientdata(client, data);
+
+       return 0;
+
+err_free_irq:
+       free_irq(client->irq, data);
+err_free_mem:
+       input_free_device(input_dev);
+       kfree(data);
+       return ret;
+}
+
+static int __devexit mcs5000_ts_remove(struct i2c_client *client)
+{
+       struct mcs5000_ts_data *data = i2c_get_clientdata(client);
+
+       free_irq(client->irq, data);
+       input_unregister_device(data->input_dev);
+       kfree(data);
+       i2c_set_clientdata(client, NULL);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM
+static int mcs5000_ts_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+       /* Touch sleep mode */
+       i2c_smbus_write_byte_data(client, MCS5000_TS_OP_MODE, OP_MODE_SLEEP);
+
+       return 0;
+}
+
+static int mcs5000_ts_resume(struct i2c_client *client)
+{
+       struct mcs5000_ts_data *data = i2c_get_clientdata(client);
+
+       mcs5000_ts_phys_init(data);
+
+       return 0;
+}
+#else
+#define mcs5000_ts_suspend     NULL
+#define mcs5000_ts_resume      NULL
+#endif
+
+static const struct i2c_device_id mcs5000_ts_id[] = {
+       { "mcs5000_ts", 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, mcs5000_ts_id);
+
+static struct i2c_driver mcs5000_ts_driver = {
+       .probe          = mcs5000_ts_probe,
+       .remove         = __devexit_p(mcs5000_ts_remove),
+       .suspend        = mcs5000_ts_suspend,
+       .resume         = mcs5000_ts_resume,
+       .driver = {
+               .name = "mcs5000_ts",
+       },
+       .id_table       = mcs5000_ts_id,
+};
+
+static int __init mcs5000_ts_init(void)
+{
+       return i2c_add_driver(&mcs5000_ts_driver);
+}
+
+static void __exit mcs5000_ts_exit(void)
+{
+       i2c_del_driver(&mcs5000_ts_driver);
+}
+
+module_init(mcs5000_ts_init);
+module_exit(mcs5000_ts_exit);
+
+/* Module information */
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_DESCRIPTION("Touchscreen driver for MELFAS MCS-5000 controller");
+MODULE_LICENSE("GPL");
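/*
 * Editor's note (assumption, not shown in this hunk): the driver includes
 * <linux/i2c/mcs5000_ts.h>, which is added elsewhere in the series.  From
 * the fields dereferenced above, it plausibly looks like:
 *
 *	struct mcs5000_ts_platform_data {
 *		void (*cfg_pin)(void);	// board hook to mux the IRQ pin
 *		int x_size;		// panel size, split into the
 *		int y_size;		// UPPER/LOWER size registers
 *	};
 *
 * A board file would supply this via i2c_board_info.platform_data along
 * with an IRQ routed to the controller's interrupt line.
 */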
index 1813c84ea5fccb10293e499a3721dacf94c54290..f2242db5401624d9c7091ea6ac04f330fe7f94fa 100644 (file)
@@ -93,6 +93,8 @@ static struct dmi_system_id __initdata mail_led_whitelist[] = {
 static void clevo_mail_led_set(struct led_classdev *led_cdev,
                                enum led_brightness value)
 {
+       i8042_lock_chip();
+
        if (value == LED_OFF)
                i8042_command(NULL, CLEVO_MAIL_LED_OFF);
        else if (value <= LED_HALF)
@@ -100,6 +102,8 @@ static void clevo_mail_led_set(struct led_classdev *led_cdev,
        else
                i8042_command(NULL, CLEVO_MAIL_LED_BLINK_1HZ);
 
+       i8042_unlock_chip();
+
 }
 
 static int clevo_mail_led_blink(struct led_classdev *led_cdev,
@@ -108,6 +112,8 @@ static int clevo_mail_led_blink(struct led_classdev *led_cdev,
 {
        int status = -EINVAL;
 
+       i8042_lock_chip();
+
        if (*delay_on == 0 /* ms */ && *delay_off == 0 /* ms */) {
                /* Special case: the leds subsystem requested us to
                 * choose one user-friendly blinking of the LED, and
@@ -135,6 +141,8 @@ static int clevo_mail_led_blink(struct led_classdev *led_cdev,
                       *delay_on, *delay_off);
        }
 
+       i8042_unlock_chip();
+
        return status;
 }
 
index 020f9573fd82011babb4ad666a2966a44d088aba..2158377a13593a45938278ac860d5de3db8a06fa 100644 (file)
@@ -124,6 +124,8 @@ config MD_RAID456
        select MD_RAID6_PQ
        select ASYNC_MEMCPY
        select ASYNC_XOR
+       select ASYNC_PQ
+       select ASYNC_RAID6_RECOV
        ---help---
          A RAID-5 set of N drives with a capacity of C MB per drive provides
          the capacity of C * (N - 1) MB, and protects against a failure
@@ -152,9 +154,33 @@ config MD_RAID456
 
          If unsure, say Y.
 
+config MULTICORE_RAID456
+       bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
+       depends on MD_RAID456
+       depends on SMP
+       depends on EXPERIMENTAL
+       ---help---
+         Enable the raid456 module to dispatch per-stripe raid operations to a
+         thread pool.
+
+         If unsure, say N.
+
 config MD_RAID6_PQ
        tristate
 
+config ASYNC_RAID6_TEST
+       tristate "Self test for hardware accelerated raid6 recovery"
+       depends on MD_RAID6_PQ
+       select ASYNC_RAID6_RECOV
+       ---help---
+         This is a one-shot self test that permutes through the
+         recovery of all the possible two-disk failure scenarios for an
+         N-disk array.  Recovery is performed with the asynchronous
+         raid6 recovery routines, and will optionally use an offload
+         engine if one is available.
+
+         If unsure, say N.
+
 config MD_MULTIPATH
        tristate "Multipath I/O support"
        depends on BLK_DEV_MD
index 3319c2fec28e40f7908d7e0688595c113f3a597f..6986b0059d23279fd83e67da452b225bdf946708 100644 (file)
@@ -108,6 +108,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
  * allocated while we're using it
  */
 static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
+__releases(bitmap->lock)
+__acquires(bitmap->lock)
 {
        unsigned char *mappage;
 
@@ -325,7 +327,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
        return 0;
 
  bad_alignment:
-       rcu_read_unlock();
        return -EINVAL;
 }
 
@@ -1207,6 +1208,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
                                            sector_t offset, int *blocks,
                                            int create)
+__releases(bitmap->lock)
+__acquires(bitmap->lock)
 {
        /* If 'create', we might release the lock and reclaim it.
         * The lock must have been taken with interrupts enabled.
index ea48429054441e35b4d3230ed876e729b1169806..1ceceb334d5ebe8f5ce637d29898fc984bfcc797 100644 (file)
@@ -108,6 +108,9 @@ static int linear_congested(void *data, int bits)
        linear_conf_t *conf;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
+
        rcu_read_lock();
        conf = rcu_dereference(mddev->private);
 
index 6aa497e4baf85d30526ad12ab8a96dbc179d9538..26ba42a79129bdb953fa5b98628a744689a6594c 100644 (file)
@@ -262,6 +262,12 @@ static void mddev_resume(mddev_t *mddev)
        mddev->pers->quiesce(mddev, 0);
 }
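+
+/*
+ * Generic congestion check shared by the md personalities in this patch:
+ * while array I/O is suspended, report the device congested so callers
+ * back off instead of queueing more work.
+ */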
 
+int mddev_congested(mddev_t *mddev, int bits)
+{
+       return mddev->suspended;
+}
+EXPORT_SYMBOL(mddev_congested);
+
 
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
@@ -4218,7 +4224,7 @@ static int do_md_run(mddev_t * mddev)
                        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                        mddev->sync_thread = md_register_thread(md_do_sync,
                                                                mddev,
-                                                               "%s_resync");
+                                                               "resync");
                        if (!mddev->sync_thread) {
                                printk(KERN_ERR "%s: could not start resync"
                                       " thread...\n",
@@ -4575,10 +4581,10 @@ static int get_version(void __user * arg)
 static int get_array_info(mddev_t * mddev, void __user * arg)
 {
        mdu_array_info_t info;
-       int nr,working,active,failed,spare;
+       int nr,working,insync,failed,spare;
        mdk_rdev_t *rdev;
 
-       nr=working=active=failed=spare=0;
+       nr=working=insync=failed=spare=0;
        list_for_each_entry(rdev, &mddev->disks, same_set) {
                nr++;
                if (test_bit(Faulty, &rdev->flags))
@@ -4586,7 +4592,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
                else {
                        working++;
                        if (test_bit(In_sync, &rdev->flags))
-                               active++;       
+                               insync++;
                        else
                                spare++;
                }
@@ -4611,7 +4617,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
                info.state = (1<<MD_SB_CLEAN);
        if (mddev->bitmap && mddev->bitmap_offset)
                info.state = (1<<MD_SB_BITMAP_PRESENT);
-       info.active_disks  = active;
+       info.active_disks  = insync;
        info.working_disks = working;
        info.failed_disks  = failed;
        info.spare_disks   = spare;
@@ -4721,7 +4727,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
                if (!list_empty(&mddev->disks)) {
                        mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
                                                        mdk_rdev_t, same_set);
-                       int err = super_types[mddev->major_version]
+                       err = super_types[mddev->major_version]
                                .load_super(rdev, rdev0, mddev->minor_version);
                        if (err < 0) {
                                printk(KERN_WARNING 
@@ -5631,7 +5637,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
        thread->run = run;
        thread->mddev = mddev;
        thread->timeout = MAX_SCHEDULE_TIMEOUT;
-       thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
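+       /*
+        * The thread is now named "<mdname>_<name>"; a NULL name falls
+        * back to the personality name, which is why callers in this
+        * patch pass "resync" or NULL instead of "%s_resync" and friends.
+        */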
+       thread->tsk = kthread_run(md_thread, thread,
+                                 "%s_%s",
+                                 mdname(thread->mddev),
+                                 name ?: mddev->pers->name);
        if (IS_ERR(thread->tsk)) {
                kfree(thread);
                return NULL;
@@ -6745,7 +6754,7 @@ void md_check_recovery(mddev_t *mddev)
                        }
                        mddev->sync_thread = md_register_thread(md_do_sync,
                                                                mddev,
-                                                               "%s_resync");
+                                                               "resync");
                        if (!mddev->sync_thread) {
                                printk(KERN_ERR "%s: could not start resync"
                                        " thread...\n", 
index f55d2ff9513329b973fe0c6c96ec9434eb53de9c..f184b69ef337514d460d669ffc4309a7d5dcc515 100644 (file)
@@ -430,6 +430,7 @@ extern void md_write_end(mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
 extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
 
+extern int mddev_congested(mddev_t *mddev, int bits);
 extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
                           sector_t sector, int size, struct page *page);
 extern void md_super_wait(mddev_t *mddev);
index d2d3fd54cc681184cab146bf90ddc000f3b706de..ee7646f974a07165bd368deb3a18f6390e5553dc 100644 (file)
@@ -150,7 +150,6 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
        }
 
        mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
-       memset(mp_bh, 0, sizeof(*mp_bh));
 
        mp_bh->master_bio = bio;
        mp_bh->mddev = mddev;
@@ -199,6 +198,9 @@ static int multipath_congested(void *data, int bits)
        multipath_conf_t *conf = mddev->private;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
+
        rcu_read_lock();
        for (i = 0; i < mddev->raid_disks ; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
@@ -504,7 +506,7 @@ static int multipath_run (mddev_t *mddev)
        }
 
        {
-               mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath");
+               mddev->thread = md_register_thread(multipathd, mddev, NULL);
                if (!mddev->thread) {
                        printk(KERN_ERR "multipath: couldn't allocate thread"
                                " for %s\n", mdname(mddev));
index f845ed98fec9c5126862f85a3e298f7083b80d83..d3a4ce06015a300e9d4df5e95ee63669613db17e 100644 (file)
@@ -44,6 +44,9 @@ static int raid0_congested(void *data, int bits)
        mdk_rdev_t **devlist = conf->devlist;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
+
        for (i = 0; i < mddev->raid_disks && !ret ; i++) {
                struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
 
@@ -86,7 +89,7 @@ static void dump_zones(mddev_t *mddev)
 
 static int create_strip_zones(mddev_t *mddev)
 {
-       int i, c, j, err;
+       int i, c, err;
        sector_t curr_zone_end, sectors;
        mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
        struct strip_zone *zone;
@@ -198,6 +201,8 @@ static int create_strip_zones(mddev_t *mddev)
        /* now do the other zones */
        for (i = 1; i < conf->nr_strip_zones; i++)
        {
+               int j;
+
                zone = conf->strip_zone + i;
                dev = conf->devlist + i * mddev->raid_disks;
 
@@ -207,7 +212,6 @@ static int create_strip_zones(mddev_t *mddev)
                c = 0;
 
                for (j=0; j<cnt; j++) {
-                       char b[BDEVNAME_SIZE];
                        rdev = conf->devlist[j];
                        printk(KERN_INFO "raid0: checking %s ...",
                                bdevname(rdev->bdev, b));
index ff7ed33359959e39747509e4a16a24a0c584b292..d1b9bd5fd4f6cad04f4037ece137f43904f50de6 100644 (file)
@@ -576,6 +576,9 @@ static int raid1_congested(void *data, int bits)
        conf_t *conf = mddev->private;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
+
        rcu_read_lock();
        for (i = 0; i < mddev->raid_disks; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -851,7 +854,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
                read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
                read_bio->bi_bdev = mirror->rdev->bdev;
                read_bio->bi_end_io = raid1_end_read_request;
-               read_bio->bi_rw = READ | do_sync;
+               read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
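+               /* do_sync is 0 or 1, so the shift lands it on the
+                * BIO_RW_SYNCIO flag bit rather than OR-ing it in raw */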
                read_bio->bi_private = r1_bio;
 
                generic_make_request(read_bio);
@@ -943,7 +946,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
                mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
                mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
                mbio->bi_end_io = raid1_end_write_request;
-               mbio->bi_rw = WRITE | do_barriers | do_sync;
+               mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) |
+                       (do_sync << BIO_RW_SYNCIO);
                mbio->bi_private = r1_bio;
 
                if (behind_pages) {
@@ -1623,7 +1627,8 @@ static void raid1d(mddev_t *mddev)
                                                conf->mirrors[i].rdev->data_offset;
                                        bio->bi_bdev = conf->mirrors[i].rdev->bdev;
                                        bio->bi_end_io = raid1_end_write_request;
-                                       bio->bi_rw = WRITE | do_sync;
+                                       bio->bi_rw = WRITE |
+                                               (do_sync << BIO_RW_SYNCIO);
                                        bio->bi_private = r1_bio;
                                        r1_bio->bios[i] = bio;
                                        generic_make_request(bio);
@@ -1672,7 +1677,7 @@ static void raid1d(mddev_t *mddev)
                                bio->bi_sector = r1_bio->sector + rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
                                bio->bi_end_io = raid1_end_read_request;
-                               bio->bi_rw = READ | do_sync;
+                               bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
                                bio->bi_private = r1_bio;
                                unplug = 1;
                                generic_make_request(bio);
@@ -2047,7 +2052,7 @@ static int run(mddev_t *mddev)
        conf->last_used = j;
 
 
-       mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
+       mddev->thread = md_register_thread(raid1d, mddev, NULL);
        if (!mddev->thread) {
                printk(KERN_ERR
                       "raid1: couldn't allocate thread for %s\n",
index d0a2152e064f75430afa8117db184392c1a4ef3b..51c4c5c4d87add417a297714f7873df95979001d 100644 (file)
@@ -631,6 +631,8 @@ static int raid10_congested(void *data, int bits)
        conf_t *conf = mddev->private;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
        rcu_read_lock();
        for (i = 0; i < mddev->raid_disks && ret == 0; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -882,7 +884,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
                        mirror->rdev->data_offset;
                read_bio->bi_bdev = mirror->rdev->bdev;
                read_bio->bi_end_io = raid10_end_read_request;
-               read_bio->bi_rw = READ | do_sync;
+               read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
                read_bio->bi_private = r10_bio;
 
                generic_make_request(read_bio);
@@ -950,7 +952,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
                        conf->mirrors[d].rdev->data_offset;
                mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
                mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | do_sync;
+               mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
                mbio->bi_private = r10_bio;
 
                atomic_inc(&r10_bio->remaining);
@@ -1623,7 +1625,7 @@ static void raid10d(mddev_t *mddev)
                                bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
                                        + rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
-                               bio->bi_rw = READ | do_sync;
+                               bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
                                bio->bi_private = r10_bio;
                                bio->bi_end_io = raid10_end_read_request;
                                unplug = 1;
@@ -1773,7 +1775,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
        max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
        if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
                /* recovery... the complicated one */
-               int i, j, k;
+               int j, k;
                r10_bio = NULL;
 
                for (i=0 ; i<conf->raid_disks; i++)
@@ -2188,7 +2190,7 @@ static int run(mddev_t *mddev)
        }
 
 
-       mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10");
+       mddev->thread = md_register_thread(raid10d, mddev, NULL);
        if (!mddev->thread) {
                printk(KERN_ERR
                       "raid10: couldn't allocate thread for %s\n",
index 826eb3467357f72af5952599525bc93355f5279c..94829804ab7fd2f68c839e0f481444bfaeb3be7a 100644 (file)
@@ -47,7 +47,9 @@
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
+#include <linux/async.h>
 #include <linux/seq_file.h>
+#include <linux/cpu.h>
 #include "md.h"
 #include "raid5.h"
 #include "bitmap.h"
@@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
        struct page *bio_page;
        int i;
        int page_offset;
+       struct async_submit_ctl submit;
+       enum async_tx_flags flags = 0;
 
        if (bio->bi_sector >= sector)
                page_offset = (signed)(bio->bi_sector - sector) * 512;
        else
                page_offset = (signed)(sector - bio->bi_sector) * -512;
+
+       if (frombio)
+               flags |= ASYNC_TX_FENCE;
+       init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
+
        bio_for_each_segment(bvl, bio, i) {
                int len = bio_iovec_idx(bio, i)->bv_len;
                int clen;
@@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
                        bio_page = bio_iovec_idx(bio, i)->bv_page;
                        if (frombio)
                                tx = async_memcpy(page, bio_page, page_offset,
-                                       b_offset, clen,
-                                       ASYNC_TX_DEP_ACK,
-                                       tx, NULL, NULL);
+                                                 b_offset, clen, &submit);
                        else
                                tx = async_memcpy(bio_page, page, b_offset,
-                                       page_offset, clen,
-                                       ASYNC_TX_DEP_ACK,
-                                       tx, NULL, NULL);
+                                                 page_offset, clen, &submit);
                }
+               /* chain the operations */
+               submit.depend_tx = tx;
+
                if (clen < len) /* hit end of page */
                        break;
                page_offset +=  len;
@@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh)
 {
        struct dma_async_tx_descriptor *tx = NULL;
        raid5_conf_t *conf = sh->raid_conf;
+       struct async_submit_ctl submit;
        int i;
 
        pr_debug("%s: stripe %llu\n", __func__,
@@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh)
        }
 
        atomic_inc(&sh->count);
-       async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
-               ops_complete_biofill, sh);
+       init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
+       async_trigger_callback(&submit);
 }
 
-static void ops_complete_compute5(void *stripe_head_ref)
+static void mark_target_uptodate(struct stripe_head *sh, int target)
 {
-       struct stripe_head *sh = stripe_head_ref;
-       int target = sh->ops.target;
-       struct r5dev *tgt = &sh->dev[target];
+       struct r5dev *tgt;
 
-       pr_debug("%s: stripe %llu\n", __func__,
-               (unsigned long long)sh->sector);
+       if (target < 0)
+               return;
 
+       tgt = &sh->dev[target];
        set_bit(R5_UPTODATE, &tgt->flags);
        BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
        clear_bit(R5_Wantcompute, &tgt->flags);
+}
+
+static void ops_complete_compute(void *stripe_head_ref)
+{
+       struct stripe_head *sh = stripe_head_ref;
+
+       pr_debug("%s: stripe %llu\n", __func__,
+               (unsigned long long)sh->sector);
+
+       /* mark the computed target(s) as uptodate */
+       mark_target_uptodate(sh, sh->ops.target);
+       mark_target_uptodate(sh, sh->ops.target2);
+
        clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
        if (sh->check_state == check_state_compute_run)
                sh->check_state = check_state_compute_result;
@@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref)
        release_stripe(sh);
 }
 
-static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
+/* return a pointer to the address conversion region of the scribble buffer */
+static addr_conv_t *to_addr_conv(struct stripe_head *sh,
+                                struct raid5_percpu *percpu)
+{
+       return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-       /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
-       struct page *xor_srcs[disks];
+       struct page **xor_srcs = percpu->scribble;
        int target = sh->ops.target;
        struct r5dev *tgt = &sh->dev[target];
        struct page *xor_dest = tgt->page;
        int count = 0;
        struct dma_async_tx_descriptor *tx;
+       struct async_submit_ctl submit;
        int i;
 
        pr_debug("%s: stripe %llu block: %d\n",
@@ -660,17 +689,215 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
 
        atomic_inc(&sh->count);
 
+       init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
+                         ops_complete_compute, sh, to_addr_conv(sh, percpu));
        if (unlikely(count == 1))
-               tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
-                       0, NULL, ops_complete_compute5, sh);
+               tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
        else
-               tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-                       ASYNC_TX_XOR_ZERO_DST, NULL,
-                       ops_complete_compute5, sh);
+               tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
        return tx;
 }
 
+/* set_syndrome_sources - populate source buffers for gen_syndrome
+ * @srcs - (struct page *) array of size sh->disks
+ * @sh - stripe_head to parse
+ *
+ * Populates srcs in proper layout order for the stripe and returns the
+ * 'count' of sources to be used in a call to async_gen_syndrome.  The P
+ * destination buffer is recorded in srcs[count] and the Q destination
+ * is recorded in srcs[count+1].
+ */
+static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+{
+       int disks = sh->disks;
+       int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
+       int d0_idx = raid6_d0(sh);
+       int count;
+       int i;
+
+       for (i = 0; i < disks; i++)
+               srcs[i] = (void *)raid6_empty_zero_page;
+
+       count = 0;
+       i = d0_idx;
+       do {
+               int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+               srcs[slot] = sh->dev[i].page;
+               i = raid6_next_disk(i, disks);
+       } while (i != d0_idx);
+       BUG_ON(count != syndrome_disks);
+
+       return count;
+}
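+/*
+ * Typical use, as in the reconstruct and check paths below; the per-cpu
+ * scribble region already has room for the disks+2 entries filled in
+ * here:
+ *
+ *	count = set_syndrome_sources(blocks, sh);
+ *	tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+ */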
+
+static struct dma_async_tx_descriptor *
+ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
+{
+       int disks = sh->disks;
+       struct page **blocks = percpu->scribble;
+       int target;
+       int qd_idx = sh->qd_idx;
+       struct dma_async_tx_descriptor *tx;
+       struct async_submit_ctl submit;
+       struct r5dev *tgt;
+       struct page *dest;
+       int i;
+       int count;
+
+       if (sh->ops.target < 0)
+               target = sh->ops.target2;
+       else if (sh->ops.target2 < 0)
+               target = sh->ops.target;
+       else
+               /* we should only have one valid target */
+               BUG();
+       BUG_ON(target < 0);
+       pr_debug("%s: stripe %llu block: %d\n",
+               __func__, (unsigned long long)sh->sector, target);
+
+       tgt = &sh->dev[target];
+       BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+       dest = tgt->page;
+
+       atomic_inc(&sh->count);
+
+       if (target == qd_idx) {
+               count = set_syndrome_sources(blocks, sh);
+               blocks[count] = NULL; /* regenerating p is not necessary */
+               BUG_ON(blocks[count+1] != dest); /* q should already be set */
+               init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+                                 ops_complete_compute, sh,
+                                 to_addr_conv(sh, percpu));
+               tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+       } else {
+               /* Compute any data- or p-drive using XOR */
+               count = 0;
+               for (i = disks; i-- ; ) {
+                       if (i == target || i == qd_idx)
+                               continue;
+                       blocks[count++] = sh->dev[i].page;
+               }
+
+               init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+                                 NULL, ops_complete_compute, sh,
+                                 to_addr_conv(sh, percpu));
+               tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
+       }
+
+       return tx;
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
+{
+       int i, count, disks = sh->disks;
+       int syndrome_disks = sh->ddf_layout ? disks : disks-2;
+       int d0_idx = raid6_d0(sh);
+       int faila = -1, failb = -1;
+       int target = sh->ops.target;
+       int target2 = sh->ops.target2;
+       struct r5dev *tgt = &sh->dev[target];
+       struct r5dev *tgt2 = &sh->dev[target2];
+       struct dma_async_tx_descriptor *tx;
+       struct page **blocks = percpu->scribble;
+       struct async_submit_ctl submit;
+
+       pr_debug("%s: stripe %llu block1: %d block2: %d\n",
+                __func__, (unsigned long long)sh->sector, target, target2);
+       BUG_ON(target < 0 || target2 < 0);
+       BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+       BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
+
+       /* we need to open-code set_syndrome_sources to handle the
+        * slot number conversion for 'faila' and 'failb'
+        */
+       for (i = 0; i < disks ; i++)
+               blocks[i] = (void *)raid6_empty_zero_page;
+       count = 0;
+       i = d0_idx;
+       do {
+               int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+               blocks[slot] = sh->dev[i].page;
+
+               if (i == target)
+                       faila = slot;
+               if (i == target2)
+                       failb = slot;
+               i = raid6_next_disk(i, disks);
+       } while (i != d0_idx);
+       BUG_ON(count != syndrome_disks);
+
+       BUG_ON(faila == failb);
+       if (failb < faila)
+               swap(faila, failb);
+       pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
+                __func__, (unsigned long long)sh->sector, faila, failb);
+
+       atomic_inc(&sh->count);
+
+       if (failb == syndrome_disks+1) {
+               /* Q disk is one of the missing disks */
+               if (faila == syndrome_disks) {
+                       /* Missing P+Q, just recompute */
+                       init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+                                         ops_complete_compute, sh,
+                                         to_addr_conv(sh, percpu));
+                       return async_gen_syndrome(blocks, 0, count+2,
+                                                 STRIPE_SIZE, &submit);
+               } else {
+                       struct page *dest;
+                       int data_target;
+                       int qd_idx = sh->qd_idx;
+
+                       /* Missing D+Q: recompute D from P, then recompute Q */
+                       if (target == qd_idx)
+                               data_target = target2;
+                       else
+                               data_target = target;
+
+                       count = 0;
+                       for (i = disks; i-- ; ) {
+                               if (i == data_target || i == qd_idx)
+                                       continue;
+                               blocks[count++] = sh->dev[i].page;
+                       }
+                       dest = sh->dev[data_target].page;
+                       init_async_submit(&submit,
+                                         ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+                                         NULL, NULL, NULL,
+                                         to_addr_conv(sh, percpu));
+                       tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
+                                      &submit);
+
+                       count = set_syndrome_sources(blocks, sh);
+                       init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+                                         ops_complete_compute, sh,
+                                         to_addr_conv(sh, percpu));
+                       return async_gen_syndrome(blocks, 0, count+2,
+                                                 STRIPE_SIZE, &submit);
+               }
+       } else {
+               init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+                                 ops_complete_compute, sh,
+                                 to_addr_conv(sh, percpu));
+               if (failb == syndrome_disks) {
+                       /* We're missing D+P. */
+                       return async_raid6_datap_recov(syndrome_disks+2,
+                                                      STRIPE_SIZE, faila,
+                                                      blocks, &submit);
+               } else {
+                       /* We're missing D+D. */
+                       return async_raid6_2data_recov(syndrome_disks+2,
+                                                      STRIPE_SIZE, faila, failb,
+                                                      blocks, &submit);
+               }
+       }
+}
+
+
 static void ops_complete_prexor(void *stripe_head_ref)
 {
        struct stripe_head *sh = stripe_head_ref;
@@ -680,12 +907,13 @@ static void ops_complete_prexor(void *stripe_head_ref)
 }
 
 static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
+              struct dma_async_tx_descriptor *tx)
 {
-       /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
-       struct page *xor_srcs[disks];
+       struct page **xor_srcs = percpu->scribble;
        int count = 0, pd_idx = sh->pd_idx, i;
+       struct async_submit_ctl submit;
 
        /* existing parity data subtracted */
        struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -700,9 +928,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                        xor_srcs[count++] = dev->page;
        }
 
-       tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-               ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
-               ops_complete_prexor, sh);
+       init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         ops_complete_prexor, sh, to_addr_conv(sh, percpu));
+       tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
        return tx;
 }
@@ -742,17 +970,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
        return tx;
 }
 
-static void ops_complete_postxor(void *stripe_head_ref)
+static void ops_complete_reconstruct(void *stripe_head_ref)
 {
        struct stripe_head *sh = stripe_head_ref;
-       int disks = sh->disks, i, pd_idx = sh->pd_idx;
+       int disks = sh->disks;
+       int pd_idx = sh->pd_idx;
+       int qd_idx = sh->qd_idx;
+       int i;
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
        for (i = disks; i--; ) {
                struct r5dev *dev = &sh->dev[i];
-               if (dev->written || i == pd_idx)
+
+               if (dev->written || i == pd_idx || i == qd_idx)
                        set_bit(R5_UPTODATE, &dev->flags);
        }
 
@@ -770,12 +1002,12 @@ static void ops_complete_postxor(void *stripe_head_ref)
 }
 
 static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
+                    struct dma_async_tx_descriptor *tx)
 {
-       /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
-       struct page *xor_srcs[disks];
-
+       struct page **xor_srcs = percpu->scribble;
+       struct async_submit_ctl submit;
        int count = 0, pd_idx = sh->pd_idx, i;
        struct page *xor_dest;
        int prexor = 0;
@@ -809,18 +1041,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
         * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
         * for the synchronous xor case
         */
-       flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
+       flags = ASYNC_TX_ACK |
                (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
 
        atomic_inc(&sh->count);
 
-       if (unlikely(count == 1)) {
-               flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
-               tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
-                       flags, tx, ops_complete_postxor, sh);
-       } else
-               tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-                       flags, tx, ops_complete_postxor, sh);
+       init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
+                         to_addr_conv(sh, percpu));
+       if (unlikely(count == 1))
+               tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+       else
+               tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+}
+
+static void
+ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
+                    struct dma_async_tx_descriptor *tx)
+{
+       struct async_submit_ctl submit;
+       struct page **blocks = percpu->scribble;
+       int count;
+
+       pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
+
+       count = set_syndrome_sources(blocks, sh);
+
+       atomic_inc(&sh->count);
+
+       init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
+                         sh, to_addr_conv(sh, percpu));
+       async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
 }
 
 static void ops_complete_check(void *stripe_head_ref)
@@ -835,63 +1085,115 @@ static void ops_complete_check(void *stripe_head_ref)
        release_stripe(sh);
 }
 
-static void ops_run_check(struct stripe_head *sh)
+static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-       /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
-       struct page *xor_srcs[disks];
+       int pd_idx = sh->pd_idx;
+       int qd_idx = sh->qd_idx;
+       struct page *xor_dest;
+       struct page **xor_srcs = percpu->scribble;
        struct dma_async_tx_descriptor *tx;
-
-       int count = 0, pd_idx = sh->pd_idx, i;
-       struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
+       struct async_submit_ctl submit;
+       int count;
+       int i;
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
+       count = 0;
+       xor_dest = sh->dev[pd_idx].page;
+       xor_srcs[count++] = xor_dest;
        for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
-               if (i != pd_idx)
-                       xor_srcs[count++] = dev->page;
+               if (i == pd_idx || i == qd_idx)
+                       continue;
+               xor_srcs[count++] = sh->dev[i].page;
        }
 
-       tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-               &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
+       init_async_submit(&submit, 0, NULL, NULL, NULL,
+                         to_addr_conv(sh, percpu));
+       tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
+                          &sh->ops.zero_sum_result, &submit);
+
+       atomic_inc(&sh->count);
+       init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
+       tx = async_trigger_callback(&submit);
+}
+
+static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
+{
+       struct page **srcs = percpu->scribble;
+       struct async_submit_ctl submit;
+       int count;
+
+       pr_debug("%s: stripe %llu checkp: %d\n", __func__,
+               (unsigned long long)sh->sector, checkp);
+
+       count = set_syndrome_sources(srcs, sh);
+       if (!checkp)
+               srcs[count] = NULL;
 
        atomic_inc(&sh->count);
-       tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
-               ops_complete_check, sh);
+       init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
+                         sh, to_addr_conv(sh, percpu));
+       async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
+                          &sh->ops.zero_sum_result, percpu->spare_page, &submit);
 }
 
-static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
+static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 {
        int overlap_clear = 0, i, disks = sh->disks;
        struct dma_async_tx_descriptor *tx = NULL;
+       raid5_conf_t *conf = sh->raid_conf;
+       int level = conf->level;
+       struct raid5_percpu *percpu;
+       unsigned long cpu;
 
+       cpu = get_cpu();
+       percpu = per_cpu_ptr(conf->percpu, cpu);
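+       /* get_cpu() disables preemption, so the per-cpu scribble buffer
+        * stays ours until put_cpu() at the end of this function */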
        if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
                ops_run_biofill(sh);
                overlap_clear++;
        }
 
        if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
-               tx = ops_run_compute5(sh);
-               /* terminate the chain if postxor is not set to be run */
-               if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
+               if (level < 6)
+                       tx = ops_run_compute5(sh, percpu);
+               else {
+                       if (sh->ops.target2 < 0 || sh->ops.target < 0)
+                               tx = ops_run_compute6_1(sh, percpu);
+                       else
+                               tx = ops_run_compute6_2(sh, percpu);
+               }
+               /* terminate the chain if reconstruct is not set to be run */
+               if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
                        async_tx_ack(tx);
        }
 
        if (test_bit(STRIPE_OP_PREXOR, &ops_request))
-               tx = ops_run_prexor(sh, tx);
+               tx = ops_run_prexor(sh, percpu, tx);
 
        if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
                tx = ops_run_biodrain(sh, tx);
                overlap_clear++;
        }
 
-       if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
-               ops_run_postxor(sh, tx);
+       if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
+               if (level < 6)
+                       ops_run_reconstruct5(sh, percpu, tx);
+               else
+                       ops_run_reconstruct6(sh, percpu, tx);
+       }
 
-       if (test_bit(STRIPE_OP_CHECK, &ops_request))
-               ops_run_check(sh);
+       if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
+               if (sh->check_state == check_state_run)
+                       ops_run_check_p(sh, percpu);
+               else if (sh->check_state == check_state_run_q)
+                       ops_run_check_pq(sh, percpu, 0);
+               else if (sh->check_state == check_state_run_pq)
+                       ops_run_check_pq(sh, percpu, 1);
+               else
+                       BUG();
+       }
 
        if (overlap_clear)
                for (i = disks; i--; ) {
@@ -899,6 +1201,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
                        if (test_and_clear_bit(R5_Overlap, &dev->flags))
                                wake_up(&sh->raid_conf->wait_for_overlap);
                }
+       put_cpu();
 }
 
 static int grow_one_stripe(raid5_conf_t *conf)
@@ -948,6 +1251,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
        return 0;
 }
 
+/**
+ * scribble_len - return the required size of the scribble region
+ * @num - total number of disks in the array
+ *
+ * The size must be enough to contain:
+ * 1/ a struct page pointer for each device in the array, plus 2
+ * 2/ room to convert each entry in (1) to its corresponding dma
+ *    (dma_map_page()) or page (page_address()) address.
+ *
+ * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
+ * calculate over all devices (not just the data blocks), using zeros in place
+ * of the P and Q blocks.
+ */
+static size_t scribble_len(int num)
+{
+       size_t len;
+
+       len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
+
+       return len;
+}
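+
+/*
+ * Worked example: a 6-disk array (num = 6) on a 64-bit build needs
+ * 8 * 8 bytes of page pointers plus, assuming addr_conv_t is also
+ * pointer-sized, another 8 * 8 bytes, i.e. 128 bytes per CPU.
+ */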
+
 static int resize_stripes(raid5_conf_t *conf, int newsize)
 {
        /* Make all the stripes able to hold 'newsize' devices.
@@ -976,6 +1301,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
        struct stripe_head *osh, *nsh;
        LIST_HEAD(newstripes);
        struct disk_info *ndisks;
+       unsigned long cpu;
        int err;
        struct kmem_cache *sc;
        int i;
@@ -1041,7 +1367,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
        /* Step 3.
         * At this point, we are holding all the stripes so the array
         * is completely stalled, so now is a good time to resize
-        * conf->disks.
+        * conf->disks and the scribble region.
         */
        ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
        if (ndisks) {
@@ -1052,10 +1378,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
        } else
                err = -ENOMEM;
 
+       get_online_cpus();
+       conf->scribble_len = scribble_len(newsize);
+       for_each_present_cpu(cpu) {
+               struct raid5_percpu *percpu;
+               void *scribble;
+
+               percpu = per_cpu_ptr(conf->percpu, cpu);
+               scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+
+               if (scribble) {
+                       kfree(percpu->scribble);
+                       percpu->scribble = scribble;
+               } else {
+                       err = -ENOMEM;
+                       break;
+               }
+       }
+       put_online_cpus();
+
        /* Step 4, return new stripes to service */
        while(!list_empty(&newstripes)) {
                nsh = list_entry(newstripes.next, struct stripe_head, lru);
                list_del_init(&nsh->lru);
+
                for (i=conf->raid_disks; i < newsize; i++)
                        if (nsh->dev[i].page == NULL) {
                                struct page *p = alloc_page(GFP_NOIO);
@@ -1594,258 +1940,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
 }
 
 
-
-/*
- * Copy data between a page in the stripe cache, and one or more bion
- * The page could align with the middle of the bio, or there could be
- * several bion, each with several bio_vecs, which cover part of the page
- * Multiple bion are linked together on bi_next.  There may be extras
- * at the end of this list.  We ignore them.
- */
-static void copy_data(int frombio, struct bio *bio,
-                    struct page *page,
-                    sector_t sector)
-{
-       char *pa = page_address(page);
-       struct bio_vec *bvl;
-       int i;
-       int page_offset;
-
-       if (bio->bi_sector >= sector)
-               page_offset = (signed)(bio->bi_sector - sector) * 512;
-       else
-               page_offset = (signed)(sector - bio->bi_sector) * -512;
-       bio_for_each_segment(bvl, bio, i) {
-               int len = bio_iovec_idx(bio,i)->bv_len;
-               int clen;
-               int b_offset = 0;
-
-               if (page_offset < 0) {
-                       b_offset = -page_offset;
-                       page_offset += b_offset;
-                       len -= b_offset;
-               }
-
-               if (len > 0 && page_offset + len > STRIPE_SIZE)
-                       clen = STRIPE_SIZE - page_offset;
-               else clen = len;
-
-               if (clen > 0) {
-                       char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
-                       if (frombio)
-                               memcpy(pa+page_offset, ba+b_offset, clen);
-                       else
-                               memcpy(ba+b_offset, pa+page_offset, clen);
-                       __bio_kunmap_atomic(ba, KM_USER0);
-               }
-               if (clen < len) /* hit end of page */
-                       break;
-               page_offset +=  len;
-       }
-}
-
-#define check_xor()    do {                                              \
-                               if (count == MAX_XOR_BLOCKS) {            \
-                               xor_blocks(count, STRIPE_SIZE, dest, ptr);\
-                               count = 0;                                \
-                          }                                              \
-                       } while(0)
-
-static void compute_parity6(struct stripe_head *sh, int method)
-{
-       raid5_conf_t *conf = sh->raid_conf;
-       int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
-       int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
-       struct bio *chosen;
-       /**** FIX THIS: This could be very bad if disks is close to 256 ****/
-       void *ptrs[syndrome_disks+2];
-
-       pd_idx = sh->pd_idx;
-       qd_idx = sh->qd_idx;
-       d0_idx = raid6_d0(sh);
-
-       pr_debug("compute_parity, stripe %llu, method %d\n",
-               (unsigned long long)sh->sector, method);
-
-       switch(method) {
-       case READ_MODIFY_WRITE:
-               BUG();          /* READ_MODIFY_WRITE N/A for RAID-6 */
-       case RECONSTRUCT_WRITE:
-               for (i= disks; i-- ;)
-                       if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
-                               chosen = sh->dev[i].towrite;
-                               sh->dev[i].towrite = NULL;
-
-                               if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
-                                       wake_up(&conf->wait_for_overlap);
-
-                               BUG_ON(sh->dev[i].written);
-                               sh->dev[i].written = chosen;
-                       }
-               break;
-       case CHECK_PARITY:
-               BUG();          /* Not implemented yet */
-       }
-
-       for (i = disks; i--;)
-               if (sh->dev[i].written) {
-                       sector_t sector = sh->dev[i].sector;
-                       struct bio *wbi = sh->dev[i].written;
-                       while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-                               copy_data(1, wbi, sh->dev[i].page, sector);
-                               wbi = r5_next_bio(wbi, sector);
-                       }
-
-                       set_bit(R5_LOCKED, &sh->dev[i].flags);
-                       set_bit(R5_UPTODATE, &sh->dev[i].flags);
-               }
-
-       /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
-
-       for (i = 0; i < disks; i++)
-               ptrs[i] = (void *)raid6_empty_zero_page;
-
-       count = 0;
-       i = d0_idx;
-       do {
-               int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
-               ptrs[slot] = page_address(sh->dev[i].page);
-               if (slot < syndrome_disks &&
-                   !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
-                       printk(KERN_ERR "block %d/%d not uptodate "
-                              "on parity calc\n", i, count);
-                       BUG();
-               }
-
-               i = raid6_next_disk(i, disks);
-       } while (i != d0_idx);
-       BUG_ON(count != syndrome_disks);
-
-       raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
-
-       switch(method) {
-       case RECONSTRUCT_WRITE:
-               set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
-               set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
-               set_bit(R5_LOCKED,   &sh->dev[pd_idx].flags);
-               set_bit(R5_LOCKED,   &sh->dev[qd_idx].flags);
-               break;
-       case UPDATE_PARITY:
-               set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
-               set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
-               break;
-       }
-}
-
-
-/* Compute one missing block */
-static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
-{
-       int i, count, disks = sh->disks;
-       void *ptr[MAX_XOR_BLOCKS], *dest, *p;
-       int qd_idx = sh->qd_idx;
-
-       pr_debug("compute_block_1, stripe %llu, idx %d\n",
-               (unsigned long long)sh->sector, dd_idx);
-
-       if ( dd_idx == qd_idx ) {
-               /* We're actually computing the Q drive */
-               compute_parity6(sh, UPDATE_PARITY);
-       } else {
-               dest = page_address(sh->dev[dd_idx].page);
-               if (!nozero) memset(dest, 0, STRIPE_SIZE);
-               count = 0;
-               for (i = disks ; i--; ) {
-                       if (i == dd_idx || i == qd_idx)
-                               continue;
-                       p = page_address(sh->dev[i].page);
-                       if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
-                               ptr[count++] = p;
-                       else
-                               printk("compute_block() %d, stripe %llu, %d"
-                                      " not present\n", dd_idx,
-                                      (unsigned long long)sh->sector, i);
-
-                       check_xor();
-               }
-               if (count)
-                       xor_blocks(count, STRIPE_SIZE, dest, ptr);
-               if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
-               else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
-       }
-}
-
-/* Compute two missing blocks */
-static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
-{
-       int i, count, disks = sh->disks;
-       int syndrome_disks = sh->ddf_layout ? disks : disks-2;
-       int d0_idx = raid6_d0(sh);
-       int faila = -1, failb = -1;
-       /**** FIX THIS: This could be very bad if disks is close to 256 ****/
-       void *ptrs[syndrome_disks+2];
-
-       for (i = 0; i < disks ; i++)
-               ptrs[i] = (void *)raid6_empty_zero_page;
-       count = 0;
-       i = d0_idx;
-       do {
-               int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
-               ptrs[slot] = page_address(sh->dev[i].page);
-
-               if (i == dd_idx1)
-                       faila = slot;
-               if (i == dd_idx2)
-                       failb = slot;
-               i = raid6_next_disk(i, disks);
-       } while (i != d0_idx);
-       BUG_ON(count != syndrome_disks);
-
-       BUG_ON(faila == failb);
-       if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
-
-       pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
-                (unsigned long long)sh->sector, dd_idx1, dd_idx2,
-                faila, failb);
-
-       if (failb == syndrome_disks+1) {
-               /* Q disk is one of the missing disks */
-               if (faila == syndrome_disks) {
-                       /* Missing P+Q, just recompute */
-                       compute_parity6(sh, UPDATE_PARITY);
-                       return;
-               } else {
-                       /* We're missing D+Q; recompute D from P */
-                       compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
-                                            dd_idx2 : dd_idx1),
-                                       0);
-                       compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
-                       return;
-               }
-       }
-
-       /* We're missing D+P or D+D; */
-       if (failb == syndrome_disks) {
-               /* We're missing D+P. */
-               raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
-       } else {
-               /* We're missing D+D. */
-               raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
-                                 ptrs);
-       }
-
-       /* Both the above update both missing blocks */
-       set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
-       set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
-}
-
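
The three synchronous helpers deleted here (compute_parity6 and
compute_block_1/2) built a ptrs[] table ordered by syndrome slot and fed it to
raid6_call.gen_syndrome() or the recovery routines; the async replacements keep
the same slot convention but source the table from the per-CPU scribble region.
A small userspace model of that slot layout (sizes and the zero-page stand-in
are illustrative):

    #include <assert.h>
    #include <stdio.h>

    #define DISKS 6
    #define SYNDROME_DISKS (DISKS - 2)  /* non-ddf layout */

    static char zero_page[4096];        /* raid6_empty_zero_page stand-in */

    int main(void)
    {
            static char data[DISKS][4096];
            void *ptrs[SYNDROME_DISKS + 2];
            int slot;

            /* start with every slot pointing at zeros, as the deleted
             * code did for the ddf case */
            for (slot = 0; slot < SYNDROME_DISKS + 2; slot++)
                    ptrs[slot] = zero_page;

            /* data blocks fill slots 0..SYNDROME_DISKS-1; P and Q are
             * always the last two entries */
            for (slot = 0; slot < SYNDROME_DISKS; slot++)
                    ptrs[slot] = data[slot];
            ptrs[SYNDROME_DISKS]     = data[4];     /* P */
            ptrs[SYNDROME_DISKS + 1] = data[5];     /* Q */

            assert(ptrs[SYNDROME_DISKS + 1] == data[5]);
            puts("slot layout ok");
            return 0;
    }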
 static void
-schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
+schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                         int rcw, int expand)
 {
        int i, pd_idx = sh->pd_idx, disks = sh->disks;
+       raid5_conf_t *conf = sh->raid_conf;
+       int level = conf->level;
 
        if (rcw) {
                /* if we are not expanding this is a proper write request, and
@@ -1858,7 +1959,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
                } else
                        sh->reconstruct_state = reconstruct_state_run;
 
-               set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
@@ -1871,17 +1972,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
                                s->locked++;
                        }
                }
-               if (s->locked + 1 == disks)
+               if (s->locked + conf->max_degraded == disks)
                        if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
-                               atomic_inc(&sh->raid_conf->pending_full_writes);
+                               atomic_inc(&conf->pending_full_writes);
        } else {
+               BUG_ON(level == 6);
                BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                        test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
 
                sh->reconstruct_state = reconstruct_state_prexor_drain_run;
                set_bit(STRIPE_OP_PREXOR, &s->ops_request);
                set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-               set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
@@ -1899,13 +2001,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
                }
        }
 
-       /* keep the parity disk locked while asynchronous operations
+       /* keep the parity disk(s) locked while asynchronous operations
         * are in flight
         */
        set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
        clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
        s->locked++;
 
+       if (level == 6) {
+               int qd_idx = sh->qd_idx;
+               struct r5dev *dev = &sh->dev[qd_idx];
+
+               set_bit(R5_LOCKED, &dev->flags);
+               clear_bit(R5_UPTODATE, &dev->flags);
+               s->locked++;
+       }
+
        pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
                __func__, (unsigned long long)sh->sector,
                s->locked, s->ops_request);
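
The full-write test above changed from "s->locked + 1 == disks" to
"s->locked + conf->max_degraded == disks" so it also fires for raid6, where
two parity devices are locked after the data loop rather than one. A worked
check (the disk counts are only an example):

    #include <stdio.h>

    int main(void)
    {
            int disks = 8, max_degraded = 2;        /* raid6 example */
            int full = disks - max_degraded;        /* all 6 data devs locked */
            int partial = 4;                        /* only 4 devs locked */

            printf("full:    %d\n", full + max_degraded == disks);    /* 1 */
            printf("partial: %d\n", partial + max_degraded == disks); /* 0 */
            return 0;
    }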
@@ -1986,13 +2097,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 
 static void end_reshape(raid5_conf_t *conf);
 
-static int page_is_zero(struct page *p)
-{
-       char *a = page_address(p);
-       return ((*(u32*)a) == 0 &&
-               memcmp(a, a+4, STRIPE_SIZE-4)==0);
-}
-
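
page_is_zero() goes away because the synchronous "recompute and compare
against zeros" check is replaced by the async zero-sum operations, which
report their verdict in sh->ops.zero_sum_result; the handlers below then test
per-parity bits. A sketch of that flag handling, assuming
SUM_CHECK_P_RESULT/SUM_CHECK_Q_RESULT are bits 0 and 1 (treat the values as
illustrative):

    #include <stdio.h>

    #define SUM_CHECK_P_RESULT (1 << 0)     /* assumed bit positions */
    #define SUM_CHECK_Q_RESULT (1 << 1)

    static void report(unsigned long zero_sum_result)
    {
            if (!zero_sum_result)
                    puts("P and Q both verify");
            if (zero_sum_result & SUM_CHECK_P_RESULT)
                    puts("P mismatch: recompute parity");
            if (zero_sum_result & SUM_CHECK_Q_RESULT)
                    puts("Q mismatch: recompute syndrome");
    }

    int main(void)
    {
            report(0);
            report(SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT);
            return 0;
    }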
 static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
                            struct stripe_head *sh)
 {
@@ -2132,9 +2236,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
                        set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
                        set_bit(R5_Wantcompute, &dev->flags);
                        sh->ops.target = disk_idx;
+                       sh->ops.target2 = -1;
                        s->req_compute = 1;
                        /* Careful: from this point on 'uptodate' is in the eye
-                        * of raid5_run_ops which services 'compute' operations
+                        * of raid_run_ops which services 'compute' operations
                         * before writes. R5_Wantcompute flags a block that will
                         * be R5_UPTODATE by the time it is needed for a
                         * subsequent operation.
@@ -2173,61 +2278,104 @@ static void handle_stripe_fill5(struct stripe_head *sh,
        set_bit(STRIPE_HANDLE, &sh->state);
 }
 
-static void handle_stripe_fill6(struct stripe_head *sh,
-                       struct stripe_head_state *s, struct r6_state *r6s,
-                       int disks)
+/* fetch_block6 - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill6 to continue
+ */
+static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
+                        struct r6_state *r6s, int disk_idx, int disks)
 {
-       int i;
-       for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
-               if (!test_bit(R5_LOCKED, &dev->flags) &&
-                   !test_bit(R5_UPTODATE, &dev->flags) &&
-                   (dev->toread || (dev->towrite &&
-                    !test_bit(R5_OVERWRITE, &dev->flags)) ||
-                    s->syncing || s->expanding ||
-                    (s->failed >= 1 &&
-                     (sh->dev[r6s->failed_num[0]].toread ||
-                      s->to_write)) ||
-                    (s->failed >= 2 &&
-                     (sh->dev[r6s->failed_num[1]].toread ||
-                      s->to_write)))) {
-                       /* we would like to get this block, possibly
-                        * by computing it, but we might not be able to
+       struct r5dev *dev = &sh->dev[disk_idx];
+       struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
+                                 &sh->dev[r6s->failed_num[1]] };
+
+       if (!test_bit(R5_LOCKED, &dev->flags) &&
+           !test_bit(R5_UPTODATE, &dev->flags) &&
+           (dev->toread ||
+            (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+            s->syncing || s->expanding ||
+            (s->failed >= 1 &&
+             (fdev[0]->toread || s->to_write)) ||
+            (s->failed >= 2 &&
+             (fdev[1]->toread || s->to_write)))) {
+               /* we would like to get this block, possibly by computing it,
+                * otherwise read it if the backing disk is insync
+                */
+               BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
+               BUG_ON(test_bit(R5_Wantread, &dev->flags));
+               if ((s->uptodate == disks - 1) &&
+                   (s->failed && (disk_idx == r6s->failed_num[0] ||
+                                  disk_idx == r6s->failed_num[1]))) {
+                       /* the disk has failed and we were asked to fetch it;
+                        * so compute it
                         */
-                       if ((s->uptodate == disks - 1) &&
-                           (s->failed && (i == r6s->failed_num[0] ||
-                                          i == r6s->failed_num[1]))) {
-                               pr_debug("Computing stripe %llu block %d\n",
-                                      (unsigned long long)sh->sector, i);
-                               compute_block_1(sh, i, 0);
-                               s->uptodate++;
-                       } else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
-                               /* Computing 2-failure is *very* expensive; only
-                                * do it if failed >= 2
-                                */
-                               int other;
-                               for (other = disks; other--; ) {
-                                       if (other == i)
-                                               continue;
-                                       if (!test_bit(R5_UPTODATE,
-                                             &sh->dev[other].flags))
-                                               break;
-                               }
-                               BUG_ON(other < 0);
-                               pr_debug("Computing stripe %llu blocks %d,%d\n",
-                                      (unsigned long long)sh->sector,
-                                      i, other);
-                               compute_block_2(sh, i, other);
-                               s->uptodate += 2;
-                       } else if (test_bit(R5_Insync, &dev->flags)) {
-                               set_bit(R5_LOCKED, &dev->flags);
-                               set_bit(R5_Wantread, &dev->flags);
-                               s->locked++;
-                               pr_debug("Reading block %d (sync=%d)\n",
-                                       i, s->syncing);
+                       pr_debug("Computing stripe %llu block %d\n",
+                              (unsigned long long)sh->sector, disk_idx);
+                       set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+                       set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+                       set_bit(R5_Wantcompute, &dev->flags);
+                       sh->ops.target = disk_idx;
+                       sh->ops.target2 = -1; /* no 2nd target */
+                       s->req_compute = 1;
+                       s->uptodate++;
+                       return 1;
+               } else if (s->uptodate == disks-2 && s->failed >= 2) {
+                       /* Computing 2-failure is *very* expensive; only
+                        * do it if failed >= 2
+                        */
+                       int other;
+                       for (other = disks; other--; ) {
+                               if (other == disk_idx)
+                                       continue;
+                               if (!test_bit(R5_UPTODATE,
+                                     &sh->dev[other].flags))
+                                       break;
                        }
+                       BUG_ON(other < 0);
+                       pr_debug("Computing stripe %llu blocks %d,%d\n",
+                              (unsigned long long)sh->sector,
+                              disk_idx, other);
+                       set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+                       set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+                       set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
+                       set_bit(R5_Wantcompute, &sh->dev[other].flags);
+                       sh->ops.target = disk_idx;
+                       sh->ops.target2 = other;
+                       s->uptodate += 2;
+                       s->req_compute = 1;
+                       return 1;
+               } else if (test_bit(R5_Insync, &dev->flags)) {
+                       set_bit(R5_LOCKED, &dev->flags);
+                       set_bit(R5_Wantread, &dev->flags);
+                       s->locked++;
+                       pr_debug("Reading block %d (sync=%d)\n",
+                               disk_idx, s->syncing);
                }
        }
+
+       return 0;
+}
+
+/**
+ * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ */
+static void handle_stripe_fill6(struct stripe_head *sh,
+                       struct stripe_head_state *s, struct r6_state *r6s,
+                       int disks)
+{
+       int i;
+
+       /* look for blocks to read/compute, skip this if a compute
+        * is already in flight, or if the stripe contents are in the
+        * midst of changing due to a write
+        */
+       if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
+           !sh->reconstruct_state)
+               for (i = disks; i--; )
+                       if (fetch_block6(sh, s, r6s, i, disks))
+                               break;
        set_bit(STRIPE_HANDLE, &sh->state);
 }
 
@@ -2361,114 +2509,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
         */
        /* since handle_stripe can be called at any time we need to handle the
         * case where a compute block operation has been submitted and then a
-        * subsequent call wants to start a write request.  raid5_run_ops only
-        * handles the case where compute block and postxor are requested
+        * subsequent call wants to start a write request.  raid_run_ops only
+        * handles the case where compute block and reconstruct are requested
         * simultaneously.  If this is not the case then new writes need to be
         * held off until the compute completes.
         */
        if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
            (s->locked == 0 && (rcw == 0 || rmw == 0) &&
            !test_bit(STRIPE_BIT_DELAY, &sh->state)))
-               schedule_reconstruction5(sh, s, rcw == 0, 0);
+               schedule_reconstruction(sh, s, rcw == 0, 0);
 }
 
 static void handle_stripe_dirtying6(raid5_conf_t *conf,
                struct stripe_head *sh, struct stripe_head_state *s,
-               struct r6_state *r6s, int disks)
-{
-       int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
-       int qd_idx = sh->qd_idx;
-       for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
-               /* Would I have to read this buffer for reconstruct_write */
-               if (!test_bit(R5_OVERWRITE, &dev->flags)
-                   && i != pd_idx && i != qd_idx
-                   && (!test_bit(R5_LOCKED, &dev->flags)
-                           ) &&
-                   !test_bit(R5_UPTODATE, &dev->flags)) {
-                       if (test_bit(R5_Insync, &dev->flags)) rcw++;
-                       else {
-                               pr_debug("raid6: must_compute: "
-                                       "disk %d flags=%#lx\n", i, dev->flags);
-                               must_compute++;
-                       }
-               }
-       }
-       pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
-              (unsigned long long)sh->sector, rcw, must_compute);
-       set_bit(STRIPE_HANDLE, &sh->state);
-
-       if (rcw > 0)
-               /* want reconstruct write, but need to get some data */
-               for (i = disks; i--; ) {
-                       struct r5dev *dev = &sh->dev[i];
-                       if (!test_bit(R5_OVERWRITE, &dev->flags)
-                           && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
-                           && !test_bit(R5_LOCKED, &dev->flags) &&
-                           !test_bit(R5_UPTODATE, &dev->flags) &&
-                           test_bit(R5_Insync, &dev->flags)) {
-                               if (
-                                 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                                       pr_debug("Read_old stripe %llu "
-                                               "block %d for Reconstruct\n",
-                                            (unsigned long long)sh->sector, i);
-                                       set_bit(R5_LOCKED, &dev->flags);
-                                       set_bit(R5_Wantread, &dev->flags);
-                                       s->locked++;
-                               } else {
-                                       pr_debug("Request delayed stripe %llu "
-                                               "block %d for Reconstruct\n",
-                                            (unsigned long long)sh->sector, i);
-                                       set_bit(STRIPE_DELAYED, &sh->state);
-                                       set_bit(STRIPE_HANDLE, &sh->state);
-                               }
+               struct r6_state *r6s, int disks)
+{
+       int rcw = 0, pd_idx = sh->pd_idx, i;
+       int qd_idx = sh->qd_idx;
+
+       set_bit(STRIPE_HANDLE, &sh->state);
+       for (i = disks; i--; ) {
+               struct r5dev *dev = &sh->dev[i];
+               /* check if we don't have enough data */
+               if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+                   i != pd_idx && i != qd_idx &&
+                   !test_bit(R5_LOCKED, &dev->flags) &&
+                   !(test_bit(R5_UPTODATE, &dev->flags) ||
+                     test_bit(R5_Wantcompute, &dev->flags))) {
+                       rcw++;
+                       if (!test_bit(R5_Insync, &dev->flags))
+                               continue; /* it's a failed drive */
+
+                       if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+                               pr_debug("Read_old stripe %llu "
+                                       "block %d for Reconstruct\n",
+                                    (unsigned long long)sh->sector, i);
+                               set_bit(R5_LOCKED, &dev->flags);
+                               set_bit(R5_Wantread, &dev->flags);
+                               s->locked++;
+                       } else {
+                               pr_debug("Request delayed stripe %llu "
+                                       "block %d for Reconstruct\n",
+                                    (unsigned long long)sh->sector, i);
+                               set_bit(STRIPE_DELAYED, &sh->state);
+                               set_bit(STRIPE_HANDLE, &sh->state);
                        }
                }
+       }
        /* now if nothing is locked, and if we have enough data, we can start a
         * write request
         */
-       if (s->locked == 0 && rcw == 0 &&
+       if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
+           s->locked == 0 && rcw == 0 &&
            !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-               if (must_compute > 0) {
-                       /* We have failed blocks and need to compute them */
-                       switch (s->failed) {
-                       case 0:
-                               BUG();
-                       case 1:
-                               compute_block_1(sh, r6s->failed_num[0], 0);
-                               break;
-                       case 2:
-                               compute_block_2(sh, r6s->failed_num[0],
-                                               r6s->failed_num[1]);
-                               break;
-                       default: /* This request should have been failed? */
-                               BUG();
-                       }
-               }
-
-               pr_debug("Computing parity for stripe %llu\n",
-                       (unsigned long long)sh->sector);
-               compute_parity6(sh, RECONSTRUCT_WRITE);
-               /* now every locked buffer is ready to be written */
-               for (i = disks; i--; )
-                       if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
-                               pr_debug("Writing stripe %llu block %d\n",
-                                      (unsigned long long)sh->sector, i);
-                               s->locked++;
-                               set_bit(R5_Wantwrite, &sh->dev[i].flags);
-                       }
-               if (s->locked == disks)
-                       if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
-                               atomic_inc(&conf->pending_full_writes);
-               /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
-               set_bit(STRIPE_INSYNC, &sh->state);
-
-               if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                       atomic_dec(&conf->preread_active_stripes);
-                       if (atomic_read(&conf->preread_active_stripes) <
-                           IO_THRESHOLD)
-                               md_wakeup_thread(conf->mddev->thread);
-               }
+               schedule_reconstruction(sh, s, 1, 0);
        }
 }
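
Note that the rewritten handle_stripe_dirtying6() drops the must_compute
bookkeeping: any device that is neither overwritten nor (about to be) uptodate
simply bumps rcw, and failed drives are skipped for reads because the compute
path will fill them. A simplified model of that counting, ignoring the parity
indices and R5_LOCKED for brevity:

    #include <stdio.h>

    struct dev { int overwrite, uptodate, wantcompute, insync; };

    int main(void)
    {
            struct dev devs[] = {
                    { 1, 0, 0, 1 },     /* fully overwritten: no read */
                    { 0, 1, 0, 1 },     /* already uptodate */
                    { 0, 0, 1, 1 },     /* compute already scheduled */
                    { 0, 0, 0, 1 },     /* must be read from disk */
                    { 0, 0, 0, 0 },     /* failed drive: counts, unreadable */
            };
            int i, rcw = 0, reads = 0;

            for (i = 0; i < 5; i++) {
                    struct dev *d = &devs[i];

                    if (!d->overwrite && !(d->uptodate || d->wantcompute)) {
                            rcw++;                  /* data still needed */
                            if (d->insync)
                                    reads++;        /* R5_Wantread */
                    }
            }
            printf("rcw=%d reads=%d\n", rcw, reads);    /* rcw=2 reads=1 */
            return 0;
    }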
 
@@ -2527,7 +2622,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                 * we are done.  Otherwise update the mismatch count and repair
                 * parity if !MD_RECOVERY_CHECK
                 */
-               if (sh->ops.zero_sum_result == 0)
+               if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
                        /* parity is correct (on disc,
                         * not in buffer any more)
                         */
@@ -2544,6 +2639,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                                set_bit(R5_Wantcompute,
                                        &sh->dev[sh->pd_idx].flags);
                                sh->ops.target = sh->pd_idx;
+                               sh->ops.target2 = -1;
                                s->uptodate++;
                        }
                }
@@ -2560,67 +2656,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 
 
 static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
-                               struct stripe_head_state *s,
-                               struct r6_state *r6s, struct page *tmp_page,
-                               int disks)
+                                 struct stripe_head_state *s,
+                                 struct r6_state *r6s, int disks)
 {
-       int update_p = 0, update_q = 0;
-       struct r5dev *dev;
        int pd_idx = sh->pd_idx;
        int qd_idx = sh->qd_idx;
+       struct r5dev *dev;
 
        set_bit(STRIPE_HANDLE, &sh->state);
 
        BUG_ON(s->failed > 2);
-       BUG_ON(s->uptodate < disks);
+
        /* Want to check and possibly repair P and Q.
         * However there could be one 'failed' device, in which
         * case we can only check one of them, possibly using the
         * other to generate missing data
         */
 
-       /* If !tmp_page, we cannot do the calculations,
-        * but as we have set STRIPE_HANDLE, we will soon be called
-        * by stripe_handle with a tmp_page - just wait until then.
-        */
-       if (tmp_page) {
+       switch (sh->check_state) {
+       case check_state_idle:
+               /* start a new check operation if there are < 2 failures */
                if (s->failed == r6s->q_failed) {
-                       /* The only possible failed device holds 'Q', so it
+                       /* The only possible failed device holds Q, so it
                         * makes sense to check P (If anything else were failed,
                         * we would have used P to recreate it).
                         */
-                       compute_block_1(sh, pd_idx, 1);
-                       if (!page_is_zero(sh->dev[pd_idx].page)) {
-                               compute_block_1(sh, pd_idx, 0);
-                               update_p = 1;
-                       }
+                       sh->check_state = check_state_run;
                }
                if (!r6s->q_failed && s->failed < 2) {
-                       /* q is not failed, and we didn't use it to generate
+                       /* Q is not failed, and we didn't use it to generate
                         * anything, so it makes sense to check it
                         */
-                       memcpy(page_address(tmp_page),
-                              page_address(sh->dev[qd_idx].page),
-                              STRIPE_SIZE);
-                       compute_parity6(sh, UPDATE_PARITY);
-                       if (memcmp(page_address(tmp_page),
-                                  page_address(sh->dev[qd_idx].page),
-                                  STRIPE_SIZE) != 0) {
-                               clear_bit(STRIPE_INSYNC, &sh->state);
-                               update_q = 1;
-                       }
+                       if (sh->check_state == check_state_run)
+                               sh->check_state = check_state_run_pq;
+                       else
+                               sh->check_state = check_state_run_q;
                }
-               if (update_p || update_q) {
-                       conf->mddev->resync_mismatches += STRIPE_SECTORS;
-                       if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
-                               /* don't try to repair!! */
-                               update_p = update_q = 0;
+
+               /* discard potentially stale zero_sum_result */
+               sh->ops.zero_sum_result = 0;
+
+               if (sh->check_state == check_state_run) {
+                       /* async_xor_zero_sum destroys the contents of P */
+                       clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+                       s->uptodate--;
+               }
+               if (sh->check_state >= check_state_run &&
+                   sh->check_state <= check_state_run_pq) {
+                       /* async_syndrome_zero_sum preserves P and Q, so
+                        * no need to mark them !uptodate here
+                        */
+                       set_bit(STRIPE_OP_CHECK, &s->ops_request);
+                       break;
                }
 
+               /* we have a 2-disk failure */
+               BUG_ON(s->failed != 2);
+               /* fall through */
+       case check_state_compute_result:
+               sh->check_state = check_state_idle;
+
+               /* check that a write has not made the stripe insync */
+               if (test_bit(STRIPE_INSYNC, &sh->state))
+                       break;
+
                /* now write out any block on a failed drive,
-                * or P or Q if they need it
+                * or P or Q if they were recomputed
                 */
-
+               BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
                if (s->failed == 2) {
                        dev = &sh->dev[r6s->failed_num[1]];
                        s->locked++;
@@ -2633,14 +2736,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantwrite, &dev->flags);
                }
-
-               if (update_p) {
+               if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
                        dev = &sh->dev[pd_idx];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantwrite, &dev->flags);
                }
-               if (update_q) {
+               if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
                        dev = &sh->dev[qd_idx];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
@@ -2649,6 +2751,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
                clear_bit(STRIPE_DEGRADED, &sh->state);
 
                set_bit(STRIPE_INSYNC, &sh->state);
+               break;
+       case check_state_run:
+       case check_state_run_q:
+       case check_state_run_pq:
+               break; /* we will be called again upon completion */
+       case check_state_check_result:
+               sh->check_state = check_state_idle;
+
+               /* handle a successful check operation, if parity is correct
+                * we are done.  Otherwise update the mismatch count and repair
+                * parity if !MD_RECOVERY_CHECK
+                */
+               if (sh->ops.zero_sum_result == 0) {
+                       /* both parities are correct */
+                       if (!s->failed)
+                               set_bit(STRIPE_INSYNC, &sh->state);
+                       else {
+                               /* in contrast to the raid5 case we can validate
+                                * parity, but still have a failure to write
+                                * back
+                                */
+                               sh->check_state = check_state_compute_result;
+                               /* Returning at this point means that we may go
+                                * off and bring p and/or q uptodate again so
+                                * we make sure to check zero_sum_result again
+                                * to verify if p or q need writeback
+                                */
+                       }
+               } else {
+                       conf->mddev->resync_mismatches += STRIPE_SECTORS;
+                       if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+                               /* don't try to repair!! */
+                               set_bit(STRIPE_INSYNC, &sh->state);
+                       else {
+                               int *target = &sh->ops.target;
+
+                               sh->ops.target = -1;
+                               sh->ops.target2 = -1;
+                               sh->check_state = check_state_compute_run;
+                               set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+                               set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+                               if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+                                       set_bit(R5_Wantcompute,
+                                               &sh->dev[pd_idx].flags);
+                                       *target = pd_idx;
+                                       target = &sh->ops.target2;
+                                       s->uptodate++;
+                               }
+                               if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+                                       set_bit(R5_Wantcompute,
+                                               &sh->dev[qd_idx].flags);
+                                       *target = qd_idx;
+                                       s->uptodate++;
+                               }
+                       }
+               }
+               break;
+       case check_state_compute_run:
+               break;
+       default:
+               printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+                      __func__, sh->check_state,
+                      (unsigned long long) sh->sector);
+               BUG();
        }
 }
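
The switch above implements a small state machine: check_state_idle picks
which parities to verify, the *_run states park the stripe until the async op
completes, check_state_check_result inspects zero_sum_result, and
check_state_compute_result writes back whatever was repaired. A compressed
userspace model of the idle-state selection (the enum ordering mirrors the
kernel's, but treat it as illustrative):

    #include <stdio.h>

    enum check_states {
            check_state_idle,
            check_state_run,            /* check P only (Q failed) */
            check_state_run_q,          /* check Q only */
            check_state_run_pq,         /* check both parities */
            check_state_check_result,
            check_state_compute_run,
            check_state_compute_result,
    };

    int main(void)
    {
            int q_failed = 0, failed = 0;           /* healthy stripe */
            enum check_states st = check_state_idle;

            /* idle-state selection, as in handle_parity_checks6() */
            if (failed == q_failed)
                    st = check_state_run;           /* worth checking P */
            if (!q_failed && failed < 2)
                    st = (st == check_state_run) ?
                            check_state_run_pq : check_state_run_q;

            printf("selected state: %d (check_state_run_pq)\n", st);
            return 0;
    }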
 
@@ -2666,6 +2832,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                if (i != sh->pd_idx && i != sh->qd_idx) {
                        int dd_idx, j;
                        struct stripe_head *sh2;
+                       struct async_submit_ctl submit;
 
                        sector_t bn = compute_blocknr(sh, i, 1);
                        sector_t s = raid5_compute_sector(conf, bn, 0,
@@ -2685,9 +2852,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                        }
 
                        /* place all the copies on one channel */
+                       init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
                        tx = async_memcpy(sh2->dev[dd_idx].page,
-                               sh->dev[i].page, 0, 0, STRIPE_SIZE,
-                               ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+                                         sh->dev[i].page, 0, 0, STRIPE_SIZE,
+                                         &submit);
 
                        set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
                        set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@ -2756,7 +2924,8 @@ static bool handle_stripe5(struct stripe_head *sh)
        rcu_read_lock();
        for (i=disks; i--; ) {
                mdk_rdev_t *rdev;
-               struct r5dev *dev = &sh->dev[i];
+
+               dev = &sh->dev[i];
                clear_bit(R5_Insync, &dev->flags);
 
                pr_debug("check %d: state 0x%lx toread %p read %p write %p "
@@ -2973,7 +3142,7 @@ static bool handle_stripe5(struct stripe_head *sh)
                /* Need to write out all blocks after computing parity */
                sh->disks = conf->raid_disks;
                stripe_set_idx(sh->sector, conf, 0, sh);
-               schedule_reconstruction5(sh, &s, 1, 1);
+               schedule_reconstruction(sh, &s, 1, 1);
        } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
@@ -2993,7 +3162,7 @@ static bool handle_stripe5(struct stripe_head *sh)
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
        if (s.ops_request)
-               raid5_run_ops(sh, s.ops_request);
+               raid_run_ops(sh, s.ops_request);
 
        ops_run_io(sh, &s);
 
@@ -3002,7 +3171,7 @@ static bool handle_stripe5(struct stripe_head *sh)
        return blocked_rdev == NULL;
 }
 
-static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh)
 {
        raid5_conf_t *conf = sh->raid_conf;
        int disks = sh->disks;
@@ -3014,9 +3183,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        mdk_rdev_t *blocked_rdev = NULL;
 
        pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
-               "pd_idx=%d, qd_idx=%d\n",
+               "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
               (unsigned long long)sh->sector, sh->state,
-              atomic_read(&sh->count), pd_idx, qd_idx);
+              atomic_read(&sh->count), pd_idx, qd_idx,
+              sh->check_state, sh->reconstruct_state);
        memset(&s, 0, sizeof(s));
 
        spin_lock(&sh->lock);
@@ -3036,35 +3206,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 
                pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
                        i, dev->flags, dev->toread, dev->towrite, dev->written);
-               /* maybe we can reply to a read */
-               if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
-                       struct bio *rbi, *rbi2;
-                       pr_debug("Return read for disc %d\n", i);
-                       spin_lock_irq(&conf->device_lock);
-                       rbi = dev->toread;
-                       dev->toread = NULL;
-                       if (test_and_clear_bit(R5_Overlap, &dev->flags))
-                               wake_up(&conf->wait_for_overlap);
-                       spin_unlock_irq(&conf->device_lock);
-                       while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
-                               copy_data(0, rbi, dev->page, dev->sector);
-                               rbi2 = r5_next_bio(rbi, dev->sector);
-                               spin_lock_irq(&conf->device_lock);
-                               if (!raid5_dec_bi_phys_segments(rbi)) {
-                                       rbi->bi_next = return_bi;
-                                       return_bi = rbi;
-                               }
-                               spin_unlock_irq(&conf->device_lock);
-                               rbi = rbi2;
-                       }
-               }
+               /* maybe we can reply to a read
+                *
+                * new wantfill requests are only permitted while
+                * ops_complete_biofill is guaranteed to be inactive
+                */
+               if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
+                   !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
+                       set_bit(R5_Wantfill, &dev->flags);
 
                /* now count some things */
                if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
                if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+               if (test_bit(R5_Wantcompute, &dev->flags)) {
+                       s.compute++;
+                       BUG_ON(s.compute > 2);
+               }
 
-
-               if (dev->toread)
+               if (test_bit(R5_Wantfill, &dev->flags)) {
+                       s.to_fill++;
+               } else if (dev->toread)
                        s.to_read++;
                if (dev->towrite) {
                        s.to_write++;
@@ -3105,6 +3266,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                blocked_rdev = NULL;
        }
 
+       if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+               set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+               set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+       }
+
        pr_debug("locked=%d uptodate=%d to_read=%d"
               " to_write=%d failed=%d failed_num=%d,%d\n",
               s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3145,19 +3311,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
         * or to load a block that is being partially written.
         */
        if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
-           (s.syncing && (s.uptodate < disks)) || s.expanding)
+           (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
                handle_stripe_fill6(sh, &s, &r6s, disks);
 
-       /* now to consider writing and what else, if anything should be read */
-       if (s.to_write)
+       /* Now we check to see if any write operations have recently
+        * completed
+        */
+       if (sh->reconstruct_state == reconstruct_state_drain_result) {
+               int qd_idx = sh->qd_idx;
+
+               sh->reconstruct_state = reconstruct_state_idle;
+               /* All the 'written' buffers and the parity blocks are ready to
+                * be written back to disk
+                */
+               BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+               BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+               for (i = disks; i--; ) {
+                       dev = &sh->dev[i];
+                       if (test_bit(R5_LOCKED, &dev->flags) &&
+                           (i == sh->pd_idx || i == qd_idx ||
+                            dev->written)) {
+                               pr_debug("Writing block %d\n", i);
+                               BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+                               set_bit(R5_Wantwrite, &dev->flags);
+                               if (!test_bit(R5_Insync, &dev->flags) ||
+                                   ((i == sh->pd_idx || i == qd_idx) &&
+                                     s.failed == 0))
+                                       set_bit(STRIPE_INSYNC, &sh->state);
+                       }
+               }
+               if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+                       atomic_dec(&conf->preread_active_stripes);
+                       if (atomic_read(&conf->preread_active_stripes) <
+                               IO_THRESHOLD)
+                               md_wakeup_thread(conf->mddev->thread);
+               }
+       }
+
+       /* Now to consider new write requests and what else, if anything
+        * should be read.  We do not handle new writes when:
+        * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
+        * 2/ A 'check' operation is in flight, as it may clobber the parity
+        *    block.
+        */
+       if (s.to_write && !sh->reconstruct_state && !sh->check_state)
                handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
 
        /* maybe we need to check and possibly fix the parity for this stripe
         * Any reads will already have been scheduled, so we just see if enough
-        * data is available
+        * data is available.  The parity check is held off while parity
+        * dependent operations are in flight.
         */
-       if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
-               handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+       if (sh->check_state ||
+           (s.syncing && s.locked == 0 &&
+            !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
+            !test_bit(STRIPE_INSYNC, &sh->state)))
+               handle_parity_checks6(conf, sh, &s, &r6s, disks);
 
        if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
                md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@ -3178,15 +3387,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                                        set_bit(R5_Wantwrite, &dev->flags);
                                        set_bit(R5_ReWrite, &dev->flags);
                                        set_bit(R5_LOCKED, &dev->flags);
+                                       s.locked++;
                                } else {
                                        /* let's read it back */
                                        set_bit(R5_Wantread, &dev->flags);
                                        set_bit(R5_LOCKED, &dev->flags);
+                                       s.locked++;
                                }
                        }
                }
 
-       if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+       /* Finish reconstruct operations initiated by the expansion process */
+       if (sh->reconstruct_state == reconstruct_state_result) {
+               sh->reconstruct_state = reconstruct_state_idle;
+               clear_bit(STRIPE_EXPANDING, &sh->state);
+               for (i = conf->raid_disks; i--; ) {
+                       set_bit(R5_Wantwrite, &sh->dev[i].flags);
+                       set_bit(R5_LOCKED, &sh->dev[i].flags);
+                       s.locked++;
+               }
+       }
+
+       if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+           !sh->reconstruct_state) {
                struct stripe_head *sh2
                        = get_active_stripe(conf, sh->sector, 1, 1, 1);
                if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3207,14 +3430,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                /* Need to write out all blocks after computing P&Q */
                sh->disks = conf->raid_disks;
                stripe_set_idx(sh->sector, conf, 0, sh);
-               compute_parity6(sh, RECONSTRUCT_WRITE);
-               for (i = conf->raid_disks ; i-- ;  ) {
-                       set_bit(R5_LOCKED, &sh->dev[i].flags);
-                       s.locked++;
-                       set_bit(R5_Wantwrite, &sh->dev[i].flags);
-               }
-               clear_bit(STRIPE_EXPANDING, &sh->state);
-       } else if (s.expanded) {
+               schedule_reconstruction(sh, &s, 1, 1);
+       } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
                wake_up(&conf->wait_for_overlap);
@@ -3232,6 +3449,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        if (unlikely(blocked_rdev))
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
+       if (s.ops_request)
+               raid_run_ops(sh, s.ops_request);
+
        ops_run_io(sh, &s);
 
        return_io(return_bi);
@@ -3240,16 +3460,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 }
 
 /* returns true if the stripe was handled */
-static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe(struct stripe_head *sh)
 {
        if (sh->raid_conf->level == 6)
-               return handle_stripe6(sh, tmp_page);
+               return handle_stripe6(sh);
        else
                return handle_stripe5(sh);
 }
 
-
-
 static void raid5_activate_delayed(raid5_conf_t *conf)
 {
        if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -3331,6 +3549,9 @@ static int raid5_congested(void *data, int bits)
        /* No difference between reads and writes.  Just check
         * how busy the stripe_cache is
         */
+
+       if (mddev_congested(mddev, bits))
+               return 1;
        if (conf->inactive_blocked)
                return 1;
        if (conf->quiesce)
@@ -3880,7 +4101,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
        INIT_LIST_HEAD(&stripes);
        for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
                int j;
-               int skipped = 0;
+               int skipped_disk = 0;
                sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
                set_bit(STRIPE_EXPANDING, &sh->state);
                atomic_inc(&conf->reshape_stripes);
@@ -3896,14 +4117,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                                continue;
                        s = compute_blocknr(sh, j, 0);
                        if (s < raid5_size(mddev, 0, 0)) {
-                               skipped = 1;
+                               skipped_disk = 1;
                                continue;
                        }
                        memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
                        set_bit(R5_Expanded, &sh->dev[j].flags);
                        set_bit(R5_UPTODATE, &sh->dev[j].flags);
                }
-               if (!skipped) {
+               if (!skipped_disk) {
                        set_bit(STRIPE_EXPAND_READY, &sh->state);
                        set_bit(STRIPE_HANDLE, &sh->state);
                }
@@ -4057,7 +4278,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
        spin_unlock(&sh->lock);
 
        /* wait for any blocked device to be handled */
-       while(unlikely(!handle_stripe(sh, NULL)))
+       while (unlikely(!handle_stripe(sh)))
                ;
        release_stripe(sh);
 
@@ -4114,7 +4335,7 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
                        return handled;
                }
 
-               handle_stripe(sh, NULL);
+               handle_stripe(sh);
                release_stripe(sh);
                handled++;
        }
@@ -4128,6 +4349,36 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
        return handled;
 }
 
+#ifdef CONFIG_MULTICORE_RAID456
+static void __process_stripe(void *param, async_cookie_t cookie)
+{
+       struct stripe_head *sh = param;
+
+       handle_stripe(sh);
+       release_stripe(sh);
+}
+
+static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+{
+       async_schedule_domain(__process_stripe, sh, domain);
+}
+
+static void synchronize_stripe_processing(struct list_head *domain)
+{
+       async_synchronize_full_domain(domain);
+}
+#else
+static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+{
+       handle_stripe(sh);
+       release_stripe(sh);
+       cond_resched();
+}
+
+static void synchronize_stripe_processing(struct list_head *domain)
+{
+}
+#endif
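With CONFIG_MULTICORE_RAID456 enabled, process_stripe() above fans each stripe out to the kernel's shared async thread pool, and raid5d() fans back in via synchronize_stripe_processing() before issuing pending I/O. A minimal sketch of that fan-out/fan-in idiom, assuming the 2.6.31-era async API in which a domain is a plain list_head (the demo_* names are illustrative, not from this patch):

#include <linux/async.h>
#include <linux/kernel.h>

static void demo_handle(void *param, async_cookie_t cookie)
{
	/* runs on one of the shared async worker threads */
	pr_info("handled item %p (cookie %llu)\n",
		param, (unsigned long long)cookie);
}

static void demo_process_all(void **items, int count)
{
	LIST_HEAD(domain);	/* private synchronization domain */
	int i;

	for (i = 0; i < count; i++)
		async_schedule_domain(demo_handle, items[i], &domain);

	/* fan-in: waits only for work scheduled into this domain */
	async_synchronize_full_domain(&domain);
}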
 
 
 /*
@@ -4142,6 +4393,7 @@ static void raid5d(mddev_t *mddev)
        struct stripe_head *sh;
        raid5_conf_t *conf = mddev->private;
        int handled;
+       LIST_HEAD(raid_domain);
 
        pr_debug("+++ raid5d active\n");
 
@@ -4178,8 +4430,7 @@ static void raid5d(mddev_t *mddev)
                spin_unlock_irq(&conf->device_lock);
                
                handled++;
-               handle_stripe(sh, conf->spare_page);
-               release_stripe(sh);
+               process_stripe(sh, &raid_domain);
 
                spin_lock_irq(&conf->device_lock);
        }
@@ -4187,6 +4438,7 @@ static void raid5d(mddev_t *mddev)
 
        spin_unlock_irq(&conf->device_lock);
 
+       synchronize_stripe_processing(&raid_domain);
        async_tx_issue_pending_all();
        unplug_slaves(mddev);
 
@@ -4319,15 +4571,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
        return sectors * (raid_disks - conf->max_degraded);
 }
 
+static void raid5_free_percpu(raid5_conf_t *conf)
+{
+       struct raid5_percpu *percpu;
+       unsigned long cpu;
+
+       if (!conf->percpu)
+               return;
+
+       get_online_cpus();
+       for_each_possible_cpu(cpu) {
+               percpu = per_cpu_ptr(conf->percpu, cpu);
+               safe_put_page(percpu->spare_page);
+               kfree(percpu->scribble);
+       }
+#ifdef CONFIG_HOTPLUG_CPU
+       unregister_cpu_notifier(&conf->cpu_notify);
+#endif
+       put_online_cpus();
+
+       free_percpu(conf->percpu);
+}
+
 static void free_conf(raid5_conf_t *conf)
 {
        shrink_stripes(conf);
-       safe_put_page(conf->spare_page);
+       raid5_free_percpu(conf);
        kfree(conf->disks);
        kfree(conf->stripe_hashtbl);
        kfree(conf);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
+                             void *hcpu)
+{
+       raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
+       long cpu = (long)hcpu;
+       struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+
+       switch (action) {
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
+               if (conf->level == 6 && !percpu->spare_page)
+                       percpu->spare_page = alloc_page(GFP_KERNEL);
+               if (!percpu->scribble)
+                       percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+               if (!percpu->scribble ||
+                   (conf->level == 6 && !percpu->spare_page)) {
+                       safe_put_page(percpu->spare_page);
+                       kfree(percpu->scribble);
+                       pr_err("%s: failed memory allocation for cpu%ld\n",
+                              __func__, cpu);
+                       return NOTIFY_BAD;
+               }
+               break;
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+               safe_put_page(percpu->spare_page);
+               kfree(percpu->scribble);
+               percpu->spare_page = NULL;
+               percpu->scribble = NULL;
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+#endif
+
+static int raid5_alloc_percpu(raid5_conf_t *conf)
+{
+       unsigned long cpu;
+       struct page *spare_page;
+       struct raid5_percpu *allcpus;
+       void *scribble;
+       int err;
+
+       allcpus = alloc_percpu(struct raid5_percpu);
+       if (!allcpus)
+               return -ENOMEM;
+       conf->percpu = allcpus;
+
+       get_online_cpus();
+       err = 0;
+       for_each_present_cpu(cpu) {
+               if (conf->level == 6) {
+                       spare_page = alloc_page(GFP_KERNEL);
+                       if (!spare_page) {
+                               err = -ENOMEM;
+                               break;
+                       }
+                       per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+               }
+               scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
+               if (!scribble) {
+                       err = -ENOMEM;
+                       break;
+               }
+               per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
+       }
+#ifdef CONFIG_HOTPLUG_CPU
+       conf->cpu_notify.notifier_call = raid456_cpu_notify;
+       conf->cpu_notify.priority = 0;
+       if (err == 0)
+               err = register_cpu_notifier(&conf->cpu_notify);
+#endif
+       put_online_cpus();
+
+       return err;
+}
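raid5_alloc_percpu() and raid456_cpu_notify() above follow the standard percpu-plus-hotplug idiom: populate entries for the CPUs present at setup time under get_online_cpus(), then let the notifier allocate and free entries as CPUs come and go. A condensed sketch of the idiom with one buffer per CPU (demo_* names are illustrative; error handling of the kmallocs in the init loop is elided):

#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>

struct demo_pcpu {
	void *buf;
};

static struct demo_pcpu *demo_percpu;

static int demo_cpu_notify(struct notifier_block *nfb, unsigned long action,
			   void *hcpu)
{
	struct demo_pcpu *p = per_cpu_ptr(demo_percpu, (long)hcpu);

	switch (action) {
	case CPU_UP_PREPARE:			/* CPU coming up: allocate */
		p->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!p->buf)
			return NOTIFY_BAD;
		break;
	case CPU_DEAD:				/* CPU went away: release */
		kfree(p->buf);
		p->buf = NULL;
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
	.notifier_call = demo_cpu_notify,
};

static int demo_init(void)
{
	unsigned long cpu;

	demo_percpu = alloc_percpu(struct demo_pcpu);
	if (!demo_percpu)
		return -ENOMEM;

	get_online_cpus();		/* block hotplug while walking CPUs */
	for_each_present_cpu(cpu)
		per_cpu_ptr(demo_percpu, cpu)->buf =
			kmalloc(PAGE_SIZE, GFP_KERNEL);
	register_cpu_notifier(&demo_nb);
	put_online_cpus();

	return 0;
}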
+
 static raid5_conf_t *setup_conf(mddev_t *mddev)
 {
        raid5_conf_t *conf;
@@ -4369,6 +4724,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
                goto abort;
 
        conf->raid_disks = mddev->raid_disks;
+       conf->scribble_len = scribble_len(conf->raid_disks);
        if (mddev->reshape_position == MaxSector)
                conf->previous_raid_disks = mddev->raid_disks;
        else
@@ -4384,11 +4740,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
        if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
                goto abort;
 
-       if (mddev->new_level == 6) {
-               conf->spare_page = alloc_page(GFP_KERNEL);
-               if (!conf->spare_page)
-                       goto abort;
-       }
+       conf->level = mddev->new_level;
+       if (raid5_alloc_percpu(conf) != 0)
+               goto abort;
+
        spin_lock_init(&conf->device_lock);
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
@@ -4447,7 +4802,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
                printk(KERN_INFO "raid5: allocated %dkB for %s\n",
                        memory, mdname(mddev));
 
-       conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+       conf->thread = md_register_thread(raid5d, mddev, NULL);
        if (!conf->thread) {
                printk(KERN_ERR
                       "raid5: couldn't allocate thread for %s\n",
@@ -4613,7 +4968,7 @@ static int run(mddev_t *mddev)
                set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
                set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                mddev->sync_thread = md_register_thread(md_do_sync, mddev,
-                                                       "%s_reshape");
+                                                       "reshape");
        }
 
        /* read-ahead size must cover two whole stripes, which is
@@ -5031,7 +5386,7 @@ static int raid5_start_reshape(mddev_t *mddev)
        set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
        mddev->sync_thread = md_register_thread(md_do_sync, mddev,
-                                               "%s_reshape");
+                                               "reshape");
        if (!mddev->sync_thread) {
                mddev->recovery = 0;
                spin_lock_irq(&conf->device_lock);
index 9459689c4ea00a50073cdcfca38ee038bda10364..2390e0e83daf7c939344f8062a9d4a6ef737c476 100644 (file)
@@ -2,6 +2,7 @@
 #define _RAID5_H
 
 #include <linux/raid/xor.h>
+#include <linux/dmaengine.h>
 
 /*
  *
  */
 enum check_states {
        check_state_idle = 0,
-       check_state_run, /* parity check */
+       check_state_run, /* xor parity check */
+       check_state_run_q, /* q-parity check */
+       check_state_run_pq, /* pq dual parity check */
        check_state_check_result,
        check_state_compute_run, /* parity repair */
        check_state_compute_result,
@@ -215,8 +218,8 @@ struct stripe_head {
         * @target - STRIPE_OP_COMPUTE_BLK target
         */
        struct stripe_operations {
-               int                target;
-               u32                zero_sum_result;
+               int                  target, target2;
+               enum sum_check_flags zero_sum_result;
        } ops;
        struct r5dev {
                struct bio      req;
@@ -298,7 +301,7 @@ struct r6_state {
 #define STRIPE_OP_COMPUTE_BLK  1
 #define STRIPE_OP_PREXOR       2
 #define STRIPE_OP_BIODRAIN     3
-#define STRIPE_OP_POSTXOR      4
+#define STRIPE_OP_RECONSTRUCT  4
 #define STRIPE_OP_CHECK        5
 
 /*
@@ -385,8 +388,21 @@ struct raid5_private_data {
                                            * (fresh device added).
                                            * Cleared when a sync completes.
                                            */
-
-       struct page             *spare_page; /* Used when checking P/Q in raid6 */
+       /* per cpu variables */
+       struct raid5_percpu {
+               struct page     *spare_page; /* Used when checking P/Q in raid6 */
+               void            *scribble;   /* space for constructing buffer
+                                             * lists and performing address
+                                             * conversions
+                                             */
+       } *percpu;
+       size_t                  scribble_len; /* size of scribble region must be
+                                              * associated with conf to handle
+                                              * cpu hotplug while reshaping
+                                              */
+#ifdef CONFIG_HOTPLUG_CPU
+       struct notifier_block   cpu_notify;
+#endif
 
        /*
         * Free stripes pool
index 895e2efca8a974a3c2996b51b5b86ef8a644b76a..01fc704847434fb1faa08e61c09053e053ecaff3 100644 (file)
 #define DVB_MAJOR 212
 
 #if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0
-#define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
+  #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
 #else
-#warning invalid CONFIG_DVB_MAX_ADAPTERS value
-#define DVB_MAX_ADAPTERS 8
+  #define DVB_MAX_ADAPTERS 8
 #endif
 
 #define DVB_UNSET (-1)
index 0e4b97fba384bb837bd1946d1fc502ce03f46dc2..9744b0692417597814f55f3d0a2d8057198e27d9 100644 (file)
@@ -75,7 +75,7 @@ config DVB_USB_DIB0700
        select DVB_DIB3000MC if !DVB_FE_CUSTOMISE
        select DVB_S5H1411 if !DVB_FE_CUSTOMISE
        select DVB_LGDT3305 if !DVB_FE_CUSTOMISE
-       select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE
+       select DVB_TUNER_DIB0070
        select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE
        select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE
        select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE
index bb6df1b276bee161eeca7b0a6f3a0fb3627c994c..6f094a96ac810afbfbad67667b50bcd70cdca7d9 100644 (file)
@@ -415,7 +415,7 @@ int saa7164_api_enum_subdevs(struct saa7164_dev *dev)
                goto out;
        }
 
-       if (debug & DBGLVL_API)
+       if (saa_debug & DBGLVL_API)
                saa7164_dumphex16(dev, buf, (buflen/16)*16);
 
        saa7164_api_dump_subdevs(dev, buf, buflen);
@@ -480,7 +480,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
 
        dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len);
 
-       if (debug & DBGLVL_I2C)
+       if (saa_debug & DBGLVL_I2C)
                saa7164_dumphex16(dev, buf, 2 * 16);
 
        ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR,
@@ -488,7 +488,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
        if (ret != SAA_OK)
                printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret);
        else {
-               if (debug & DBGLVL_I2C)
+               if (saa_debug & DBGLVL_I2C)
                        saa7164_dumphex16(dev, buf, sizeof(buf));
                memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen);
        }
@@ -548,7 +548,7 @@ int saa7164_api_i2c_write(struct saa7164_i2c *bus, u8 addr, u32 datalen,
        *((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen;
        memcpy((buf + 2 * sizeof(u32)), data, datalen);
 
-       if (debug & DBGLVL_I2C)
+       if (saa_debug & DBGLVL_I2C)
                saa7164_dumphex16(dev, buf, sizeof(buf));
 
        ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR,
index e097f1a0969a26441a0aa1a02df5136aaf1d4460..c45966edc0cf2f202ea4a38e158506390f145f46 100644 (file)
@@ -250,7 +250,7 @@ int saa7164_cmd_wait(struct saa7164_dev *dev, u8 seqno)
        unsigned long stamp;
        int r;
 
-       if (debug >= 4)
+       if (saa_debug >= 4)
                saa7164_bus_dump(dev);
 
        dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno);
index f0dbead188c893741c2bbdb6dc55035ee36436e1..709affc31042571fdc66efd0d99b72673451869b 100644 (file)
@@ -45,8 +45,8 @@ MODULE_LICENSE("GPL");
  32 bus
  */
 
-unsigned int debug;
-module_param(debug, int, 0644);
+unsigned int saa_debug;
+module_param_named(debug, saa_debug, int, 0644);
 MODULE_PARM_DESC(debug, "enable debug messages");
 
 unsigned int waitsecs = 10;
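The rename from debug to saa_debug keeps the user-visible parameter name intact because module_param_named() decouples the exposed name from the backing C symbol. A minimal sketch with a hypothetical demo module (not part of this patch):

#include <linux/module.h>
#include <linux/moduleparam.h>

static unsigned int demo_debug;		/* renamed C symbol */
/* first argument: name shown to userspace; second: backing variable */
module_param_named(debug, demo_debug, uint, 0644);
MODULE_PARM_DESC(debug, "enable debug messages");

/* Loading is unchanged:  insmod demo.ko debug=4
 * and the sysfs path stays /sys/module/demo/parameters/debug.
 */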
@@ -653,7 +653,7 @@ static int __devinit saa7164_initdev(struct pci_dev *pci_dev,
                printk(KERN_ERR "%s() Unsupported board detected, "
                        "registering without firmware\n", __func__);
 
-       dprintk(1, "%s() parameter debug = %d\n", __func__, debug);
+       dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug);
        dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs);
 
 fail_fw:
index 6753008a9c9be4a47e35abf04a20e558157cc1f0..42660b546f0e811f528b7c3af1c8959c2e8cb0e4 100644 (file)
@@ -375,9 +375,9 @@ extern int saa7164_buffer_dealloc(struct saa7164_tsport *port,
 
 /* ----------------------------------------------------------- */
 
-extern unsigned int debug;
+extern unsigned int saa_debug;
 #define dprintk(level, fmt, arg...)\
-       do { if (debug & level)\
+       do { if (saa_debug & level)\
                printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\
        } while (0)
 
index 6ba16abeebdd61af7c03e310a7e6b0b0441056b1..e0f91e4ab653ccb975676a908e6f2b11db5eedd0 100644 (file)
@@ -28,7 +28,6 @@
 #include <linux/timer.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/utsname.h>
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
index f97fd06d594858de44f8bf4d11e51ddb3cebb807..c19f51dba2ee28b3d0827c93df30044ebfe69766 100644 (file)
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <asm/uaccess.h>
 #include <linux/ioport.h>
index 90d9b5c0e9a7bc407806e357a32988ed61df5b0e..a2a50d608a3fd0d061b655119cbf8d4627b0be05 100644 (file)
@@ -52,7 +52,6 @@
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/mm.h>
-#include <linux/utsname.h>
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
index a5b448ea4eab54372a2263688cf04d2243775356..b3bf1c44d74d92c67452b96e89cf4d9741d68d4f 100644 (file)
@@ -339,9 +339,9 @@ static int h_memstick_read_dev_id(struct memstick_dev *card,
                        card->id.type = id_reg.type;
                        card->id.category = id_reg.category;
                        card->id.class = id_reg.class;
+                       dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
                }
                complete(&card->mrq_complete);
-               dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
                return -EAGAIN;
        }
 }
index 79689b10f937e0b8405a6a3568d5503c532d64a9..766e21e15574c75ceba18828a4d492ac6a90a189 100644 (file)
@@ -937,6 +937,8 @@ static int quicktest1(unsigned long arg)
 
        /* Need 1K cacheline-aligned memory that does not cross a page boundary */
        p = kmalloc(4096, 0);
+       if (p == NULL)
+               return -ENOMEM;
        mq = ALIGNUP(p, 1024);
        memset(mes, 0xee, sizeof(mes));
        dw = mq;
index 9cbf95bedce6ba6b35a6078bf792ea403a18ae55..ccd4408a26c73efe3efa11753ac64dbed0a55936 100644 (file)
@@ -340,10 +340,9 @@ static struct proc_dir_entry *proc_gru __read_mostly;
 
 static int create_proc_file(struct proc_entry *p)
 {
-       p->entry = create_proc_entry(p->name, p->mode, proc_gru);
+       p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
        if (!p->entry)
                return -1;
-       p->entry->proc_fops = p->fops;
        return 0;
 }
 
index 065fa818be5750a8b59dc7d12a53cb217933b5d0..fc25586b7ee1c94575e5674f89c3382c2a7ffd88 100644 (file)
@@ -599,6 +599,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
        struct scatterlist              *sg;
        unsigned int                    i;
        enum dma_data_direction         direction;
+       unsigned int                    sglen;
 
        /*
         * We don't do DMA on "complex" transfers, i.e. with
@@ -628,11 +629,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
        else
                direction = DMA_TO_DEVICE;
 
+       sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction);
+       if (sglen != data->sg_len)
+               goto unmap_exit;
        desc = chan->device->device_prep_slave_sg(chan,
                        data->sg, data->sg_len, direction,
                        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!desc)
-               return -ENOMEM;
+               goto unmap_exit;
 
        host->dma.data_desc = desc;
        desc->callback = atmci_dma_complete;
@@ -643,6 +647,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
        chan->device->device_issue_pending(chan);
 
        return 0;
+unmap_exit:
+       dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction);
+       return -ENOMEM;
 }
 
 #else /* CONFIG_MMC_ATMELMCI_DMA */
index e4ec3659759a91acc38cedefb0160c3c0640c5e1..ecf90f5c97c2bce84983d2567dc7933c8bb060c4 100644 (file)
@@ -159,7 +159,7 @@ config MTD_AFS_PARTS
 
 config MTD_OF_PARTS
        tristate "Flash partition map based on OF description"
-       depends on PPC_OF && MTD_PARTITIONS
+       depends on (MICROBLAZE || PPC_OF) && MTD_PARTITIONS
        help
          This provides a partition parsing function which derives
          the partition map from the children of the flash node,
index 3a9a960644b69464c45236803db90b0e832cdf17..841e085ab74ab198fafe9e801e5a3df2307f0da3 100644 (file)
@@ -74,7 +74,7 @@ config MTD_PHYSMAP_BANKWIDTH
 
 config MTD_PHYSMAP_OF
        tristate "Flash device in physical memory map based on OF description"
-       depends on PPC_OF && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
+       depends on (MICROBLAZE || PPC_OF) && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
        help
          This provides a 'mapping' driver which allows the NOR Flash and
          ROM driver code to communicate with chips which are mapped
index 7adff4d0960ddc09d3b430b1ac88b596bcf85fc4..b9eeadf01b749cb9ae223ee6f63ba63e685db98e 100644 (file)
@@ -813,10 +813,10 @@ static int vortex_suspend(struct pci_dev *pdev, pm_message_t state)
                if (netif_running(dev)) {
                        netif_device_detach(dev);
                        vortex_down(dev, 1);
+                       disable_irq(dev->irq);
                }
                pci_save_state(pdev);
                pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
-               free_irq(dev->irq, dev);
                pci_disable_device(pdev);
                pci_set_power_state(pdev, pci_choose_state(pdev, state));
        }
@@ -839,18 +839,12 @@ static int vortex_resume(struct pci_dev *pdev)
                        return err;
                }
                pci_set_master(pdev);
-               if (request_irq(dev->irq, vp->full_bus_master_rx ?
-                               &boomerang_interrupt : &vortex_interrupt, IRQF_SHARED, dev->name, dev)) {
-                       pr_warning("%s: Could not reserve IRQ %d\n", dev->name, dev->irq);
-                       pci_disable_device(pdev);
-                       return -EBUSY;
-               }
                if (netif_running(dev)) {
                        err = vortex_up(dev);
                        if (err)
                                return err;
-                       else
-                               netif_device_attach(dev);
+                       enable_irq(dev->irq);
+                       netif_device_attach(dev);
                }
        }
        return 0;
index 462d9f59c53aacfcb980b8326c005e6a53c10f48..83a1922e68e0a8750705c03087786c71f4cea084 100644 (file)
@@ -87,7 +87,7 @@
 
 /* These identify the driver base version and may not be removed. */
 static char version[] =
-KERN_INFO DRV_NAME ": 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n";
+DRV_NAME ": 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n";
 
 MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
 MODULE_DESCRIPTION("RealTek RTL-8139C+ series 10/100 PCI Ethernet driver");
index ed5741b2e7010a4ba9e6d1f76dded430e03111a9..2bea67c134f022456a15fdad71fcd3f089ca52d2 100644 (file)
@@ -1875,7 +1875,7 @@ config 68360_ENET
 
 config FEC
        bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)"
-       depends on M523x || M527x || M5272 || M528x || M520x || M532x || MACH_MX27 || ARCH_MX35
+       depends on M523x || M527x || M5272 || M528x || M520x || M532x || MACH_MX27 || ARCH_MX35 || ARCH_MX25
        help
          Say Y here if you want to use the built-in 10/100 Fast ethernet
          controller on some Motorola ColdFire and Freescale i.MX processors.
index be2c6cfe6e84cc55e7a71b2c63adad1057dc2a4b..1372e9a99f5b29eb8e92aefc39404234977ceae0 100644 (file)
@@ -2296,7 +2296,7 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
        u32 ctrl;
        u32 mac_ctrl_data;
        u32 master_ctrl_data;
-       u32 wol_ctrl_data;
+       u32 wol_ctrl_data = 0;
        u16 mii_bmsr_data;
        u16 save_autoneg_advertised;
        u16 mii_intr_status_data;
index 0900743724624f5e39e8a83622d653a7b29b4d31..df32c109b7acee953f48eef55ddd7b6100cc1e40 100644 (file)
@@ -75,6 +75,13 @@ config CAN_EMS_PCI
          CPC-PCIe and CPC-104P cards from EMS Dr. Thomas Wuensche
          (http://www.ems-wuensche.de).
 
+config CAN_EMS_USB
+       tristate "EMS CPC-USB/ARM7 CAN/USB interface"
+       depends on USB && CAN_DEV
+       ---help---
+         This driver is for the one-channel CPC-USB/ARM7 CAN/USB interface
+         from EMS Dr. Thomas Wuensche (http://www.ems-wuensche.de).
+
 config CAN_KVASER_PCI
        tristate "Kvaser PCIcanx and Kvaser PCIcan PCI Cards"
        depends on PCI && CAN_SJA1000
@@ -82,6 +89,12 @@ config CAN_KVASER_PCI
          This driver is for the PCIcanx and PCIcan cards (1, 2 or
          4 channel) from Kvaser (http://www.kvaser.com).
 
+config CAN_AT91
+       tristate "Atmel AT91 onchip CAN controller"
+       depends on CAN && CAN_DEV && ARCH_AT91SAM9263
+       ---help---
+         This is a driver for the SoC CAN controller in Atmel's AT91SAM9263.
+
 config CAN_DEBUG_DEVICES
        bool "CAN devices debugging messages"
        depends on CAN
index 523a941b358b1bd47aaa36888142b25963e7107d..0dea62721f2f52a1f67e4762eb31478b1067542f 100644 (file)
@@ -7,6 +7,9 @@ obj-$(CONFIG_CAN_VCAN)          += vcan.o
 obj-$(CONFIG_CAN_DEV)          += can-dev.o
 can-dev-y                      := dev.o
 
+obj-y                          += usb/
+
 obj-$(CONFIG_CAN_SJA1000)      += sja1000/
+obj-$(CONFIG_CAN_AT91)         += at91_can.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
index 7d84b8ac9c1c0f2f206635e9b5c317d4e5b69e5a..fd04789d33707a8db769d88a099374deff3089ca 100644 (file)
@@ -94,12 +94,14 @@ struct ems_pci_card {
 #define EMS_PCI_CDR             (CDR_CBP | CDR_CLKOUT_MASK)
 
 #define EMS_PCI_V1_BASE_BAR     1
-#define EMS_PCI_V1_MEM_SIZE     4096
+#define EMS_PCI_V1_CONF_SIZE    4096 /* size of PITA control area */
 #define EMS_PCI_V2_BASE_BAR     2
-#define EMS_PCI_V2_MEM_SIZE     128
+#define EMS_PCI_V2_CONF_SIZE    128 /* size of PLX control area */
 #define EMS_PCI_CAN_BASE_OFFSET 0x400 /* offset where the controllers start */
 #define EMS_PCI_CAN_CTRL_SIZE   0x200 /* memory size for each controller */
 
+#define EMS_PCI_BASE_SIZE  4096 /* size of controller area */
+
 static struct pci_device_id ems_pci_tbl[] = {
        /* CPC-PCI v1 */
        {PCI_VENDOR_ID_SIEMENS, 0x2104, PCI_ANY_ID, PCI_ANY_ID,},
@@ -224,7 +226,7 @@ static int __devinit ems_pci_add_card(struct pci_dev *pdev,
        struct sja1000_priv *priv;
        struct net_device *dev;
        struct ems_pci_card *card;
-       int max_chan, mem_size, base_bar;
+       int max_chan, conf_size, base_bar;
        int err, i;
 
        /* Enabling PCI device */
@@ -251,22 +253,22 @@ static int __devinit ems_pci_add_card(struct pci_dev *pdev,
                card->version = 2; /* CPC-PCI v2 */
                max_chan = EMS_PCI_V2_MAX_CHAN;
                base_bar = EMS_PCI_V2_BASE_BAR;
-               mem_size = EMS_PCI_V2_MEM_SIZE;
+               conf_size = EMS_PCI_V2_CONF_SIZE;
        } else {
                card->version = 1; /* CPC-PCI v1 */
                max_chan = EMS_PCI_V1_MAX_CHAN;
                base_bar = EMS_PCI_V1_BASE_BAR;
-               mem_size = EMS_PCI_V1_MEM_SIZE;
+               conf_size = EMS_PCI_V1_CONF_SIZE;
        }
 
        /* Remap configuration space and controller memory area */
-       card->conf_addr = pci_iomap(pdev, 0, mem_size);
+       card->conf_addr = pci_iomap(pdev, 0, conf_size);
        if (card->conf_addr == NULL) {
                err = -ENOMEM;
                goto failure_cleanup;
        }
 
-       card->base_addr = pci_iomap(pdev, base_bar, mem_size);
+       card->base_addr = pci_iomap(pdev, base_bar, EMS_PCI_BASE_SIZE);
        if (card->base_addr == NULL) {
                err = -ENOMEM;
                goto failure_cleanup;
diff --git a/drivers/net/can/usb/Makefile b/drivers/net/can/usb/Makefile
new file mode 100644 (file)
index 0000000..c3f75ba
--- /dev/null
@@ -0,0 +1,5 @@
+#
+#  Makefile for the Linux Controller Area Network USB drivers.
+#
+
+obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
new file mode 100644 (file)
index 0000000..9012e0a
--- /dev/null
@@ -0,0 +1,1155 @@
+/*
+ * CAN driver for EMS Dr. Thomas Wuensche CPC-USB/ARM7
+ *
+ * Copyright (C) 2004-2009 EMS Dr. Thomas Wuensche
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+
+MODULE_AUTHOR("Sebastian Haas <haas@ems-wuensche.com>");
+MODULE_DESCRIPTION("CAN driver for EMS Dr. Thomas Wuensche CAN/USB interfaces");
+MODULE_LICENSE("GPL v2");
+
+/* Control-Values for CPC_Control() Command Subject Selection */
+#define CONTR_CAN_MESSAGE 0x04
+#define CONTR_CAN_STATE   0x0C
+#define CONTR_BUS_ERROR   0x1C
+
+/* Control Command Actions */
+#define CONTR_CONT_OFF 0
+#define CONTR_CONT_ON  1
+#define CONTR_ONCE     2
+
+/* Messages from CPC to PC */
+#define CPC_MSG_TYPE_CAN_FRAME       1  /* CAN data frame */
+#define CPC_MSG_TYPE_RTR_FRAME       8  /* CAN remote frame */
+#define CPC_MSG_TYPE_CAN_PARAMS      12 /* Actual CAN parameters */
+#define CPC_MSG_TYPE_CAN_STATE       14 /* CAN state message */
+#define CPC_MSG_TYPE_EXT_CAN_FRAME   16 /* Extended CAN data frame */
+#define CPC_MSG_TYPE_EXT_RTR_FRAME   17 /* Extended remote frame */
+#define CPC_MSG_TYPE_CONTROL         19 /* change interface behavior */
+#define CPC_MSG_TYPE_CONFIRM         20 /* command processed confirmation */
+#define CPC_MSG_TYPE_OVERRUN         21 /* overrun events */
+#define CPC_MSG_TYPE_CAN_FRAME_ERROR 23 /* detected bus errors */
+#define CPC_MSG_TYPE_ERR_COUNTER     25 /* RX/TX error counter */
+
+/* Messages from the PC to the CPC interface  */
+#define CPC_CMD_TYPE_CAN_FRAME     1   /* CAN data frame */
+#define CPC_CMD_TYPE_CONTROL       3   /* control of interface behavior */
+#define CPC_CMD_TYPE_CAN_PARAMS    6   /* set CAN parameters */
+#define CPC_CMD_TYPE_RTR_FRAME     13  /* CAN remote frame */
+#define CPC_CMD_TYPE_CAN_STATE     14  /* CAN state message */
+#define CPC_CMD_TYPE_EXT_CAN_FRAME 15  /* Extended CAN data frame */
+#define CPC_CMD_TYPE_EXT_RTR_FRAME 16  /* Extended CAN remote frame */
+#define CPC_CMD_TYPE_CAN_EXIT      200 /* exit the CAN */
+
+#define CPC_CMD_TYPE_INQ_ERR_COUNTER 25 /* request the CAN error counters */
+#define CPC_CMD_TYPE_CLEAR_MSG_QUEUE 8  /* clear CPC_MSG queue */
+#define CPC_CMD_TYPE_CLEAR_CMD_QUEUE 28 /* clear CPC_CMD queue */
+
+#define CPC_CC_TYPE_SJA1000 2 /* Philips basic CAN controller */
+
+#define CPC_CAN_ECODE_ERRFRAME 0x01 /* Ecode type */
+
+/* Overrun types */
+#define CPC_OVR_EVENT_CAN       0x01
+#define CPC_OVR_EVENT_CANSTATE  0x02
+#define CPC_OVR_EVENT_BUSERROR  0x04
+
+/*
+ * If the CAN controller lost a message we indicate it with the highest bit
+ * set in the count field.
+ */
+#define CPC_OVR_HW 0x80
+
+/* Size of the "struct ems_cpc_msg" without the union */
+#define CPC_MSG_HEADER_LEN   11
+#define CPC_CAN_MSG_MIN_SIZE 5
+
+/* Define these values to match your devices */
+#define USB_CPCUSB_VENDOR_ID 0x12D6
+
+#define USB_CPCUSB_ARM7_PRODUCT_ID 0x0444
+
+/* Mode register NXP LPC2119/SJA1000 CAN Controller */
+#define SJA1000_MOD_NORMAL 0x00
+#define SJA1000_MOD_RM     0x01
+
+/* ECC register NXP LPC2119/SJA1000 CAN Controller */
+#define SJA1000_ECC_SEG   0x1F
+#define SJA1000_ECC_DIR   0x20
+#define SJA1000_ECC_ERR   0x06
+#define SJA1000_ECC_BIT   0x00
+#define SJA1000_ECC_FORM  0x40
+#define SJA1000_ECC_STUFF 0x80
+#define SJA1000_ECC_MASK  0xc0
+
+/* Status register content */
+#define SJA1000_SR_BS 0x80
+#define SJA1000_SR_ES 0x40
+
+#define SJA1000_DEFAULT_OUTPUT_CONTROL 0xDA
+
+/*
+ * The device actually uses a 16MHz clock to generate the CAN clock
+ * but it expects SJA1000 bit settings based on 8MHz (the conversion is
+ * done internally).
+ */
+#define EMS_USB_ARM7_CLOCK 8000000
+
+/*
+ * CAN-Message representation in a CPC_MSG. Message object type is
+ * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or
+ * CPC_MSG_TYPE_EXT_CAN_FRAME or CPC_MSG_TYPE_EXT_RTR_FRAME.
+ */
+struct cpc_can_msg {
+       u32 id;
+       u8 length;
+       u8 msg[8];
+};
+
+/* Representation of the CAN parameters for the SJA1000 controller */
+struct cpc_sja1000_params {
+       u8 mode;
+       u8 acc_code0;
+       u8 acc_code1;
+       u8 acc_code2;
+       u8 acc_code3;
+       u8 acc_mask0;
+       u8 acc_mask1;
+       u8 acc_mask2;
+       u8 acc_mask3;
+       u8 btr0;
+       u8 btr1;
+       u8 outp_contr;
+};
+
+/* CAN params message representation */
+struct cpc_can_params {
+       u8 cc_type;
+
+       /* Will support M16C CAN controller in the future */
+       union {
+               struct cpc_sja1000_params sja1000;
+       } cc_params;
+};
+
+/* Structure for confirmed message handling */
+struct cpc_confirm {
+       u8 error; /* error code */
+};
+
+/* Structure for overrun conditions */
+struct cpc_overrun {
+       u8 event;
+       u8 count;
+};
+
+/* SJA1000 CAN errors (compatible to NXP LPC2119) */
+struct cpc_sja1000_can_error {
+       u8 ecc;
+       u8 rxerr;
+       u8 txerr;
+};
+
+/* structure for CAN error conditions */
+struct cpc_can_error {
+       u8 ecode;
+
+       struct {
+               u8 cc_type;
+
+               /* Other controllers may also provide error code capture regs */
+               union {
+                       struct cpc_sja1000_can_error sja1000;
+               } regs;
+       } cc;
+};
+
+/*
+ * Structure containing RX/TX error counter. This structure is used to request
+ * the values of the CAN controllers TX and RX error counter.
+ */
+struct cpc_can_err_counter {
+       u8 rx;
+       u8 tx;
+};
+
+/* Main message type used between library and application */
+struct __attribute__ ((packed)) ems_cpc_msg {
+       u8 type;        /* type of message */
+       u8 length;      /* length of data within union 'msg' */
+       u8 msgid;       /* confirmation handle */
+       u32 ts_sec;     /* timestamp in seconds */
+       u32 ts_nsec;    /* timestamp in nanoseconds */
+
+       union {
+               u8 generic[64];
+               struct cpc_can_msg can_msg;
+               struct cpc_can_params can_params;
+               struct cpc_confirm confirmation;
+               struct cpc_overrun overrun;
+               struct cpc_can_error error;
+               struct cpc_can_err_counter err_counter;
+               u8 can_state;
+       } msg;
+};
+
+/*
+ * Table of devices that work with this driver
+ * NOTE: This driver supports only the CPC-USB/ARM7 (LPC2119) so far.
+ */
+static struct usb_device_id ems_usb_table[] = {
+       {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_ARM7_PRODUCT_ID)},
+       {} /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, ems_usb_table);
+
+#define RX_BUFFER_SIZE      64
+#define CPC_HEADER_SIZE     4
+#define INTR_IN_BUFFER_SIZE 4
+
+#define MAX_RX_URBS 10
+#define MAX_TX_URBS CAN_ECHO_SKB_MAX
+
+struct ems_usb;
+
+struct ems_tx_urb_context {
+       struct ems_usb *dev;
+
+       u32 echo_index;
+       u8 dlc;
+};
+
+struct ems_usb {
+       struct can_priv can; /* must be the first member */
+       int open_time;
+
+       struct sk_buff *echo_skb[MAX_TX_URBS];
+
+       struct usb_device *udev;
+       struct net_device *netdev;
+
+       atomic_t active_tx_urbs;
+       struct usb_anchor tx_submitted;
+       struct ems_tx_urb_context tx_contexts[MAX_TX_URBS];
+
+       struct usb_anchor rx_submitted;
+
+       struct urb *intr_urb;
+
+       u8 *tx_msg_buffer;
+
+       u8 *intr_in_buffer;
+       unsigned int free_slots; /* remember number of available slots */
+
+       struct ems_cpc_msg active_params; /* active controller parameters */
+};
+
+static void ems_usb_read_interrupt_callback(struct urb *urb)
+{
+       struct ems_usb *dev = urb->context;
+       struct net_device *netdev = dev->netdev;
+       int err;
+
+       if (!netif_device_present(netdev))
+               return;
+
+       switch (urb->status) {
+       case 0:
+               dev->free_slots = dev->intr_in_buffer[1];
+               break;
+
+       case -ECONNRESET: /* unlink */
+       case -ENOENT:
+       case -ESHUTDOWN:
+               return;
+
+       default:
+               dev_info(netdev->dev.parent, "Rx interrupt aborted %d\n",
+                        urb->status);
+               break;
+       }
+
+       err = usb_submit_urb(urb, GFP_ATOMIC);
+
+       if (err == -ENODEV)
+               netif_device_detach(netdev);
+       else if (err)
+               dev_err(netdev->dev.parent,
+                       "failed resubmitting intr urb: %d\n", err);
+
+       return;
+}
+
+static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+       struct can_frame *cf;
+       struct sk_buff *skb;
+       int i;
+       struct net_device_stats *stats = &dev->netdev->stats;
+
+       skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+       if (skb == NULL)
+               return;
+
+       skb->protocol = htons(ETH_P_CAN);
+
+       cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+
+       cf->can_id = msg->msg.can_msg.id;
+       cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8);
+
+       if (msg->type == CPC_MSG_TYPE_EXT_CAN_FRAME
+           || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME)
+               cf->can_id |= CAN_EFF_FLAG;
+
+       if (msg->type == CPC_MSG_TYPE_RTR_FRAME
+           || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME) {
+               cf->can_id |= CAN_RTR_FLAG;
+       } else {
+               for (i = 0; i < cf->can_dlc; i++)
+                       cf->data[i] = msg->msg.can_msg.msg[i];
+       }
+
+       netif_rx(skb);
+
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+}
+
+static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+       struct can_frame *cf;
+       struct sk_buff *skb;
+       struct net_device_stats *stats = &dev->netdev->stats;
+
+       skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+       if (skb == NULL)
+               return;
+
+       skb->protocol = htons(ETH_P_CAN);
+
+       cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+       memset(cf, 0, sizeof(struct can_frame));
+
+       cf->can_id = CAN_ERR_FLAG;
+       cf->can_dlc = CAN_ERR_DLC;
+
+       if (msg->type == CPC_MSG_TYPE_CAN_STATE) {
+               u8 state = msg->msg.can_state;
+
+               if (state & SJA1000_SR_BS) {
+                       dev->can.state = CAN_STATE_BUS_OFF;
+                       cf->can_id |= CAN_ERR_BUSOFF;
+
+                       can_bus_off(dev->netdev);
+               } else if (state & SJA1000_SR_ES) {
+                       dev->can.state = CAN_STATE_ERROR_WARNING;
+                       dev->can.can_stats.error_warning++;
+               } else {
+                       dev->can.state = CAN_STATE_ERROR_ACTIVE;
+                       dev->can.can_stats.error_passive++;
+               }
+       } else if (msg->type == CPC_MSG_TYPE_CAN_FRAME_ERROR) {
+               u8 ecc = msg->msg.error.cc.regs.sja1000.ecc;
+               u8 txerr = msg->msg.error.cc.regs.sja1000.txerr;
+               u8 rxerr = msg->msg.error.cc.regs.sja1000.rxerr;
+
+               /* bus error interrupt */
+               dev->can.can_stats.bus_error++;
+               stats->rx_errors++;
+
+               cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
+
+               switch (ecc & SJA1000_ECC_MASK) {
+               case SJA1000_ECC_BIT:
+                       cf->data[2] |= CAN_ERR_PROT_BIT;
+                       break;
+               case SJA1000_ECC_FORM:
+                       cf->data[2] |= CAN_ERR_PROT_FORM;
+                       break;
+               case SJA1000_ECC_STUFF:
+                       cf->data[2] |= CAN_ERR_PROT_STUFF;
+                       break;
+               default:
+                       cf->data[2] |= CAN_ERR_PROT_UNSPEC;
+                       cf->data[3] = ecc & SJA1000_ECC_SEG;
+                       break;
+               }
+
+               /* Error occurred during transmission? */
+               if ((ecc & SJA1000_ECC_DIR) == 0)
+                       cf->data[2] |= CAN_ERR_PROT_TX;
+
+               if (dev->can.state == CAN_STATE_ERROR_WARNING ||
+                   dev->can.state == CAN_STATE_ERROR_PASSIVE) {
+                       cf->data[1] = (txerr > rxerr) ?
+                           CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE;
+               }
+       } else if (msg->type == CPC_MSG_TYPE_OVERRUN) {
+               cf->can_id |= CAN_ERR_CRTL;
+               cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+
+               stats->rx_over_errors++;
+               stats->rx_errors++;
+       }
+
+       netif_rx(skb);
+
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+}
+
+/*
+ * callback for bulk IN urb
+ */
+static void ems_usb_read_bulk_callback(struct urb *urb)
+{
+       struct ems_usb *dev = urb->context;
+       struct net_device *netdev;
+       int retval;
+
+       netdev = dev->netdev;
+
+       if (!netif_device_present(netdev))
+               return;
+
+       switch (urb->status) {
+       case 0: /* success */
+               break;
+
+       case -ENOENT:
+               return;
+
+       default:
+               dev_info(netdev->dev.parent, "Rx URB aborted (%d)\n",
+                        urb->status);
+               goto resubmit_urb;
+       }
+
+       if (urb->actual_length > CPC_HEADER_SIZE) {
+               struct ems_cpc_msg *msg;
+               u8 *ibuf = urb->transfer_buffer;
+               u8 msg_count, again, start;
+
+               msg_count = ibuf[0] & ~0x80;
+               again = ibuf[0] & 0x80;
+
+               start = CPC_HEADER_SIZE;
+
+               while (msg_count) {
+                       msg = (struct ems_cpc_msg *)&ibuf[start];
+
+                       switch (msg->type) {
+                       case CPC_MSG_TYPE_CAN_STATE:
+                               /* Process CAN state changes */
+                               ems_usb_rx_err(dev, msg);
+                               break;
+
+                       case CPC_MSG_TYPE_CAN_FRAME:
+                       case CPC_MSG_TYPE_EXT_CAN_FRAME:
+                       case CPC_MSG_TYPE_RTR_FRAME:
+                       case CPC_MSG_TYPE_EXT_RTR_FRAME:
+                               ems_usb_rx_can_msg(dev, msg);
+                               break;
+
+                       case CPC_MSG_TYPE_CAN_FRAME_ERROR:
+                               /* Process error frame */
+                               ems_usb_rx_err(dev, msg);
+                               break;
+
+                       case CPC_MSG_TYPE_OVERRUN:
+                               /* Message lost while receiving */
+                               ems_usb_rx_err(dev, msg);
+                               break;
+                       }
+
+                       start += CPC_MSG_HEADER_LEN + msg->length;
+                       msg_count--;
+
+                       if (start > urb->transfer_buffer_length) {
+                               dev_err(netdev->dev.parent, "format error\n");
+                               break;
+                       }
+               }
+       }
+
+resubmit_urb:
+       usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
+                         urb->transfer_buffer, RX_BUFFER_SIZE,
+                         ems_usb_read_bulk_callback, dev);
+
+       retval = usb_submit_urb(urb, GFP_ATOMIC);
+
+       if (retval == -ENODEV)
+               netif_device_detach(netdev);
+       else if (retval)
+               dev_err(netdev->dev.parent,
+                       "failed resubmitting read bulk urb: %d\n", retval);
+
+       return;
+}
+
+/*
+ * callback for bulk OUT urb
+ */
+static void ems_usb_write_bulk_callback(struct urb *urb)
+{
+       struct ems_tx_urb_context *context = urb->context;
+       struct ems_usb *dev;
+       struct net_device *netdev;
+
+       BUG_ON(!context);
+
+       dev = context->dev;
+       netdev = dev->netdev;
+
+       /* free up our allocated buffer */
+       usb_buffer_free(urb->dev, urb->transfer_buffer_length,
+                       urb->transfer_buffer, urb->transfer_dma);
+
+       atomic_dec(&dev->active_tx_urbs);
+
+       if (!netif_device_present(netdev))
+               return;
+
+       if (urb->status)
+               dev_info(netdev->dev.parent, "Tx URB aborted (%d)\n",
+                        urb->status);
+
+       netdev->trans_start = jiffies;
+
+       /* transmission complete interrupt */
+       netdev->stats.tx_packets++;
+       netdev->stats.tx_bytes += context->dlc;
+
+       can_get_echo_skb(netdev, context->echo_index);
+
+       /* Release context */
+       context->echo_index = MAX_TX_URBS;
+
+       if (netif_queue_stopped(netdev))
+               netif_wake_queue(netdev);
+}
+
+/*
+ * Send the given CPC command synchronously
+ */
+static int ems_usb_command_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+       int actual_length;
+
+       /* Copy payload */
+       memcpy(&dev->tx_msg_buffer[CPC_HEADER_SIZE], msg,
+              msg->length + CPC_MSG_HEADER_LEN);
+
+       /* Clear header */
+       memset(&dev->tx_msg_buffer[0], 0, CPC_HEADER_SIZE);
+
+       return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2),
+                           &dev->tx_msg_buffer[0],
+                           msg->length + CPC_MSG_HEADER_LEN + CPC_HEADER_SIZE,
+                           &actual_length, 1000);
+}
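For reference, the buffer handed to usb_bulk_msg() above is a zeroed CPC_HEADER_SIZE prefix, then the CPC_MSG_HEADER_LEN message header, then msg->length payload bytes. A worked size, assuming a standard data frame carrying 8 bytes (illustrative arithmetic using the #defines above):

	msg->length = CPC_CAN_MSG_MIN_SIZE + 8;	/* 5 + 8 = 13 */
	/* bytes on the wire: 13 + CPC_MSG_HEADER_LEN + CPC_HEADER_SIZE
	 *                  = 13 + 11 + 4 = 28
	 */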
+
+/*
+ * Change CAN controllers' mode register
+ */
+static int ems_usb_write_mode(struct ems_usb *dev, u8 mode)
+{
+       dev->active_params.msg.can_params.cc_params.sja1000.mode = mode;
+
+       return ems_usb_command_msg(dev, &dev->active_params);
+}
+
+/*
+ * Send a CPC_Control command to change how the interface reports received CAN
+ * messages, bus errors and CAN state change notifications.
+ */
+static int ems_usb_control_cmd(struct ems_usb *dev, u8 val)
+{
+       struct ems_cpc_msg cmd;
+
+       cmd.type = CPC_CMD_TYPE_CONTROL;
+       cmd.length = CPC_MSG_HEADER_LEN + 1;
+
+       cmd.msgid = 0;
+
+       cmd.msg.generic[0] = val;
+
+       return ems_usb_command_msg(dev, &cmd);
+}
+
+/*
+ * Start interface
+ */
+static int ems_usb_start(struct ems_usb *dev)
+{
+       struct net_device *netdev = dev->netdev;
+       int err, i;
+
+       dev->intr_in_buffer[0] = 0;
+       dev->free_slots = 15; /* initial size */
+
+       for (i = 0; i < MAX_RX_URBS; i++) {
+               struct urb *urb = NULL;
+               u8 *buf = NULL;
+
+               /* create a URB, and a buffer for it */
+               urb = usb_alloc_urb(0, GFP_KERNEL);
+               if (!urb) {
+                       dev_err(netdev->dev.parent,
+                               "No memory left for URBs\n");
+                       return -ENOMEM;
+               }
+
+               buf = usb_buffer_alloc(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
+                                      &urb->transfer_dma);
+               if (!buf) {
+                       dev_err(netdev->dev.parent,
+                               "No memory left for USB buffer\n");
+                       usb_free_urb(urb);
+                       return -ENOMEM;
+               }
+
+               usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
+                                 buf, RX_BUFFER_SIZE,
+                                 ems_usb_read_bulk_callback, dev);
+               urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+               usb_anchor_urb(urb, &dev->rx_submitted);
+
+               err = usb_submit_urb(urb, GFP_KERNEL);
+               if (err) {
+                       if (err == -ENODEV)
+                               netif_device_detach(dev->netdev);
+
+                       usb_unanchor_urb(urb);
+                       usb_buffer_free(dev->udev, RX_BUFFER_SIZE, buf,
+                                       urb->transfer_dma);
+                       break;
+               }
+
+               /* Drop reference, USB core will take care of freeing it */
+               usb_free_urb(urb);
+       }
+
+       /* Did we submit any URBs? */
+       if (i == 0) {
+               dev_warn(netdev->dev.parent, "couldn't setup read URBs\n");
+               return err;
+       }
+
+       /* Warn if we couldn't submit all the URBs */
+       if (i < MAX_RX_URBS)
+               dev_warn(netdev->dev.parent, "rx performance may be slow\n");
+
+       /* Setup and start interrupt URB */
+       usb_fill_int_urb(dev->intr_urb, dev->udev,
+                        usb_rcvintpipe(dev->udev, 1),
+                        dev->intr_in_buffer,
+                        INTR_IN_BUFFER_SIZE,
+                        ems_usb_read_interrupt_callback, dev, 1);
+
+       err = usb_submit_urb(dev->intr_urb, GFP_KERNEL);
+       if (err) {
+               if (err == -ENODEV)
+                       netif_device_detach(dev->netdev);
+
+               dev_warn(netdev->dev.parent, "intr URB submit failed: %d\n",
+                        err);
+
+               return err;
+       }
+
+       /* CPC-USB will transfer received message to host */
+       err = ems_usb_control_cmd(dev, CONTR_CAN_MESSAGE | CONTR_CONT_ON);
+       if (err)
+               goto failed;
+
+       /* CPC-USB will transfer CAN state changes to host */
+       err = ems_usb_control_cmd(dev, CONTR_CAN_STATE | CONTR_CONT_ON);
+       if (err)
+               goto failed;
+
+       /* CPC-USB will transfer bus errors to host */
+       err = ems_usb_control_cmd(dev, CONTR_BUS_ERROR | CONTR_CONT_ON);
+       if (err)
+               goto failed;
+
+       err = ems_usb_write_mode(dev, SJA1000_MOD_NORMAL);
+       if (err)
+               goto failed;
+
+       dev->can.state = CAN_STATE_ERROR_ACTIVE;
+
+       return 0;
+
+failed:
+       if (err == -ENODEV)
+               netif_device_detach(dev->netdev);
+
+       dev_warn(netdev->dev.parent, "couldn't submit control: %d\n", err);
+
+       return err;
+}
+
+static void unlink_all_urbs(struct ems_usb *dev)
+{
+       int i;
+
+       usb_unlink_urb(dev->intr_urb);
+
+       usb_kill_anchored_urbs(&dev->rx_submitted);
+
+       usb_kill_anchored_urbs(&dev->tx_submitted);
+       atomic_set(&dev->active_tx_urbs, 0);
+
+       for (i = 0; i < MAX_TX_URBS; i++)
+               dev->tx_contexts[i].echo_index = MAX_TX_URBS;
+}
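unlink_all_urbs() stays this simple because every URB the driver submits is first attached to dev->rx_submitted or dev->tx_submitted, so teardown needs no per-URB bookkeeping. A minimal sketch of the anchor idiom (hypothetical demo driver, not from this patch; init_usb_anchor() is assumed to have run at probe time):

#include <linux/usb.h>

static struct usb_anchor demo_anchor;

static int demo_submit(struct urb *urb)
{
	int err;

	usb_anchor_urb(urb, &demo_anchor);	/* track before submitting */
	err = usb_submit_urb(urb, GFP_ATOMIC);
	if (err)
		usb_unanchor_urb(urb);		/* drop tracking on failure */
	return err;
}

static void demo_shutdown(void)
{
	/* cancel and wait for every URB still on the anchor */
	usb_kill_anchored_urbs(&demo_anchor);
}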
+
+static int ems_usb_open(struct net_device *netdev)
+{
+       struct ems_usb *dev = netdev_priv(netdev);
+       int err;
+
+       err = ems_usb_write_mode(dev, SJA1000_MOD_RM);
+       if (err)
+               return err;
+
+       /* common open */
+       err = open_candev(netdev);
+       if (err)
+               return err;
+
+       /* finally start device */
+       err = ems_usb_start(dev);
+       if (err) {
+               if (err == -ENODEV)
+                       netif_device_detach(dev->netdev);
+
+               dev_warn(netdev->dev.parent, "couldn't start device: %d\n",
+                        err);
+
+               close_candev(netdev);
+
+               return err;
+       }
+
+       dev->open_time = jiffies;
+
+       netif_start_queue(netdev);
+
+       return 0;
+}
+
+static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct ems_usb *dev = netdev_priv(netdev);
+       struct ems_tx_urb_context *context = NULL;
+       struct net_device_stats *stats = &netdev->stats;
+       struct can_frame *cf = (struct can_frame *)skb->data;
+       struct ems_cpc_msg *msg;
+       struct urb *urb;
+       u8 *buf;
+       int i, err;
+       size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN
+                       + sizeof(struct cpc_can_msg);
+
+       /* create a URB, and a buffer for it, and copy the data to the URB */
+       urb = usb_alloc_urb(0, GFP_ATOMIC);
+       if (!urb) {
+               dev_err(netdev->dev.parent, "No memory left for URBs\n");
+               goto nomem;
+       }
+
+       buf = usb_buffer_alloc(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma);
+       if (!buf) {
+               dev_err(netdev->dev.parent, "No memory left for USB buffer\n");
+               usb_free_urb(urb);
+               goto nomem;
+       }
+
+       msg = (struct ems_cpc_msg *)&buf[CPC_HEADER_SIZE];
+
+       msg->msg.can_msg.id = cf->can_id & CAN_ERR_MASK;
+       msg->msg.can_msg.length = cf->can_dlc;
+
+       if (cf->can_id & CAN_RTR_FLAG) {
+               msg->type = cf->can_id & CAN_EFF_FLAG ?
+                       CPC_CMD_TYPE_EXT_RTR_FRAME : CPC_CMD_TYPE_RTR_FRAME;
+
+               msg->length = CPC_CAN_MSG_MIN_SIZE;
+       } else {
+               msg->type = cf->can_id & CAN_EFF_FLAG ?
+                       CPC_CMD_TYPE_EXT_CAN_FRAME : CPC_CMD_TYPE_CAN_FRAME;
+
+               for (i = 0; i < cf->can_dlc; i++)
+                       msg->msg.can_msg.msg[i] = cf->data[i];
+
+               msg->length = CPC_CAN_MSG_MIN_SIZE + cf->can_dlc;
+       }
+
+       for (i = 0; i < MAX_TX_URBS; i++) {
+               if (dev->tx_contexts[i].echo_index == MAX_TX_URBS) {
+                       context = &dev->tx_contexts[i];
+                       break;
+               }
+       }
+
+       /*
+        * This should never happen: it would mean more URBs in flight than
+        * allowed (MAX_TX_URBS).
+        */
+       if (!context) {
+               usb_unanchor_urb(urb);
+               usb_buffer_free(dev->udev, size, buf, urb->transfer_dma);
+
+               dev_warn(netdev->dev.parent, "couldn't find free context\n");
+
+               return NETDEV_TX_BUSY;
+       }
+
+       context->dev = dev;
+       context->echo_index = i;
+       context->dlc = cf->can_dlc;
+
+       usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf,
+                         size, ems_usb_write_bulk_callback, context);
+       urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+       usb_anchor_urb(urb, &dev->tx_submitted);
+
+       can_put_echo_skb(skb, netdev, context->echo_index);
+
+       atomic_inc(&dev->active_tx_urbs);
+
+       err = usb_submit_urb(urb, GFP_ATOMIC);
+       if (unlikely(err)) {
+               can_free_echo_skb(netdev, context->echo_index);
+
+               usb_unanchor_urb(urb);
+               usb_buffer_free(dev->udev, size, buf, urb->transfer_dma);
+               dev_kfree_skb(skb);
+
+               atomic_dec(&dev->active_tx_urbs);
+
+               if (err == -ENODEV) {
+                       netif_device_detach(netdev);
+               } else {
+                       dev_warn(netdev->dev.parent, "failed tx_urb %d\n", err);
+
+                       stats->tx_dropped++;
+               }
+       } else {
+               netdev->trans_start = jiffies;
+
+               /* Slow down tx path */
+               if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS ||
+                   dev->free_slots < 5) {
+                       netif_stop_queue(netdev);
+               }
+       }
+
+       /*
+        * Release our reference to this URB, the USB core will eventually free
+        * it entirely.
+        */
+       usb_free_urb(urb);
+
+       return NETDEV_TX_OK;
+
+nomem:
+       if (skb)
+               dev_kfree_skb(skb);
+
+       stats->tx_dropped++;
+
+       return NETDEV_TX_OK;
+}
+
+static int ems_usb_close(struct net_device *netdev)
+{
+       struct ems_usb *dev = netdev_priv(netdev);
+
+       /* Stop polling */
+       unlink_all_urbs(dev);
+
+       netif_stop_queue(netdev);
+
+       /* Set CAN controller to reset mode */
+       if (ems_usb_write_mode(dev, SJA1000_MOD_RM))
+               dev_warn(netdev->dev.parent, "couldn't stop device");
+
+       close_candev(netdev);
+
+       dev->open_time = 0;
+
+       return 0;
+}
+
+static const struct net_device_ops ems_usb_netdev_ops = {
+       .ndo_open = ems_usb_open,
+       .ndo_stop = ems_usb_close,
+       .ndo_start_xmit = ems_usb_start_xmit,
+};
+
+static struct can_bittiming_const ems_usb_bittiming_const = {
+       .name = "ems_usb",
+       .tseg1_min = 1,
+       .tseg1_max = 16,
+       .tseg2_min = 1,
+       .tseg2_max = 8,
+       .sjw_max = 4,
+       .brp_min = 1,
+       .brp_max = 64,
+       .brp_inc = 1,
+};
+
+static int ems_usb_set_mode(struct net_device *netdev, enum can_mode mode)
+{
+       struct ems_usb *dev = netdev_priv(netdev);
+
+       if (!dev->open_time)
+               return -EINVAL;
+
+       switch (mode) {
+       case CAN_MODE_START:
+               if (ems_usb_write_mode(dev, SJA1000_MOD_NORMAL))
+                       dev_warn(netdev->dev.parent, "couldn't start device");
+
+               if (netif_queue_stopped(netdev))
+                       netif_wake_queue(netdev);
+               break;
+
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int ems_usb_set_bittiming(struct net_device *netdev)
+{
+       struct ems_usb *dev = netdev_priv(netdev);
+       struct can_bittiming *bt = &dev->can.bittiming;
+       u8 btr0, btr1;
+
+       btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6);
+       btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) |
+               (((bt->phase_seg2 - 1) & 0x7) << 4);
+       if (dev->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+               btr1 |= 0x80;
+
+       dev_info(netdev->dev.parent, "setting BTR0=0x%02x BTR1=0x%02x\n",
+                btr0, btr1);
+
+       dev->active_params.msg.can_params.cc_params.sja1000.btr0 = btr0;
+       dev->active_params.msg.can_params.cc_params.sja1000.btr1 = btr1;
+
+       return ems_usb_command_msg(dev, &dev->active_params);
+}
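
The packing above follows the SJA1000 bus-timing register layout: BTR0 holds the baud-rate prescaler in bits 5:0 and the sync jump width in bits 7:6; BTR1 holds TSEG1 (prop_seg + phase_seg1) in bits 3:0, TSEG2 in bits 6:4, and triple sampling in bit 7. A standalone sketch with illustrative timing values (not taken from this driver):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                unsigned brp = 1, sjw = 1;             /* prescaler, sync jump width */
                unsigned prop_seg = 6, phase_seg1 = 7; /* TSEG1 = 6 + 7 = 13 */
                unsigned phase_seg2 = 2;
                int triple_sampling = 0;

                uint8_t btr0 = ((brp - 1) & 0x3f) | (((sjw - 1) & 0x3) << 6);
                uint8_t btr1 = ((prop_seg + phase_seg1 - 1) & 0xf) |
                               (((phase_seg2 - 1) & 0x7) << 4);
                if (triple_sampling)
                        btr1 |= 0x80;

                /* prints BTR0=0x00 BTR1=0x1c */
                printf("BTR0=0x%02x BTR1=0x%02x\n", btr0, btr1);
                return 0;
        }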
+
+static void init_params_sja1000(struct ems_cpc_msg *msg)
+{
+       struct cpc_sja1000_params *sja1000 =
+               &msg->msg.can_params.cc_params.sja1000;
+
+       msg->type = CPC_CMD_TYPE_CAN_PARAMS;
+       msg->length = sizeof(struct cpc_can_params);
+       msg->msgid = 0;
+
+       msg->msg.can_params.cc_type = CPC_CC_TYPE_SJA1000;
+
+       /* Acceptance filter open */
+       sja1000->acc_code0 = 0x00;
+       sja1000->acc_code1 = 0x00;
+       sja1000->acc_code2 = 0x00;
+       sja1000->acc_code3 = 0x00;
+
+       /* Acceptance filter open */
+       sja1000->acc_mask0 = 0xFF;
+       sja1000->acc_mask1 = 0xFF;
+       sja1000->acc_mask2 = 0xFF;
+       sja1000->acc_mask3 = 0xFF;
+
+       sja1000->btr0 = 0;
+       sja1000->btr1 = 0;
+
+       sja1000->outp_contr = SJA1000_DEFAULT_OUTPUT_CONTROL;
+       sja1000->mode = SJA1000_MOD_RM;
+}
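
Why this opens the filter: on the SJA1000, an acceptance-mask bit set to 1 marks the corresponding ID bit as don't-care, and a frame is accepted when the remaining bits equal the acceptance code. With code 0x00000000 and mask 0xFFFFFFFF every bit is don't-care, so all traffic is received. A sketch of the per-bit rule, simplified to one 32-bit word (the hardware splits code and mask across four byte registers each):

        #include <stdint.h>
        #include <stdio.h>

        /* mask bit 1 = don't care; remaining bits must match the code */
        static int sja1000_accepts(uint32_t rx, uint32_t code, uint32_t mask)
        {
                return ((rx ^ code) & ~mask) == 0;
        }

        int main(void)
        {
                /* filter fully open, as programmed above */
                printf("%d\n", sja1000_accepts(0x123, 0x00000000, 0xFFFFFFFF)); /* 1 */
                /* a closed filter requires an exact match instead */
                printf("%d\n", sja1000_accepts(0x123, 0x123, 0x00000000));      /* 1 */
                printf("%d\n", sja1000_accepts(0x124, 0x123, 0x00000000));      /* 0 */
                return 0;
        }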
+
+/*
+ * probe function for new CPC-USB devices
+ */
+static int ems_usb_probe(struct usb_interface *intf,
+                        const struct usb_device_id *id)
+{
+       struct net_device *netdev;
+       struct ems_usb *dev;
+       int i, err = -ENOMEM;
+
+       netdev = alloc_candev(sizeof(struct ems_usb));
+       if (!netdev) {
+               dev_err(&intf->dev, "Couldn't alloc candev\n");
+               return -ENOMEM;
+       }
+
+       dev = netdev_priv(netdev);
+
+       dev->udev = interface_to_usbdev(intf);
+       dev->netdev = netdev;
+
+       dev->can.state = CAN_STATE_STOPPED;
+       dev->can.clock.freq = EMS_USB_ARM7_CLOCK;
+       dev->can.bittiming_const = &ems_usb_bittiming_const;
+       dev->can.do_set_bittiming = ems_usb_set_bittiming;
+       dev->can.do_set_mode = ems_usb_set_mode;
+
+       netdev->flags |= IFF_ECHO; /* we support local echo */
+
+       netdev->netdev_ops = &ems_usb_netdev_ops;
+
+       init_usb_anchor(&dev->rx_submitted);
+
+       init_usb_anchor(&dev->tx_submitted);
+       atomic_set(&dev->active_tx_urbs, 0);
+
+       for (i = 0; i < MAX_TX_URBS; i++)
+               dev->tx_contexts[i].echo_index = MAX_TX_URBS;
+
+       dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+       if (!dev->intr_urb) {
+               dev_err(netdev->dev.parent, "Couldn't alloc intr URB\n");
+               goto cleanup_candev;
+       }
+
+       dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL);
+       if (!dev->intr_in_buffer) {
+               dev_err(netdev->dev.parent, "Couldn't alloc Intr buffer\n");
+               goto cleanup_intr_urb;
+       }
+
+       dev->tx_msg_buffer = kzalloc(CPC_HEADER_SIZE +
+                                    sizeof(struct ems_cpc_msg), GFP_KERNEL);
+       if (!dev->tx_msg_buffer) {
+               dev_err(netdev->dev.parent, "Couldn't alloc Tx buffer\n");
+               goto cleanup_intr_in_buffer;
+       }
+
+       usb_set_intfdata(intf, dev);
+
+       SET_NETDEV_DEV(netdev, &intf->dev);
+
+       init_params_sja1000(&dev->active_params);
+
+       err = ems_usb_command_msg(dev, &dev->active_params);
+       if (err) {
+               dev_err(netdev->dev.parent,
+                       "couldn't initialize controller: %d\n", err);
+               goto cleanup_tx_msg_buffer;
+       }
+
+       err = register_candev(netdev);
+       if (err) {
+               dev_err(netdev->dev.parent,
+                       "couldn't register CAN device: %d\n", err);
+               goto cleanup_tx_msg_buffer;
+       }
+
+       return 0;
+
+cleanup_tx_msg_buffer:
+       kfree(dev->tx_msg_buffer);
+
+cleanup_intr_in_buffer:
+       kfree(dev->intr_in_buffer);
+
+cleanup_intr_urb:
+       usb_free_urb(dev->intr_urb);
+
+cleanup_candev:
+       free_candev(netdev);
+
+       return err;
+}
+
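The probe above uses the usual kernel unwind idiom: each allocation gets a cleanup label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A minimal standalone sketch of the pattern (malloc/free stand in for the URB, buffer and candev allocations):

        #include <stdlib.h>

        static int setup(void)
        {
                void *a, *b, *c;

                a = malloc(16);
                if (!a)
                        goto err;
                b = malloc(16);
                if (!b)
                        goto cleanup_a;
                c = malloc(16);
                if (!c)
                        goto cleanup_b;

                /* in the driver these objects live on until disconnect;
                 * they are freed here only so the sketch itself is leak-free */
                free(c);
                free(b);
                free(a);
                return 0;

        cleanup_b:
                free(b);
        cleanup_a:
                free(a);
        err:
                return -12;             /* -ENOMEM */
        }

        int main(void)
        {
                return setup() ? 1 : 0;
        }
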
+/*
+ * called by the usb core when the device is removed from the system
+ */
+static void ems_usb_disconnect(struct usb_interface *intf)
+{
+       struct ems_usb *dev = usb_get_intfdata(intf);
+
+       usb_set_intfdata(intf, NULL);
+
+       if (dev) {
+               unregister_netdev(dev->netdev);
+               free_candev(dev->netdev);
+
+               unlink_all_urbs(dev);
+
+               usb_free_urb(dev->intr_urb);
+
+               kfree(dev->intr_in_buffer);
+       }
+}
+
+/* usb specific object needed to register this driver with the usb subsystem */
+static struct usb_driver ems_usb_driver = {
+       .name = "ems_usb",
+       .probe = ems_usb_probe,
+       .disconnect = ems_usb_disconnect,
+       .id_table = ems_usb_table,
+};
+
+static int __init ems_usb_init(void)
+{
+       int err;
+
+       printk(KERN_INFO "CPC-USB kernel driver loaded\n");
+
+       /* register this driver with the USB subsystem */
+       err = usb_register(&ems_usb_driver);
+
+       if (err) {
+               err("usb_register failed. Error number %d\n", err);
+               return err;
+       }
+
+       return 0;
+}
+
+static void __exit ems_usb_exit(void)
+{
+       /* deregister this driver with the USB subsystem */
+       usb_deregister(&ems_usb_driver);
+}
+
+module_init(ems_usb_init);
+module_exit(ems_usb_exit);
index d45eacb76702a5134aea3fc50138af013cb95e29..211c8e9182fc51566fffac1fff201da34bc8ee05 100644 (file)
@@ -85,8 +85,6 @@ static int cnic_uio_open(struct uio_info *uinfo, struct inode *inode)
 
        cp->uio_dev = iminor(inode);
 
-       cnic_shutdown_bnx2_rx_ring(dev);
-
        cnic_init_bnx2_tx_ring(dev);
        cnic_init_bnx2_rx_ring(dev);
 
@@ -98,6 +96,8 @@ static int cnic_uio_close(struct uio_info *uinfo, struct inode *inode)
        struct cnic_dev *dev = uinfo->priv;
        struct cnic_local *cp = dev->cnic_priv;
 
+       cnic_shutdown_bnx2_rx_ring(dev);
+
        cp->uio_dev = -1;
        return 0;
 }
index 3e3fab8afb1ee37118a040d68e757603235d14e1..61f9da2b49431d88056daa1bbcb4b6bceaee86b7 100644 (file)
@@ -1109,7 +1109,7 @@ static int external_switch;
 static int __devinit cpmac_probe(struct platform_device *pdev)
 {
        int rc, phy_id;
-       char mdio_bus_id[BUS_ID_SIZE];
+       char mdio_bus_id[MII_BUS_ID_SIZE];
        struct resource *mem;
        struct cpmac_priv *priv;
        struct net_device *dev;
@@ -1118,7 +1118,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev)
        pdata = pdev->dev.platform_data;
 
        if (external_switch || dumb_switch) {
-               strncpy(mdio_bus_id, "0", BUS_ID_SIZE); /* fixed phys bus */
+               strncpy(mdio_bus_id, "0", MII_BUS_ID_SIZE); /* fixed phys bus */
                phy_id = pdev->id;
        } else {
                for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) {
@@ -1126,7 +1126,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev)
                                continue;
                        if (!cpmac_mii->phy_map[phy_id])
                                continue;
-                       strncpy(mdio_bus_id, cpmac_mii->id, BUS_ID_SIZE);
+                       strncpy(mdio_bus_id, cpmac_mii->id, MII_BUS_ID_SIZE);
                        break;
                }
        }
@@ -1167,7 +1167,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev)
        priv->msg_enable = netif_msg_init(debug_level, 0xff);
        memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr));
 
-       snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id);
+       snprintf(priv->phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id);
 
        priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link, 0,
                                                PHY_INTERFACE_MODE_MII);
index 977c3d358279378086f2e82ee03ed21f31221dc7..41bd7aeafd8248119b21fe13e6e7576012e6bd29 100644 (file)
@@ -3083,7 +3083,6 @@ static const struct net_device_ops ehea_netdev_ops = {
        .ndo_poll_controller    = ehea_netpoll,
 #endif
        .ndo_get_stats          = ehea_get_stats,
-       .ndo_change_mtu         = eth_change_mtu,
        .ndo_set_mac_address    = ehea_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_multicast_list = ehea_set_multicast_list,
index a0231cd079f128666c74d95f21ec790145d3786e..7d76bb085e105923080bb8304209f0874c5eeff6 100644 (file)
@@ -285,41 +285,6 @@ void igb_mta_set(struct e1000_hw *hw, u32 hash_value)
        wrfl();
 }
 
-/**
- *  igb_update_mc_addr_list - Update Multicast addresses
- *  @hw: pointer to the HW structure
- *  @mc_addr_list: array of multicast addresses to program
- *  @mc_addr_count: number of multicast addresses to program
- *
- *  Updates entire Multicast Table Array.
- *  The caller must have a packed mc_addr_list of multicast addresses.
- **/
-void igb_update_mc_addr_list(struct e1000_hw *hw,
-                             u8 *mc_addr_list, u32 mc_addr_count)
-{
-       u32 hash_value, hash_bit, hash_reg;
-       int i;
-
-       /* clear mta_shadow */
-       memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
-
-       /* update mta_shadow from mc_addr_list */
-       for (i = 0; (u32) i < mc_addr_count; i++) {
-               hash_value = igb_hash_mc_addr(hw, mc_addr_list);
-
-               hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
-               hash_bit = hash_value & 0x1F;
-
-               hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
-               mc_addr_list += (ETH_ALEN);
-       }
-
-       /* replace the entire MTA table */
-       for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
-               array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
-       wrfl();
-}
-
 /**
  *  igb_hash_mc_addr - Generate a multicast hash value
  *  @hw: pointer to the HW structure
@@ -329,7 +294,7 @@ void igb_update_mc_addr_list(struct e1000_hw *hw,
  *  the multicast filter table array address and new table value.  See
  *  igb_mta_set()
  **/
-u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
 {
        u32 hash_value, hash_mask;
        u8 bit_shift = 0;
@@ -391,6 +356,41 @@ u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
        return hash_value;
 }
 
+/**
+ *  igb_update_mc_addr_list - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *
+ *  Updates entire Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void igb_update_mc_addr_list(struct e1000_hw *hw,
+                             u8 *mc_addr_list, u32 mc_addr_count)
+{
+       u32 hash_value, hash_bit, hash_reg;
+       int i;
+
+       /* clear mta_shadow */
+       memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+       /* update mta_shadow from mc_addr_list */
+       for (i = 0; (u32) i < mc_addr_count; i++) {
+               hash_value = igb_hash_mc_addr(hw, mc_addr_list);
+
+               hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+               hash_bit = hash_value & 0x1F;
+
+               hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
+               mc_addr_list += (ETH_ALEN);
+       }
+
+       /* replace the entire MTA table */
+       for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+               array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
+       wrfl();
+}
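
Moving igb_hash_mc_addr above its only remaining caller lets it become static without a forward declaration; the indexing math is unchanged: the low 5 bits of the hash select a bit within one 32-bit MTA register, and the next bits select which register. A worked sketch, assuming the usual 128-entry table (mta_reg_count = 128):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint32_t mta_shadow[128] = { 0 };  /* mta_reg_count assumed 128 */
                uint32_t hash_value = 0x2a5;       /* illustrative hash result  */

                unsigned hash_reg = (hash_value >> 5) & (128 - 1); /* register index */
                unsigned hash_bit = hash_value & 0x1f;             /* bit inside it  */

                mta_shadow[hash_reg] |= 1u << hash_bit;
                /* prints: reg 21 bit 5 -> 0x00000020 */
                printf("reg %u bit %u -> 0x%08x\n", hash_reg, hash_bit,
                       (unsigned)mta_shadow[hash_reg]);
                return 0;
        }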
+
 /**
  *  igb_clear_hw_cntrs_base - Clear base hardware counters
  *  @hw: pointer to the HW structure
index 7518af8cbbf5ef68820e6b7dac63385ba8234212..bca17d88241731c344a3e3a9f1fc49f1d6069def 100644 (file)
@@ -88,6 +88,5 @@ enum e1000_mng_mode {
 #define E1000_MNG_DHCP_COOKIE_STATUS_VLAN    0x2
 
 extern void e1000_init_function_pointers_82575(struct e1000_hw *hw);
-extern u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr);
 
 #endif
index dd688d45e9cdaeb35dfb6a9ea6479d3fa1b40f76..385be6016667a6fd1049538443d634627f32db03 100644 (file)
@@ -267,7 +267,8 @@ struct ixgbe_adapter {
        enum ixgbe_fc_mode last_lfc_mode;
 
        /* Interrupt Throttle Rate */
-       u32 itr_setting;
+       u32 rx_itr_setting;
+       u32 tx_itr_setting;
        u16 eitr_low;
        u16 eitr_high;
 
@@ -351,7 +352,8 @@ struct ixgbe_adapter {
        struct ixgbe_hw_stats stats;
 
        /* Interrupt Throttle Rate */
-       u32 eitr_param;
+       u32 rx_eitr_param;
+       u32 tx_eitr_param;
 
        unsigned long state;
        u64 tx_busy;
index 026e94a9984954d1b2815aaacca9aadf7f854584..53b0a6680254fdbd816692f0a88e1616f312a7aa 100644 (file)
@@ -1929,7 +1929,7 @@ static int ixgbe_get_coalesce(struct net_device *netdev,
        ec->tx_max_coalesced_frames_irq = adapter->tx_ring[0].work_limit;
 
        /* only valid if in constant ITR mode */
-       switch (adapter->itr_setting) {
+       switch (adapter->rx_itr_setting) {
        case 0:
                /* throttling disabled */
                ec->rx_coalesce_usecs = 0;
@@ -1940,9 +1940,25 @@ static int ixgbe_get_coalesce(struct net_device *netdev,
                break;
        default:
                /* fixed interrupt rate mode */
-               ec->rx_coalesce_usecs = 1000000/adapter->eitr_param;
+               ec->rx_coalesce_usecs = 1000000/adapter->rx_eitr_param;
                break;
        }
+
+       /* only valid if in constant ITR mode */
+       switch (adapter->tx_itr_setting) {
+       case 0:
+               /* throttling disabled */
+               ec->tx_coalesce_usecs = 0;
+               break;
+       case 1:
+               /* dynamic ITR mode */
+               ec->tx_coalesce_usecs = 1;
+               break;
+       default:
+               ec->tx_coalesce_usecs = 1000000/adapter->tx_eitr_param;
+               break;
+       }
+
        return 0;
 }
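
Both directions of the conversion rely on the same identity: a fixed throttle of N microseconds between interrupts is 1000000/N interrupts per second, which is what the *_eitr_param fields store; the sentinel values 0 (throttling off) and 1 (dynamic) bypass the formula. A quick arithmetic sketch:

        #include <stdio.h>

        int main(void)
        {
                unsigned usecs = 50;                   /* e.g. ethtool rx-usecs 50 */
                unsigned eitr_param = 1000000 / usecs; /* 20000 interrupts/s */
                printf("%u usecs -> %u ints/s\n", usecs, eitr_param);

                /* and back, as reported by get_coalesce */
                printf("%u ints/s -> %u usecs\n", eitr_param, 1000000 / eitr_param);
                return 0;
        }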
 
@@ -1953,6 +1969,14 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
        struct ixgbe_q_vector *q_vector;
        int i;
 
+       /*
+        * don't accept Tx-specific changes if we've got mixed Rx/Tx vectors;
+        * test and bail out here before changing the Rx numbers
+        */
+       if (ec->tx_coalesce_usecs &&
+           (1000000/ec->tx_coalesce_usecs) != adapter->tx_eitr_param &&
+           adapter->q_vector[0]->txr_count && adapter->q_vector[0]->rxr_count)
+               return -EINVAL;
+
        if (ec->tx_max_coalesced_frames_irq)
                adapter->tx_ring[0].work_limit = ec->tx_max_coalesced_frames_irq;
 
@@ -1963,26 +1987,49 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
                        return -EINVAL;
 
                /* store the value in ints/second */
-               adapter->eitr_param = 1000000/ec->rx_coalesce_usecs;
+               adapter->rx_eitr_param = 1000000/ec->rx_coalesce_usecs;
 
                /* static value of interrupt rate */
-               adapter->itr_setting = adapter->eitr_param;
+               adapter->rx_itr_setting = adapter->rx_eitr_param;
                /* clear the lower bit as its used for dynamic state */
-               adapter->itr_setting &= ~1;
+               adapter->rx_itr_setting &= ~1;
        } else if (ec->rx_coalesce_usecs == 1) {
                /* 1 means dynamic mode */
-               adapter->eitr_param = 20000;
-               adapter->itr_setting = 1;
+               adapter->rx_eitr_param = 20000;
+               adapter->rx_itr_setting = 1;
        } else {
                /*
                 * any other value means disable eitr, which is best
                 * served by setting the interrupt rate very high
                 */
                if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
-                       adapter->eitr_param = IXGBE_MAX_RSC_INT_RATE;
+                       adapter->rx_eitr_param = IXGBE_MAX_RSC_INT_RATE;
                else
-                       adapter->eitr_param = IXGBE_MAX_INT_RATE;
-               adapter->itr_setting = 0;
+                       adapter->rx_eitr_param = IXGBE_MAX_INT_RATE;
+               adapter->rx_itr_setting = 0;
+       }
+
+       if (ec->tx_coalesce_usecs > 1) {
+               /* check the limits */
+               if ((1000000/ec->tx_coalesce_usecs > IXGBE_MAX_INT_RATE) ||
+                   (1000000/ec->tx_coalesce_usecs < IXGBE_MIN_INT_RATE))
+                       return -EINVAL;
+
+               /* store the value in ints/second */
+               adapter->tx_eitr_param = 1000000/ec->tx_coalesce_usecs;
+
+               /* static value of interrupt rate */
+               adapter->tx_itr_setting = adapter->tx_eitr_param;
+
+       /* clear the lower bit as it's used for dynamic state */
+               adapter->tx_itr_setting &= ~1;
+       } else if (ec->tx_coalesce_usecs == 1) {
+               /* 1 means dynamic mode */
+               adapter->tx_eitr_param = 10000;
+               adapter->tx_itr_setting = 1;
+       } else {
+               adapter->tx_eitr_param = IXGBE_MAX_INT_RATE;
+               adapter->tx_itr_setting = 0;
        }
 
        /* MSI/MSIx Interrupt Mode */
@@ -1992,17 +2039,17 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
                for (i = 0; i < num_vectors; i++) {
                        q_vector = adapter->q_vector[i];
                        if (q_vector->txr_count && !q_vector->rxr_count)
-                               /* tx vector gets half the rate */
-                               q_vector->eitr = (adapter->eitr_param >> 1);
+                               /* tx only */
+                               q_vector->eitr = adapter->tx_eitr_param;
                        else
                                /* rx only or mixed */
-                               q_vector->eitr = adapter->eitr_param;
+                               q_vector->eitr = adapter->rx_eitr_param;
                        ixgbe_write_eitr(q_vector);
                }
        /* Legacy Interrupt Mode */
        } else {
                q_vector = adapter->q_vector[0];
-               q_vector->eitr = adapter->eitr_param;
+               q_vector->eitr = adapter->rx_eitr_param;
                ixgbe_write_eitr(q_vector);
        }
 
index 59ad9590e700c23b79a7a512c0818302408ae06a..c407bd9de0dd87644556efcb5b717a835c4657d6 100644 (file)
@@ -926,12 +926,12 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
                                              r_idx + 1);
                }
 
-               /* if this is a tx only vector halve the interrupt rate */
                if (q_vector->txr_count && !q_vector->rxr_count)
-                       q_vector->eitr = (adapter->eitr_param >> 1);
+                       /* tx only */
+                       q_vector->eitr = adapter->tx_eitr_param;
                else if (q_vector->rxr_count)
-                       /* rx only */
-                       q_vector->eitr = adapter->eitr_param;
+                       /* rx or mixed */
+                       q_vector->eitr = adapter->rx_eitr_param;
 
                ixgbe_write_eitr(q_vector);
        }
@@ -1359,7 +1359,7 @@ static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget)
        /* If all Rx work done, exit the polling mode */
        if (work_done < budget) {
                napi_complete(napi);
-               if (adapter->itr_setting & 1)
+               if (adapter->rx_itr_setting & 1)
                        ixgbe_set_itr_msix(q_vector);
                if (!test_bit(__IXGBE_DOWN, &adapter->state))
                        ixgbe_irq_enable_queues(adapter,
@@ -1420,7 +1420,7 @@ static int ixgbe_clean_rxtx_many(struct napi_struct *napi, int budget)
        /* If all Rx work done, exit the polling mode */
        if (work_done < budget) {
                napi_complete(napi);
-               if (adapter->itr_setting & 1)
+               if (adapter->rx_itr_setting & 1)
                        ixgbe_set_itr_msix(q_vector);
                if (!test_bit(__IXGBE_DOWN, &adapter->state))
                        ixgbe_irq_enable_queues(adapter,
@@ -1458,10 +1458,10 @@ static int ixgbe_clean_txonly(struct napi_struct *napi, int budget)
        if (!ixgbe_clean_tx_irq(q_vector, tx_ring))
                work_done = budget;
 
-       /* If all Rx work done, exit the polling mode */
+       /* If all Tx work done, exit the polling mode */
        if (work_done < budget) {
                napi_complete(napi);
-               if (adapter->itr_setting & 1)
+               if (adapter->tx_itr_setting & 1)
                        ixgbe_set_itr_msix(q_vector);
                if (!test_bit(__IXGBE_DOWN, &adapter->state))
                        ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));
@@ -1848,7 +1848,7 @@ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter)
        struct ixgbe_hw *hw = &adapter->hw;
 
        IXGBE_WRITE_REG(hw, IXGBE_EITR(0),
-                       EITR_INTS_PER_SEC_TO_REG(adapter->eitr_param));
+                       EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr_param));
 
        ixgbe_set_ivar(adapter, 0, 0, 0);
        ixgbe_set_ivar(adapter, 1, 0, 0);
@@ -1969,6 +1969,50 @@ static u32 ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
        return mrqc;
 }
 
+/**
+ * ixgbe_configure_rscctl - enable RSC for the indicated ring
+ * @adapter:    address of board private structure
+ * @index:      index of ring to set
+ * @rx_buf_len: rx buffer length
+ **/
+static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, int index,
+                                   int rx_buf_len)
+{
+       struct ixgbe_ring *rx_ring;
+       struct ixgbe_hw *hw = &adapter->hw;
+       int j;
+       u32 rscctrl;
+
+       rx_ring = &adapter->rx_ring[index];
+       j = rx_ring->reg_idx;
+       rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j));
+       rscctrl |= IXGBE_RSCCTL_RSCEN;
+       /*
+        * we must limit the number of descriptors so that the
+        * total size of max desc * buf_len is not greater
+        * than 65535
+        */
+       if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) {
+#if (MAX_SKB_FRAGS > 16)
+               rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+#elif (MAX_SKB_FRAGS > 8)
+               rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
+#elif (MAX_SKB_FRAGS > 4)
+               rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
+#else
+               rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
+#endif
+       } else {
+               if (rx_buf_len < IXGBE_RXBUFFER_4096)
+                       rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+               else if (rx_buf_len < IXGBE_RXBUFFER_8192)
+                       rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
+               else
+                       rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
+       }
+       IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl);
+}
+
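The descriptor limits above keep the worst-case coalesced size under the hardware's 16-bit length field: 16 descriptors are only allowed while the buffer is strictly smaller than 4096 bytes (16 x 4095 = 65520 <= 65535, whereas 16 x 4096 would overflow it), and larger buffers drop to 8 or 4 descriptors. A sketch of the non-packet-split branch:

        #include <stdio.h>

        /* largest max-descriptor setting with maxdesc * rx_buf_len <= 65535 */
        static int rsc_maxdesc(int rx_buf_len)
        {
                if (rx_buf_len < 4096)          /* IXGBE_RXBUFFER_4096 */
                        return 16;
                else if (rx_buf_len < 8192)     /* IXGBE_RXBUFFER_8192 */
                        return 8;
                return 4;
        }

        int main(void)
        {
                int len = 2048;
                printf("buf %d: maxdesc %d, worst case %d bytes\n",
                       len, rsc_maxdesc(len), rsc_maxdesc(len) * len); /* 32768 */
                len = 8192;
                printf("buf %d: maxdesc %d, worst case %d bytes\n",
                       len, rsc_maxdesc(len), rsc_maxdesc(len) * len); /* 32768 */
                return 0;
        }
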
 /**
  * ixgbe_configure_rx - Configure 8259x Receive Unit after Reset
  * @adapter: board private structure
@@ -1990,7 +2034,6 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
        u32 fctrl, hlreg0;
        u32 reta = 0, mrqc = 0;
        u32 rdrxctl;
-       u32 rscctrl;
        int rx_buf_len;
 
        /* Decide whether to use packet split mode or not */
@@ -2148,36 +2191,9 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
 
        if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
                /* Enable 82599 HW-RSC */
-               for (i = 0; i < adapter->num_rx_queues; i++) {
-                       rx_ring = &adapter->rx_ring[i];
-                       j = rx_ring->reg_idx;
-                       rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j));
-                       rscctrl |= IXGBE_RSCCTL_RSCEN;
-                       /*
-                        * we must limit the number of descriptors so that the
-                        * total size of max desc * buf_len is not greater
-                        * than 65535
-                        */
-                       if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) {
-#if (MAX_SKB_FRAGS > 16)
-                               rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
-#elif (MAX_SKB_FRAGS > 8)
-                               rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
-#elif (MAX_SKB_FRAGS > 4)
-                               rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
-#else
-                               rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
-#endif
-                       } else {
-                               if (rx_buf_len < IXGBE_RXBUFFER_4096)
-                                       rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
-                               else if (rx_buf_len < IXGBE_RXBUFFER_8192)
-                                       rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
-                               else
-                                       rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
-                       }
-                       IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl);
-               }
+               for (i = 0; i < adapter->num_rx_queues; i++)
+                       ixgbe_configure_rscctl(adapter, i, rx_buf_len);
+
                /* Disable RSC for ACK packets */
                IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
                   (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
@@ -2926,6 +2942,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 
        ixgbe_napi_disable_all(adapter);
 
+       clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
+       del_timer_sync(&adapter->sfp_timer);
        del_timer_sync(&adapter->watchdog_timer);
        cancel_work_sync(&adapter->watchdog_task);
 
@@ -2989,7 +3007,7 @@ static int ixgbe_poll(struct napi_struct *napi, int budget)
        /* If budget not fully consumed, exit the polling mode */
        if (work_done < budget) {
                napi_complete(napi);
-               if (adapter->itr_setting & 1)
+               if (adapter->rx_itr_setting & 1)
                        ixgbe_set_itr(adapter);
                if (!test_bit(__IXGBE_DOWN, &adapter->state))
                        ixgbe_irq_enable_queues(adapter, IXGBE_EIMS_RTX_QUEUE);
@@ -3599,7 +3617,10 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
-               q_vector->eitr = adapter->eitr_param;
+               if (q_vector->txr_count && !q_vector->rxr_count)
+                       q_vector->eitr = adapter->tx_eitr_param;
+               else
+                       q_vector->eitr = adapter->rx_eitr_param;
                q_vector->v_idx = q_idx;
                netif_napi_add(adapter->netdev, &q_vector->napi, (*poll), 64);
                adapter->q_vector[q_idx] = q_vector;
@@ -3868,8 +3889,10 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
        hw->fc.disable_fc_autoneg = false;
 
        /* enable itr by default in dynamic mode */
-       adapter->itr_setting = 1;
-       adapter->eitr_param = 20000;
+       adapter->rx_itr_setting = 1;
+       adapter->rx_eitr_param = 20000;
+       adapter->tx_itr_setting = 1;
+       adapter->tx_eitr_param = 10000;
 
        /* set defaults for eitr in MegaBytes */
        adapter->eitr_low = 10;
index f7bdde111dfcba86f0bf821fd332c77580670738..b5aa974827e53bc5b69b209797b332028eec6a42 100644 (file)
@@ -1469,6 +1469,7 @@ netxen_nic_resume(struct pci_dev *pdev)
        }
 
        netxen_schedule_work(adapter, netxen_fw_poll_work, FW_POLL_DELAY);
+       return 0;
 
 err_out_detach:
        netxen_nic_detach(adapter);
@@ -1903,12 +1904,13 @@ static void netxen_tx_timeout_task(struct work_struct *work)
 
                netif_wake_queue(adapter->netdev);
 
-               goto done;
+               clear_bit(__NX_RESETTING, &adapter->state);
 
        } else {
+               clear_bit(__NX_RESETTING, &adapter->state);
                if (!netxen_nic_reset_context(adapter)) {
                        adapter->netdev->trans_start = jiffies;
-                       goto done;
+                       return;
                }
 
                /* context reset failed, fall through for fw reset */
@@ -1916,8 +1918,6 @@ static void netxen_tx_timeout_task(struct work_struct *work)
 
 request_reset:
        adapter->need_fw_reset = 1;
-done:
-       clear_bit(__NX_RESETTING, &adapter->state);
 }
 
 struct net_device_stats *netxen_nic_get_stats(struct net_device *netdev)
index 97db1c732342bac4229720951b4938e401a6a3f2..474876c879cba3579f8745e5927c1a1042af8334 100644 (file)
@@ -340,12 +340,11 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link)
        base = &virt[hw_info[i].offset & (req.Size-1)];
        if ((readb(base+0) == hw_info[i].a0) &&
            (readb(base+2) == hw_info[i].a1) &&
-           (readb(base+4) == hw_info[i].a2))
-           break;
-    }
-    if (i < NR_INFO) {
-       for (j = 0; j < 6; j++)
-           dev->dev_addr[j] = readb(base + (j<<1));
+           (readb(base+4) == hw_info[i].a2)) {
+               for (j = 0; j < 6; j++)
+                   dev->dev_addr[j] = readb(base + (j<<1));
+               break;
+       }
     }
 
     iounmap(virt);
index 07a7e4b8f8fc67a06d1ed8c3ee8a153d923822c1..cc4b2f99989dc5433327ae1b47b184c846da151f 100644 (file)
@@ -884,13 +884,12 @@ static int efx_wanted_rx_queues(void)
        int count;
        int cpu;
 
-       if (unlikely(!alloc_cpumask_var(&core_mask, GFP_KERNEL))) {
+       if (unlikely(!zalloc_cpumask_var(&core_mask, GFP_KERNEL))) {
                printk(KERN_WARNING
                       "sfc: RSS disabled due to allocation failure\n");
                return 1;
        }
 
-       cpumask_clear(core_mask);
        count = 0;
        for_each_online_cpu(cpu) {
                if (!cpumask_test_cpu(cpu, core_mask)) {
index 15140f9f2e92883bc7d807ee571afa82fe893539..ef1165718dd77328da13bf06308d59baae82bc59 100644 (file)
@@ -1497,7 +1497,6 @@ static int sky2_up(struct net_device *dev)
        if (ramsize > 0) {
                u32 rxspace;
 
-               hw->flags |= SKY2_HW_RAM_BUFFER;
                pr_debug(PFX "%s: ram buffer %dK\n", dev->name, ramsize);
                if (ramsize < 16)
                        rxspace = ramsize / 2;
@@ -2926,6 +2925,9 @@ static int __devinit sky2_init(struct sky2_hw *hw)
                        ++hw->ports;
        }
 
+       if (sky2_read8(hw, B2_E_0))
+               hw->flags |= SKY2_HW_RAM_BUFFER;
+
        return 0;
 }
 
index f1e5e4542c2a04084b419c990e67760e05253f82..bc74db0d12f37e0b015f1d6b0d099b9ed77f6bc2 100644 (file)
@@ -1016,7 +1016,6 @@ static const struct net_device_ops vnet_ops = {
        .ndo_open               = vnet_open,
        .ndo_stop               = vnet_close,
        .ndo_set_multicast_list = vnet_set_rx_mode,
-       .ndo_change_mtu         = eth_change_mtu,
        .ndo_set_mac_address    = vnet_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_tx_timeout         = vnet_tx_timeout,
index d3ee1994b02f2c6b19b35fa2e4d1d799503aa207..4fdfa2ae5418e6c030be80b5568e3c12dcbbf3f9 100644 (file)
@@ -946,8 +946,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                char *name;
                unsigned long flags = 0;
 
-               err = -EINVAL;
-
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;
                err = security_tun_dev_create();
@@ -964,7 +962,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                        flags |= TUN_TAP_DEV;
                        name = "tap%d";
                } else
-                       goto failed;
+                       return -EINVAL;
 
                if (*ifr->ifr_name)
                        name = ifr->ifr_name;
index e2a39b9be96e25fcea7faefca6328e1d3b2d3e2b..e391ef969c288c9ddec730895f8bd2cb96cbfab5 100644 (file)
@@ -263,6 +263,7 @@ static int kaweth_control(struct kaweth_device *kaweth,
                          int timeout)
 {
        struct usb_ctrlrequest *dr;
+       int retval;
 
        dbg("kaweth_control()");
 
@@ -278,18 +279,21 @@ static int kaweth_control(struct kaweth_device *kaweth,
                return -ENOMEM;
        }
 
-       dr->bRequestType= requesttype;
+       dr->bRequestType = requesttype;
        dr->bRequest = request;
        dr->wValue = cpu_to_le16(value);
        dr->wIndex = cpu_to_le16(index);
        dr->wLength = cpu_to_le16(size);
 
-       return kaweth_internal_control_msg(kaweth->dev,
-                                       pipe,
-                                       dr,
-                                       data,
-                                       size,
-                                       timeout);
+       retval = kaweth_internal_control_msg(kaweth->dev,
+                                            pipe,
+                                            dr,
+                                            data,
+                                            size,
+                                            timeout);
+
+       kfree(dr);
+       return retval;
 }
 
 /****************************************************************
index 938fb3530a7a779eee3344f2d4cd0dadfc07bdba..c6c922247d0558845bd18d2cf5c66cbf948dcaf7 100644 (file)
@@ -1227,7 +1227,7 @@ static const struct driver_info smsc95xx_info = {
        .rx_fixup       = smsc95xx_rx_fixup,
        .tx_fixup       = smsc95xx_tx_fixup,
        .status         = smsc95xx_status,
-       .flags          = FLAG_ETHER,
+       .flags          = FLAG_ETHER | FLAG_SEND_ZLP,
 };
 
 static const struct usb_device_id products[] = {
@@ -1236,11 +1236,76 @@ static const struct usb_device_id products[] = {
                USB_DEVICE(0x0424, 0x9500),
                .driver_info = (unsigned long) &smsc95xx_info,
        },
+       {
+               /* SMSC9505 USB Ethernet Device */
+               USB_DEVICE(0x0424, 0x9505),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9500A USB Ethernet Device */
+               USB_DEVICE(0x0424, 0x9E00),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9505A USB Ethernet Device */
+               USB_DEVICE(0x0424, 0x9E01),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
        {
                /* SMSC9512/9514 USB Hub & Ethernet Device */
                USB_DEVICE(0x0424, 0xec00),
                .driver_info = (unsigned long) &smsc95xx_info,
        },
+       {
+               /* SMSC9500 USB Ethernet Device (SAL10) */
+               USB_DEVICE(0x0424, 0x9900),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9505 USB Ethernet Device (SAL10) */
+               USB_DEVICE(0x0424, 0x9901),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9500A USB Ethernet Device (SAL10) */
+               USB_DEVICE(0x0424, 0x9902),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9505A USB Ethernet Device (SAL10) */
+               USB_DEVICE(0x0424, 0x9903),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9512/9514 USB Hub & Ethernet Device (SAL10) */
+               USB_DEVICE(0x0424, 0x9904),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9500A USB Ethernet Device (HAL) */
+               USB_DEVICE(0x0424, 0x9905),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9505A USB Ethernet Device (HAL) */
+               USB_DEVICE(0x0424, 0x9906),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9500 USB Ethernet Device (Alternate ID) */
+               USB_DEVICE(0x0424, 0x9907),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9500A USB Ethernet Device (Alternate ID) */
+               USB_DEVICE(0x0424, 0x9908),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
+       {
+               /* SMSC9512/9514 USB Hub & Ethernet Device (Alternate ID) */
+               USB_DEVICE(0x0424, 0x9909),
+               .driver_info = (unsigned long) &smsc95xx_info,
+       },
        { },            /* END */
 };
 MODULE_DEVICE_TABLE(usb, products);
index 24b36f795151bd7a61c91260e95d56830fab29ed..ca5ca5ae061d755ba2ec97f4d8285d7c079680ee 100644 (file)
@@ -1049,7 +1049,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
         * NOTE:  strictly conforming cdc-ether devices should expect
         * the ZLP here, but ignore the one-byte packet.
         */
-       if ((length % dev->maxpacket) == 0) {
+       if (!(info->flags & FLAG_SEND_ZLP) && (length % dev->maxpacket) == 0) {
                urb->transfer_buffer_length++;
                if (skb_tailroom(skb)) {
                        skb->data[skb->len] = 0;
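
Background for the flag: a bulk-out transfer whose length is an exact multiple of the endpoint's max packet size has no natural end marker, so usbnet historically padded such frames by one byte; FLAG_SEND_ZLP lets a minidriver like smsc95xx request a real zero-length packet instead, presumably because its tx headers already encode the exact frame length and a pad byte would corrupt the frame. A sketch of the length test:

        #include <stdio.h>

        /* exact multiple of maxpacket -> transfer end is ambiguous */
        static int needs_zlp_or_pad(int length, int maxpacket)
        {
                return (length % maxpacket) == 0;
        }

        int main(void)
        {
                printf("%d\n", needs_zlp_or_pad(512, 512));  /* 1 */
                printf("%d\n", needs_zlp_or_pad(1024, 512)); /* 1 */
                printf("%d\n", needs_zlp_or_pad(1000, 512)); /* 0 */
                return 0;
        }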
index 2ab1d59870f4e1114daca7a1212e43a61026bf68..a8b689635a3ba1a45862bc99ccf9813e09ecf41c 100644 (file)
@@ -402,7 +402,7 @@ static int arlan_setup_card_by_book(struct net_device *dev)
 
 static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0";
 
-static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info(ctl_table * ctl, int write,
                      void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -629,7 +629,7 @@ final:
        *lenp = pos;
 
        if (!write)
-               retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+               retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        else
        {
                *lenp = 0;
@@ -639,7 +639,7 @@ final:
 }
 
 
-static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info161719(ctl_table * ctl, int write,
                            void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -669,11 +669,11 @@ static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp
 
 final:
        *lenp = pos;
-       retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        return retv;
 }
 
-static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_infotxRing(ctl_table * ctl, int write,
                            void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -698,11 +698,11 @@ static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp
        SARLBNpln(u_char, txBuffer, 0x800);
 final:
        *lenp = pos;
-       retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        return retv;
 }
 
-static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_inforxRing(ctl_table * ctl, int write,
                            void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -726,11 +726,11 @@ static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp
        SARLBNpln(u_char, rxBuffer, 0x800);
 final:
        *lenp = pos;
-       retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        return retv;
 }
 
-static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info18(ctl_table * ctl, int write,
                        void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -756,7 +756,7 @@ static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp,
 
 final:
        *lenp = pos;
-       retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        return retv;
 }
 
@@ -766,7 +766,7 @@ final:
 
 static char conf_reset_result[200];
 
-static int arlan_configure(ctl_table * ctl, int write, struct file *filp,
+static int arlan_configure(ctl_table * ctl, int write,
                    void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int pos = 0;
@@ -788,10 +788,10 @@ static int arlan_configure(ctl_table * ctl, int write, struct file *filp,
                return -1;
 
        *lenp = pos;
-       return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       return proc_dostring(ctl, write, buffer, lenp, ppos);
 }
 
-static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_reset(ctl_table * ctl, int write,
                       void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int pos = 0;
@@ -811,7 +811,7 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp,
        } else
                return -1;
        *lenp = pos + 3;
-       return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       return proc_dostring(ctl, write, buffer, lenp, ppos);
 }
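
All of these arlan hunks track the same tree-wide sysctl API change: proc handlers and helpers such as proc_dostring dropped their struct file * argument, which no implementation used. After the change a minimal handler has this shape (sketch only; the handler name and body are illustrative, not arlan code):

        static int my_sysctl_handler(ctl_table *ctl, int write,
                                     void __user *buffer, size_t *lenp, loff_t *ppos)
        {
                /* prepare or consume the buffer behind ctl->data, then delegate */
                return proc_dostring(ctl, write, buffer, lenp, ppos);
        }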
 
 
index e0138ac8bf504cb5ff5d8093fdae2d10a24aafa7..e974e5829e1a08dc7be666fea2705fc947b91f94 100644 (file)
@@ -64,6 +64,8 @@ static struct usb_device_id ar9170_usb_ids[] = {
        { USB_DEVICE(0x0cf3, 0x9170) },
        /* Atheros TG121N */
        { USB_DEVICE(0x0cf3, 0x1001) },
+       /* TP-Link TL-WN821N v2 */
+       { USB_DEVICE(0x0cf3, 0x1002) },
        /* Cace Airpcap NX */
        { USB_DEVICE(0xcace, 0x0300) },
        /* D-Link DWA 160A */
index 3234995e8881c772a40fe1f975a2028f6a434c69..0ad6d0b76e9ee58a8dc7c5428c4a944d25a67115 100644 (file)
@@ -609,14 +609,24 @@ void ath9k_hw_loadnf(struct ath_hw *ah, struct ath9k_channel *chan)
                AR_PHY_CH1_EXT_CCA,
                AR_PHY_CH2_EXT_CCA
        };
-       u8 chainmask;
+       u8 chainmask, rx_chain_status;
 
+       rx_chain_status = REG_READ(ah, AR_PHY_RX_CHAINMASK);
        if (AR_SREV_9285(ah))
                chainmask = 0x9;
-       else if (AR_SREV_9280(ah) || AR_SREV_9287(ah))
-               chainmask = 0x1B;
-       else
-               chainmask = 0x3F;
+       else if (AR_SREV_9280(ah) || AR_SREV_9287(ah)) {
+               if ((rx_chain_status & 0x2) || (rx_chain_status & 0x4))
+                       chainmask = 0x1B;
+               else
+                       chainmask = 0x09;
+       } else {
+               if (rx_chain_status & 0x4)
+                       chainmask = 0x3F;
+               else if (rx_chain_status & 0x2)
+                       chainmask = 0x1B;
+               else
+                       chainmask = 0x09;
+       }
 
        h = ah->nfCalHist;
 
@@ -697,6 +707,8 @@ void ath9k_init_nfcal_hist_buffer(struct ath_hw *ah)
                noise_floor = AR_PHY_CCA_MAX_AR9280_GOOD_VALUE;
        else if (AR_SREV_9285(ah))
                noise_floor = AR_PHY_CCA_MAX_AR9285_GOOD_VALUE;
+       else if (AR_SREV_9287(ah))
+               noise_floor = AR_PHY_CCA_MAX_AR9287_GOOD_VALUE;
        else
                noise_floor = AR_PHY_CCA_MAX_AR5416_GOOD_VALUE;
 
@@ -924,6 +936,7 @@ static inline void ath9k_hw_9285_pa_cal(struct ath_hw *ah, bool is_reset)
                regVal |= (1 << (19 + i));
                REG_WRITE(ah, 0x7834, regVal);
                udelay(1);
+               regVal = REG_READ(ah, 0x7834);
                regVal &= (~(0x1 << (19 + i)));
                reg_field = MS(REG_READ(ah, 0x7840), AR9285_AN_RXTXBB1_SPARE9);
                regVal |= (reg_field << (19 + i));
index 019bcbba40ed8b43a67aae4aef470f3797213656..9028ab193e4203e8a133efc396b0bd9fb07c62c9 100644 (file)
@@ -28,6 +28,7 @@ extern const struct ath9k_percal_data adc_init_dc_cal;
 #define AR_PHY_CCA_MAX_AR5416_GOOD_VALUE       -85
 #define AR_PHY_CCA_MAX_AR9280_GOOD_VALUE       -112
 #define AR_PHY_CCA_MAX_AR9285_GOOD_VALUE       -118
+#define AR_PHY_CCA_MAX_AR9287_GOOD_VALUE       -118
 #define AR_PHY_CCA_MAX_HIGH_VALUE                      -62
 #define AR_PHY_CCA_MIN_BAD_VALUE                       -140
 #define AR_PHY_CCA_FILTERWINDOW_LENGTH_INIT     3
index ae7fb5dcb266f8b9cf2b9c3fc4cbb4fe01d0e5f8..4071fc91da0a94509b4ba3361da0f26278f89bc2 100644 (file)
@@ -509,6 +509,8 @@ static void ath9k_hw_def_set_board_values(struct ath_hw *ah,
                        REG_RMW_FIELD(ah, AR_AN_TOP1, AR_AN_TOP1_DACIPMODE,
                                      eep->baseEepHeader.dacLpMode);
 
+               udelay(100);
+
                REG_RMW_FIELD(ah, AR_PHY_FRAME_CTL, AR_PHY_FRAME_CTL_TX_CLIP,
                              pModal->miscBits >> 2);
 
@@ -902,7 +904,7 @@ static void ath9k_hw_set_def_power_per_rate_table(struct ath_hw *ah,
                                                  u16 powerLimit)
 {
 #define REDUCE_SCALED_POWER_BY_TWO_CHAIN     6  /* 10*log10(2)*2 */
-#define REDUCE_SCALED_POWER_BY_THREE_CHAIN   10 /* 10*log10(3)*2 */
+#define REDUCE_SCALED_POWER_BY_THREE_CHAIN   9 /* 10*log10(3)*2, rounded down */
 
        struct ath_regulatory *regulatory = ath9k_hw_regulatory(ah);
        struct ar5416_eeprom_def *pEepData = &ah->eeprom.def;
index b6c6cca07812fd4c3c5d998787a5f7e0c4085b49..ca7694caf36425bf278d786af0bdd95c3713b99e 100644 (file)
@@ -842,7 +842,7 @@ static void ath9k_hw_init_mode_regs(struct ath_hw *ah)
 
 static void ath9k_hw_init_mode_gain_regs(struct ath_hw *ah)
 {
-       if (AR_SREV_9287_11(ah))
+       if (AR_SREV_9287_11_OR_LATER(ah))
                INIT_INI_ARRAY(&ah->iniModesRxGain,
                ar9287Modes_rx_gain_9287_1_1,
                ARRAY_SIZE(ar9287Modes_rx_gain_9287_1_1), 6);
@@ -853,7 +853,7 @@ static void ath9k_hw_init_mode_gain_regs(struct ath_hw *ah)
        else if (AR_SREV_9280_20(ah))
                ath9k_hw_init_rxgain_ini(ah);
 
-       if (AR_SREV_9287_11(ah)) {
+       if (AR_SREV_9287_11_OR_LATER(ah)) {
                INIT_INI_ARRAY(&ah->iniModesTxGain,
                ar9287Modes_tx_gain_9287_1_1,
                ARRAY_SIZE(ar9287Modes_tx_gain_9287_1_1), 6);
@@ -965,7 +965,7 @@ int ath9k_hw_init(struct ath_hw *ah)
        ath9k_hw_init_mode_regs(ah);
 
        if (ah->is_pciexpress)
-               ath9k_hw_configpcipowersave(ah, 0);
+               ath9k_hw_configpcipowersave(ah, 0, 0);
        else
                ath9k_hw_disablepcie(ah);
 
@@ -1273,6 +1273,15 @@ static void ath9k_hw_override_ini(struct ath_hw *ah,
         */
        REG_SET_BIT(ah, AR_DIAG_SW, (AR_DIAG_RX_DIS | AR_DIAG_RX_ABORT));
 
+       if (AR_SREV_9280_10_OR_LATER(ah)) {
+               val = REG_READ(ah, AR_PCU_MISC_MODE2) &
+                              (~AR_PCU_MISC_MODE2_HWWAR1);
+
+               if (AR_SREV_9287_10_OR_LATER(ah))
+                       val = val & (~AR_PCU_MISC_MODE2_HWWAR2);
+
+               REG_WRITE(ah, AR_PCU_MISC_MODE2, val);
+       }
 
        if (!AR_SREV_5416_20_OR_LATER(ah) ||
            AR_SREV_9280_10_OR_LATER(ah))
@@ -1784,7 +1793,7 @@ static void ath9k_hw_set_regs(struct ath_hw *ah, struct ath9k_channel *chan,
 static bool ath9k_hw_chip_reset(struct ath_hw *ah,
                                struct ath9k_channel *chan)
 {
-       if (OLC_FOR_AR9280_20_LATER) {
+       if (AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL)) {
                if (!ath9k_hw_set_reset_reg(ah, ATH9K_RESET_POWER_ON))
                        return false;
        } else if (!ath9k_hw_set_reset_reg(ah, ATH9K_RESET_WARM))
@@ -2338,6 +2347,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
        struct ath9k_channel *curchan = ah->curchan;
        u32 saveDefAntenna;
        u32 macStaId1;
+       u64 tsf = 0;
        int i, rx_chainmask, r;
 
        ah->extprotspacing = sc->ht_extprotspacing;
@@ -2347,7 +2357,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
        if (!ath9k_hw_setpower(ah, ATH9K_PM_AWAKE))
                return -EIO;
 
-       if (curchan)
+       if (curchan && !ah->chip_fullsleep)
                ath9k_hw_getnf(ah, curchan);
 
        if (bChannelChange &&
@@ -2356,8 +2366,8 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
            (chan->channel != ah->curchan->channel) &&
            ((chan->channelFlags & CHANNEL_ALL) ==
             (ah->curchan->channelFlags & CHANNEL_ALL)) &&
-           (!AR_SREV_9280(ah) || (!IS_CHAN_A_5MHZ_SPACED(chan) &&
-                                  !IS_CHAN_A_5MHZ_SPACED(ah->curchan)))) {
+            !(AR_SREV_9280(ah) || IS_CHAN_A_5MHZ_SPACED(chan) ||
+            IS_CHAN_A_5MHZ_SPACED(ah->curchan))) {
 
                if (ath9k_hw_channel_change(ah, chan, sc->tx_chan_width)) {
                        ath9k_hw_loadnf(ah, ah->curchan);
@@ -2372,6 +2382,10 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
 
        macStaId1 = REG_READ(ah, AR_STA_ID1) & AR_STA_ID1_BASE_RATE_11B;
 
+       /* For chips on which RTC reset is done, save TSF before it gets cleared */
+       if (AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL))
+               tsf = ath9k_hw_gettsf64(ah);
+
        saveLedState = REG_READ(ah, AR_CFG_LED) &
                (AR_CFG_LED_ASSOC_CTL | AR_CFG_LED_MODE_SEL |
                 AR_CFG_LED_BLINK_THRESH_SEL | AR_CFG_LED_BLINK_SLOW);
@@ -2398,6 +2412,10 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan,
                udelay(50);
        }
 
+       /* Restore TSF */
+       if (tsf && AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL))
+               ath9k_hw_settsf64(ah, tsf);
+
        if (AR_SREV_9280_10_OR_LATER(ah))
                REG_SET_BIT(ah, AR_GPIO_INPUT_EN_VAL, AR_GPIO_JTAG_DISABLE);
 
@@ -3005,9 +3023,10 @@ void ath9k_ps_restore(struct ath_softc *sc)
  * Programming the SerDes must go through the same 288 bit serial shift
  * register as the other analog registers.  Hence the 9 writes.
  */
-void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore)
+void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off)
 {
        u8 i;
+       u32 val;
 
        if (ah->is_pciexpress != true)
                return;
@@ -3017,84 +3036,113 @@ void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore)
                return;
 
        /* Nothing to do on restore for 11N */
-       if (restore)
-               return;
+       if (!restore) {
+               if (AR_SREV_9280_20_OR_LATER(ah)) {
+                       /*
+                        * AR9280 2.0 or later chips use SerDes values from the
+                        * initvals.h initialized depending on chipset during
+                        * ath9k_hw_init()
+                        */
+                       for (i = 0; i < ah->iniPcieSerdes.ia_rows; i++) {
+                               REG_WRITE(ah, INI_RA(&ah->iniPcieSerdes, i, 0),
+                                         INI_RA(&ah->iniPcieSerdes, i, 1));
+                       }
+               } else if (AR_SREV_9280(ah) &&
+                          (ah->hw_version.macRev == AR_SREV_REVISION_9280_10)) {
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fd00);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+
+                       /* RX shut off when elecidle is asserted */
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0xa8000019);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x13160820);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980560);
+
+                       /* Shut off CLKREQ active in L1 */
+                       if (ah->config.pcie_clock_req)
+                               REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffc);
+                       else
+                               REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffd);
 
-       if (AR_SREV_9280_20_OR_LATER(ah)) {
-               /*
-                * AR9280 2.0 or later chips use SerDes values from the
-                * initvals.h initialized depending on chipset during
-                * ath9k_hw_init()
-                */
-               for (i = 0; i < ah->iniPcieSerdes.ia_rows; i++) {
-                       REG_WRITE(ah, INI_RA(&ah->iniPcieSerdes, i, 0),
-                                 INI_RA(&ah->iniPcieSerdes, i, 1));
-               }
-       } else if (AR_SREV_9280(ah) &&
-                  (ah->hw_version.macRev == AR_SREV_REVISION_9280_10)) {
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fd00);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x00043007);
 
-               /* RX shut off when elecidle is asserted */
-               REG_WRITE(ah, AR_PCIE_SERDES, 0xa8000019);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x13160820);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980560);
+                       /* Load the new settings */
+                       REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
 
-               /* Shut off CLKREQ active in L1 */
-               if (ah->config.pcie_clock_req)
-                       REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffc);
-               else
-                       REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffd);
-
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x00043007);
+               } else {
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fc00);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
 
-               /* Load the new settings */
-               REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
+                       /* RX shut off when elecidle is asserted */
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x28000039);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x53160824);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980579);
 
-       } else {
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fc00);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+                       /*
+                        * Ignore ah->ah_config.pcie_clock_req setting for
+                        * pre-AR9280 11n
+                        */
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x001defff);
 
-               /* RX shut off when elecidle is asserted */
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x28000039);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x53160824);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980579);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
+                       REG_WRITE(ah, AR_PCIE_SERDES, 0x000e3007);
 
-               /*
-                * Ignore ah->ah_config.pcie_clock_req setting for
-                * pre-AR9280 11n
-                */
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x001defff);
+                       /* Load the new settings */
+                       REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
+               }
 
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
-               REG_WRITE(ah, AR_PCIE_SERDES, 0x000e3007);
+               udelay(1000);
 
-               /* Load the new settings */
-               REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
-       }
+               /* set bit 19 to allow forcing of pcie core into L1 state */
+               REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA);
 
-       udelay(1000);
+               /* Several PCIe massages to ensure proper behaviour */
+               if (ah->config.pcie_waen) {
+                       val = ah->config.pcie_waen;
+                       if (!power_off)
+                               val &= (~AR_WA_D3_L1_DISABLE);
+               } else {
+                       if (AR_SREV_9285(ah) || AR_SREV_9271(ah) ||
+                           AR_SREV_9287(ah)) {
+                               val = AR9285_WA_DEFAULT;
+                               if (!power_off)
+                                       val &= (~AR_WA_D3_L1_DISABLE);
+                       } else if (AR_SREV_9280(ah)) {
+                               /*
+                                * On AR9280 chips bit 22 of 0x4004 needs to be
+                                * set otherwise card may disappear.
+                                */
+                               val = AR9280_WA_DEFAULT;
+                               if (!power_off)
+                                       val &= (~AR_WA_D3_L1_DISABLE);
+                       } else
+                               val = AR_WA_DEFAULT;
+               }
 
-       /* set bit 19 to allow forcing of pcie core into L1 state */
-       REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA);
+               REG_WRITE(ah, AR_WA, val);
+       }
 
-       /* Several PCIe massages to ensure proper behaviour */
-       if (ah->config.pcie_waen) {
-               REG_WRITE(ah, AR_WA, ah->config.pcie_waen);
-       } else {
-               if (AR_SREV_9285(ah) || AR_SREV_9271(ah) || AR_SREV_9287(ah))
-                       REG_WRITE(ah, AR_WA, AR9285_WA_DEFAULT);
+       if (power_off) {
                /*
-                * On AR9280 chips bit 22 of 0x4004 needs to be set to
-                * otherwise card may disappear.
+                * Set PCIe workaround bits.
+                * Bit 14 in the WA register (disable L1) should only
+                * be set when the device enters D3 and cleared when
+                * the device comes back to D0.
                 */
-               else if (AR_SREV_9280(ah))
-                       REG_WRITE(ah, AR_WA, AR9280_WA_DEFAULT);
-               else
-                       REG_WRITE(ah, AR_WA, AR_WA_DEFAULT);
+               if (ah->config.pcie_waen) {
+                       if (ah->config.pcie_waen & AR_WA_D3_L1_DISABLE)
+                               REG_SET_BIT(ah, AR_WA, AR_WA_D3_L1_DISABLE);
+               } else {
+                       if (((AR_SREV_9285(ah) || AR_SREV_9271(ah) ||
+                             AR_SREV_9287(ah)) &&
+                            (AR9285_WA_DEFAULT & AR_WA_D3_L1_DISABLE)) ||
+                           (AR_SREV_9280(ah) &&
+                            (AR9280_WA_DEFAULT & AR_WA_D3_L1_DISABLE))) {
+                               REG_SET_BIT(ah, AR_WA, AR_WA_D3_L1_DISABLE);
+                       }
+               }
        }
 }
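
[Editor's note] The restructured power-save path above is easier to follow when the AR_WA selection is restated as plain C. The sketch below is a minimal re-expression of that logic with the new power_off parameter; pick_wa_value() is a hypothetical helper, everything else uses only names that appear in this hunk. The caller would then issue REG_WRITE(ah, AR_WA, val).

    static u32 pick_wa_value(struct ath_hw *ah, int power_off)
    {
            u32 val;

            if (ah->config.pcie_waen) {
                    val = ah->config.pcie_waen;
                    if (!power_off)
                            val &= ~AR_WA_D3_L1_DISABLE;
            } else if (AR_SREV_9285(ah) || AR_SREV_9271(ah) ||
                       AR_SREV_9287(ah)) {
                    val = AR9285_WA_DEFAULT;
                    if (!power_off)
                            val &= ~AR_WA_D3_L1_DISABLE;
            } else if (AR_SREV_9280(ah)) {
                    /* Bit 22 of 0x4004 must stay set, or the card
                     * may disappear. */
                    val = AR9280_WA_DEFAULT;
                    if (!power_off)
                            val &= ~AR_WA_D3_L1_DISABLE;
            } else {
                    val = AR_WA_DEFAULT;
            }
            return val;
    }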
 
@@ -3652,15 +3700,7 @@ void ath9k_hw_fill_cap_info(struct ath_hw *ah)
        }
 #endif
 
-       if ((ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCI) ||
-           (ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCIE) ||
-           (ah->hw_version.macVersion == AR_SREV_VERSION_9160) ||
-           (ah->hw_version.macVersion == AR_SREV_VERSION_9100) ||
-           (ah->hw_version.macVersion == AR_SREV_VERSION_9280) ||
-           (ah->hw_version.macVersion == AR_SREV_VERSION_9285))
-               pCap->hw_caps &= ~ATH9K_HW_CAP_AUTOSLEEP;
-       else
-               pCap->hw_caps |= ATH9K_HW_CAP_AUTOSLEEP;
+       pCap->hw_caps &= ~ATH9K_HW_CAP_AUTOSLEEP;
 
        if (AR_SREV_9280(ah) || AR_SREV_9285(ah))
                pCap->hw_caps &= ~ATH9K_HW_CAP_4KB_SPLITTRANS;
index 9106a0b537dd0eee34df9764863192ae89ad9d90..b8923457182992df11bc43c90e39ae5b3bff8d4a 100644 (file)
 #define AH_TSF_WRITE_TIMEOUT        100    /* (us) */
 #define AH_TIME_QUANTUM             10
 #define AR_KEYTABLE_SIZE            128
-#define POWER_UP_TIME               200000
+#define POWER_UP_TIME               10000
 #define SPUR_RSSI_THRESH            40
 
 #define CAB_TIMEOUT_VAL             10
@@ -650,7 +650,7 @@ void ath9k_hw_set_sta_beacon_timers(struct ath_hw *ah,
                                    const struct ath9k_beacon_state *bs);
 bool ath9k_hw_setpower(struct ath_hw *ah,
                       enum ath9k_power_mode mode);
-void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore);
+void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off);
 
 /* Interrupt Handling */
 bool ath9k_hw_intrpend(struct ath_hw *ah);
index 3dc7b5a13e64d49bc245d38a82765a2ed56f647b..52bed89063d4e6a0941541683dc116e034ac22fb 100644 (file)
@@ -1131,7 +1131,7 @@ void ath_radio_enable(struct ath_softc *sc)
        int r;
 
        ath9k_ps_wakeup(sc);
-       ath9k_hw_configpcipowersave(ah, 0);
+       ath9k_hw_configpcipowersave(ah, 0, 0);
 
        if (!ah->curchan)
                ah->curchan = ath_get_curchannel(sc, sc->hw);
@@ -1202,7 +1202,7 @@ void ath_radio_disable(struct ath_softc *sc)
        spin_unlock_bh(&sc->sc_resetlock);
 
        ath9k_hw_phy_disable(ah);
-       ath9k_hw_configpcipowersave(ah, 1);
+       ath9k_hw_configpcipowersave(ah, 1, 1);
        ath9k_ps_restore(sc);
        ath9k_hw_setpower(ah, ATH9K_PM_FULL_SLEEP);
 }
@@ -1226,11 +1226,6 @@ static void ath9k_rfkill_poll_state(struct ieee80211_hw *hw)
        bool blocked = !!ath_is_rfkill_set(sc);
 
        wiphy_rfkill_set_hw_state(hw->wiphy, blocked);
-
-       if (blocked)
-               ath_radio_disable(sc);
-       else
-               ath_radio_enable(sc);
 }
 
 static void ath_start_rfkill_poll(struct ath_softc *sc)
@@ -1260,6 +1255,7 @@ void ath_detach(struct ath_softc *sc)
        DPRINTF(sc, ATH_DBG_CONFIG, "Detach ATH hw\n");
 
        ath_deinit_leds(sc);
+       wiphy_rfkill_stop_polling(sc->hw->wiphy);
 
        for (i = 0; i < sc->num_sec_wiphy; i++) {
                struct ath_wiphy *aphy = sc->sec_wiphy[i];
@@ -1942,7 +1938,7 @@ static int ath9k_start(struct ieee80211_hw *hw)
        init_channel = ath_get_curchannel(sc, hw);
 
        /* Reset SERDES registers */
-       ath9k_hw_configpcipowersave(sc->sc_ah, 0);
+       ath9k_hw_configpcipowersave(sc->sc_ah, 0, 0);
 
        /*
         * The basic interface to setting the hardware in a good
@@ -2166,11 +2162,9 @@ static void ath9k_stop(struct ieee80211_hw *hw)
        } else
                sc->rx.rxlink = NULL;
 
-       wiphy_rfkill_stop_polling(sc->hw->wiphy);
-
        /* disable HAL and put h/w to sleep */
        ath9k_hw_disable(sc->sc_ah);
-       ath9k_hw_configpcipowersave(sc->sc_ah, 1);
+       ath9k_hw_configpcipowersave(sc->sc_ah, 1, 1);
        ath9k_hw_setpower(sc->sc_ah, ATH9K_PM_FULL_SLEEP);
 
        sc->sc_flags |= SC_OP_INVALID;
index e5c29eb86e80b9f0024aad8f757699a1083f33d7..d83b77f821e96200812c59ad46c023ad79d6004f 100644 (file)
 #define AR_RC_HOSTIF         0x00000100
 
 #define AR_WA                          0x4004
+#define AR_WA_D3_L1_DISABLE            (1 << 14)
 #define AR9285_WA_DEFAULT              0x004a05cb
-#define AR9280_WA_DEFAULT              0x0040073f
+#define AR9280_WA_DEFAULT              0x0040073b
 #define AR_WA_DEFAULT                  0x0000073f
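
[Editor's note] Two details of the constants above are worth making explicit. First, AR9280_WA_DEFAULT changed from 0x0040073f to 0x0040073b, i.e. only bit 2 was cleared. Second, bit 14 (AR_WA_D3_L1_DISABLE) is clear in all three defaults, so in the power-off path shown earlier the D3 L1-disable bit is only ever set when a user-supplied pcie_waen mask includes it. A standalone check of the arithmetic:

    #include <stdio.h>

    #define AR_WA_D3_L1_DISABLE (1 << 14)   /* 0x00004000 */

    int main(void)
    {
            unsigned int old9280 = 0x0040073f, new9280 = 0x0040073b;

            printf("%#x\n", old9280 ^ new9280);  /* prints 0x4: bit 2 only */
            printf("%d %d %d\n",                 /* prints 0 0 0 */
                   !!(0x004a05cb & AR_WA_D3_L1_DISABLE),   /* AR9285 */
                   !!(new9280    & AR_WA_D3_L1_DISABLE),   /* AR9280 */
                   !!(0x0000073f & AR_WA_D3_L1_DISABLE));  /* generic */
            return 0;
    }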
 
 
index 83e38134accb876cce7f12b743abb72e423c2fce..54ea61c15d8bd18ac16a50df94f25f9c242f6b67 100644 (file)
@@ -61,11 +61,28 @@ config B43_PCMCIA
 
          If unsure, say N.
 
+config B43_SDIO
+       bool "Broadcom 43xx SDIO device support (EXPERIMENTAL)"
+       depends on B43 && SSB_SDIOHOST_POSSIBLE && EXPERIMENTAL
+       select SSB_SDIOHOST
+       ---help---
+         Broadcom 43xx device support for Soft-MAC SDIO devices.
+
+         With this config option you can drive Soft-MAC b43 cards with a
+         Secure Digital I/O interface.
+         This includes the WLAN daughter card found on the Nintendo Wii
+         video game console.
+         Note that this does not support Broadcom 43xx Full-MAC devices.
+
+         It's safe to select Y here, even if you don't have a B43 SDIO device.
+
+         If unsure, say N.
+
 # Data transfers to the device via PIO
-# This is only needed on PCMCIA devices. All others can do DMA properly.
+# This is only needed on PCMCIA and SDIO devices. All others can do DMA properly.
 config B43_PIO
        bool
-       depends on B43 && (B43_PCMCIA || B43_FORCE_PIO)
+       depends on B43 && (B43_SDIO || B43_PCMCIA || B43_FORCE_PIO)
        select SSB_BLOCKIO
        default y
 
index da379f4b0c3a31f9bbfff19c4367b0ff2ff10a21..84772a2542dca6a21148e2449e544027213c5b39 100644 (file)
@@ -16,6 +16,7 @@ b43-$(CONFIG_B43_PIO)         += pio.o
 b43-y                          += rfkill.o
 b43-$(CONFIG_B43_LEDS)         += leds.o
 b43-$(CONFIG_B43_PCMCIA)       += pcmcia.o
+b43-$(CONFIG_B43_SDIO)         += sdio.o
 b43-$(CONFIG_B43_DEBUG)                += debugfs.o
 
 obj-$(CONFIG_B43)              += b43.o
index 09cfe68537b6e2a2057a08fc7a6db2b9a9e9b11b..fa1549a03c71f328cc005f26a7c199e19394ba65 100644 (file)
@@ -629,13 +629,6 @@ struct b43_wl {
         * from the mac80211 subsystem. */
        u16 mac80211_initially_registered_queues;
 
-       /* R/W lock for data transmission.
-        * Transmissions on 2+ queues can run concurrently, but somebody else
-        * might sync with TX by write_lock_irqsave()'ing. */
-       rwlock_t tx_lock;
-       /* Lock for LEDs access. */
-       spinlock_t leds_lock;
-
        /* We can only have one operating interface (802.11 core)
         * at a time. General information about this interface follows.
         */
@@ -686,6 +679,9 @@ struct b43_wl {
        struct work_struct tx_work;
        /* Queue of packets to be transmitted. */
        struct sk_buff_head tx_queue;
+
+       /* The device LEDs. */
+       struct b43_leds leds;
 };
 
 /* The type of the firmware file. */
@@ -768,13 +764,10 @@ struct b43_wldev {
        /* The device initialization status.
         * Use b43_status() to query. */
        atomic_t __init_status;
-       /* Saved init status for handling suspend. */
-       int suspend_init_status;
 
        bool bad_frames_preempt;        /* Use "Bad Frames Preemption" (default off) */
        bool dfq_valid;         /* Directed frame queue valid (IBSS PS mode, ATIM) */
        bool radio_hw_enable;   /* saved state of radio hardware enabled state */
-       bool suspend_in_progress;       /* TRUE, if we are in a suspend/resume cycle */
        bool qos_enabled;               /* TRUE, if QoS is used. */
        bool hwcrypto_enabled;          /* TRUE, if HW crypto acceleration is enabled. */
 
@@ -794,12 +787,6 @@ struct b43_wldev {
        /* Various statistics about the physical device. */
        struct b43_stats stats;
 
-       /* The device LEDs. */
-       struct b43_led led_tx;
-       struct b43_led led_rx;
-       struct b43_led led_assoc;
-       struct b43_led led_radio;
-
        /* Reason code of the last interrupt. */
        u32 irq_reason;
        u32 dma_reason[6];
@@ -830,6 +817,10 @@ struct b43_wldev {
        /* Debugging stuff follows. */
 #ifdef CONFIG_B43_DEBUG
        struct b43_dfsentry *dfsentry;
+       unsigned int irq_count;
+       unsigned int irq_bit_count[32];
+       unsigned int tx_count;
+       unsigned int rx_count;
 #endif
 };
 
index 8f64943e3f608ae30e7fa1504bd152fd84fadb6b..80b19a44a407ea40bd79a2ed157ea463aad846a2 100644 (file)
@@ -689,6 +689,7 @@ static void b43_add_dynamic_debug(struct b43_wldev *dev)
        add_dyn_dbg("debug_lo", B43_DBG_LO, 0);
        add_dyn_dbg("debug_firmware", B43_DBG_FIRMWARE, 0);
        add_dyn_dbg("debug_keys", B43_DBG_KEYS, 0);
+       add_dyn_dbg("debug_verbose_stats", B43_DBG_VERBOSESTATS, 0);
 
 #undef add_dyn_dbg
 }
index e47b4b488b047ab86946908d68fb6e22d21fcd27..822aad8842f4defbe8b24af07b4f0559f53223b5 100644 (file)
@@ -13,6 +13,7 @@ enum b43_dyndbg {             /* Dynamic debugging features */
        B43_DBG_LO,
        B43_DBG_FIRMWARE,
        B43_DBG_KEYS,
+       B43_DBG_VERBOSESTATS,
        __B43_NR_DYNDBG,
 };
 
index a467ee260a19ebd5f31fb72554321af7a5cb0036..8701034569fa520dc09caf90756badc913913714 100644 (file)
@@ -1428,9 +1428,9 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
                                ring->nr_failed_tx_packets++;
                        ring->nr_total_packet_tries += status->frame_count;
 #endif /* DEBUG */
-                       ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb);
+                       ieee80211_tx_status(dev->wl->hw, meta->skb);
 
-                       /* skb is freed by ieee80211_tx_status_irqsafe() */
+                       /* skb is freed by ieee80211_tx_status() */
                        meta->skb = NULL;
                } else {
                        /* No need to call free_descriptor_buffer here, as
index c8b317094c31b489dfbb9091bb7c648f3060d927..fbe3d4f62ce2bbdfb6e67826f97fcf6c06889f43 100644 (file)
 static void b43_led_turn_on(struct b43_wldev *dev, u8 led_index,
                            bool activelow)
 {
-       struct b43_wl *wl = dev->wl;
-       unsigned long flags;
        u16 ctl;
 
-       spin_lock_irqsave(&wl->leds_lock, flags);
        ctl = b43_read16(dev, B43_MMIO_GPIO_CONTROL);
        if (activelow)
                ctl &= ~(1 << led_index);
        else
                ctl |= (1 << led_index);
        b43_write16(dev, B43_MMIO_GPIO_CONTROL, ctl);
-       spin_unlock_irqrestore(&wl->leds_lock, flags);
 }
 
 static void b43_led_turn_off(struct b43_wldev *dev, u8 led_index,
                             bool activelow)
 {
-       struct b43_wl *wl = dev->wl;
-       unsigned long flags;
        u16 ctl;
 
-       spin_lock_irqsave(&wl->leds_lock, flags);
        ctl = b43_read16(dev, B43_MMIO_GPIO_CONTROL);
        if (activelow)
                ctl |= (1 << led_index);
        else
                ctl &= ~(1 << led_index);
        b43_write16(dev, B43_MMIO_GPIO_CONTROL, ctl);
-       spin_unlock_irqrestore(&wl->leds_lock, flags);
 }
 
-/* Callback from the LED subsystem. */
-static void b43_led_brightness_set(struct led_classdev *led_dev,
-                                  enum led_brightness brightness)
+static void b43_led_update(struct b43_wldev *dev,
+                          struct b43_led *led)
 {
-       struct b43_led *led = container_of(led_dev, struct b43_led, led_dev);
-       struct b43_wldev *dev = led->dev;
        bool radio_enabled;
+       bool turn_on;
 
-       if (unlikely(b43_status(dev) < B43_STAT_INITIALIZED))
+       if (!led->wl)
                return;
 
-       /* Checking the radio-enabled status here is slightly racy,
-        * but we want to avoid the locking overhead and we don't care
-        * whether the LED has the wrong state for a second. */
        radio_enabled = (dev->phy.radio_on && dev->radio_hw_enable);
 
-       if (brightness == LED_OFF || !radio_enabled)
-               b43_led_turn_off(dev, led->index, led->activelow);
+       /* The led->state read is racy, but we don't care. In case we raced
+        * with the brightness_set handler, we will be called again soon
+        * to fix up our state. */
+       if (radio_enabled)
+               turn_on = atomic_read(&led->state) != LED_OFF;
        else
+               turn_on = 0;
+       if (turn_on == led->hw_state)
+               return;
+       led->hw_state = turn_on;
+
+       if (turn_on)
                b43_led_turn_on(dev, led->index, led->activelow);
+       else
+               b43_led_turn_off(dev, led->index, led->activelow);
+}
+
+static void b43_leds_work(struct work_struct *work)
+{
+       struct b43_leds *leds = container_of(work, struct b43_leds, work);
+       struct b43_wl *wl = container_of(leds, struct b43_wl, leds);
+       struct b43_wldev *dev;
+
+       mutex_lock(&wl->mutex);
+       dev = wl->current_dev;
+       if (unlikely(!dev || b43_status(dev) < B43_STAT_STARTED))
+               goto out_unlock;
+
+       b43_led_update(dev, &wl->leds.led_tx);
+       b43_led_update(dev, &wl->leds.led_rx);
+       b43_led_update(dev, &wl->leds.led_radio);
+       b43_led_update(dev, &wl->leds.led_assoc);
+
+out_unlock:
+       mutex_unlock(&wl->mutex);
+}
+
+/* Callback from the LED subsystem. */
+static void b43_led_brightness_set(struct led_classdev *led_dev,
+                                  enum led_brightness brightness)
+{
+       struct b43_led *led = container_of(led_dev, struct b43_led, led_dev);
+       struct b43_wl *wl = led->wl;
+
+       if (likely(!wl->leds.stop)) {
+               atomic_set(&led->state, brightness);
+               ieee80211_queue_work(wl->hw, &wl->leds.work);
+       }
 }
 
 static int b43_register_led(struct b43_wldev *dev, struct b43_led *led,
@@ -93,15 +124,15 @@ static int b43_register_led(struct b43_wldev *dev, struct b43_led *led,
 {
        int err;
 
-       b43_led_turn_off(dev, led_index, activelow);
-       if (led->dev)
+       if (led->wl)
                return -EEXIST;
        if (!default_trigger)
                return -EINVAL;
-       led->dev = dev;
+       led->wl = dev->wl;
        led->index = led_index;
        led->activelow = activelow;
        strncpy(led->name, name, sizeof(led->name));
+       atomic_set(&led->state, 0);
 
        led->led_dev.name = led->name;
        led->led_dev.default_trigger = default_trigger;
@@ -110,19 +141,19 @@ static int b43_register_led(struct b43_wldev *dev, struct b43_led *led,
        err = led_classdev_register(dev->dev->dev, &led->led_dev);
        if (err) {
                b43warn(dev->wl, "LEDs: Failed to register %s\n", name);
-               led->dev = NULL;
+               led->wl = NULL;
                return err;
        }
+
        return 0;
 }
 
 static void b43_unregister_led(struct b43_led *led)
 {
-       if (!led->dev)
+       if (!led->wl)
                return;
        led_classdev_unregister(&led->led_dev);
-       b43_led_turn_off(led->dev, led->index, led->activelow);
-       led->dev = NULL;
+       led->wl = NULL;
 }
 
 static void b43_map_led(struct b43_wldev *dev,
@@ -137,24 +168,20 @@ static void b43_map_led(struct b43_wldev *dev,
         * generic LED triggers. */
        switch (behaviour) {
        case B43_LED_INACTIVE:
-               break;
        case B43_LED_OFF:
-               b43_led_turn_off(dev, led_index, activelow);
-               break;
        case B43_LED_ON:
-               b43_led_turn_on(dev, led_index, activelow);
                break;
        case B43_LED_ACTIVITY:
        case B43_LED_TRANSFER:
        case B43_LED_APTRANSFER:
                snprintf(name, sizeof(name),
                         "b43-%s::tx", wiphy_name(hw->wiphy));
-               b43_register_led(dev, &dev->led_tx, name,
+               b43_register_led(dev, &dev->wl->leds.led_tx, name,
                                 ieee80211_get_tx_led_name(hw),
                                 led_index, activelow);
                snprintf(name, sizeof(name),
                         "b43-%s::rx", wiphy_name(hw->wiphy));
-               b43_register_led(dev, &dev->led_rx, name,
+               b43_register_led(dev, &dev->wl->leds.led_rx, name,
                                 ieee80211_get_rx_led_name(hw),
                                 led_index, activelow);
                break;
@@ -164,18 +191,15 @@ static void b43_map_led(struct b43_wldev *dev,
        case B43_LED_MODE_BG:
                snprintf(name, sizeof(name),
                         "b43-%s::radio", wiphy_name(hw->wiphy));
-               b43_register_led(dev, &dev->led_radio, name,
+               b43_register_led(dev, &dev->wl->leds.led_radio, name,
                                 ieee80211_get_radio_led_name(hw),
                                 led_index, activelow);
-               /* Sync the RF-kill LED state with radio and switch states. */
-               if (dev->phy.radio_on && b43_is_hw_radio_enabled(dev))
-                       b43_led_turn_on(dev, led_index, activelow);
                break;
        case B43_LED_WEIRD:
        case B43_LED_ASSOC:
                snprintf(name, sizeof(name),
                         "b43-%s::assoc", wiphy_name(hw->wiphy));
-               b43_register_led(dev, &dev->led_assoc, name,
+               b43_register_led(dev, &dev->wl->leds.led_assoc, name,
                                 ieee80211_get_assoc_led_name(hw),
                                 led_index, activelow);
                break;
@@ -186,58 +210,150 @@ static void b43_map_led(struct b43_wldev *dev,
        }
 }
 
-void b43_leds_init(struct b43_wldev *dev)
+static void b43_led_get_sprominfo(struct b43_wldev *dev,
+                                 unsigned int led_index,
+                                 enum b43_led_behaviour *behaviour,
+                                 bool *activelow)
 {
        struct ssb_bus *bus = dev->dev->bus;
        u8 sprom[4];
-       int i;
-       enum b43_led_behaviour behaviour;
-       bool activelow;
 
        sprom[0] = bus->sprom.gpio0;
        sprom[1] = bus->sprom.gpio1;
        sprom[2] = bus->sprom.gpio2;
        sprom[3] = bus->sprom.gpio3;
 
-       for (i = 0; i < 4; i++) {
-               if (sprom[i] == 0xFF) {
-                       /* There is no LED information in the SPROM
-                        * for this LED. Hardcode it here. */
-                       activelow = 0;
-                       switch (i) {
-                       case 0:
-                               behaviour = B43_LED_ACTIVITY;
-                               activelow = 1;
-                               if (bus->boardinfo.vendor == PCI_VENDOR_ID_COMPAQ)
-                                       behaviour = B43_LED_RADIO_ALL;
-                               break;
-                       case 1:
-                               behaviour = B43_LED_RADIO_B;
-                               if (bus->boardinfo.vendor == PCI_VENDOR_ID_ASUSTEK)
-                                       behaviour = B43_LED_ASSOC;
-                               break;
-                       case 2:
-                               behaviour = B43_LED_RADIO_A;
-                               break;
-                       case 3:
-                               behaviour = B43_LED_OFF;
-                               break;
-                       default:
-                               B43_WARN_ON(1);
-                               return;
-                       }
+       if (sprom[led_index] == 0xFF) {
+               /* There is no LED information in the SPROM
+                * for this LED. Hardcode it here. */
+               *activelow = 0;
+               switch (led_index) {
+               case 0:
+                       *behaviour = B43_LED_ACTIVITY;
+                       *activelow = 1;
+                       if (bus->boardinfo.vendor == PCI_VENDOR_ID_COMPAQ)
+                               *behaviour = B43_LED_RADIO_ALL;
+                       break;
+               case 1:
+                       *behaviour = B43_LED_RADIO_B;
+                       if (bus->boardinfo.vendor == PCI_VENDOR_ID_ASUSTEK)
+                               *behaviour = B43_LED_ASSOC;
+                       break;
+               case 2:
+                       *behaviour = B43_LED_RADIO_A;
+                       break;
+               case 3:
+                       *behaviour = B43_LED_OFF;
+                       break;
+               default:
+                       B43_WARN_ON(1);
+                       return;
+               }
+       } else {
+               *behaviour = sprom[led_index] & B43_LED_BEHAVIOUR;
+               *activelow = !!(sprom[led_index] & B43_LED_ACTIVELOW);
+       }
+}
+
+void b43_leds_init(struct b43_wldev *dev)
+{
+       struct b43_led *led;
+       unsigned int i;
+       enum b43_led_behaviour behaviour;
+       bool activelow;
+
+       /* Sync the RF-kill LED state (if we have one) with radio and switch states. */
+       led = &dev->wl->leds.led_radio;
+       if (led->wl) {
+               if (dev->phy.radio_on && b43_is_hw_radio_enabled(dev)) {
+                       b43_led_turn_on(dev, led->index, led->activelow);
+                       led->hw_state = 1;
+                       atomic_set(&led->state, 1);
                } else {
-                       behaviour = sprom[i] & B43_LED_BEHAVIOUR;
-                       activelow = !!(sprom[i] & B43_LED_ACTIVELOW);
+                       b43_led_turn_off(dev, led->index, led->activelow);
+                       led->hw_state = 0;
+                       atomic_set(&led->state, 0);
                }
-               b43_map_led(dev, i, behaviour, activelow);
        }
+
+       /* Initialize TX/RX/ASSOC leds */
+       led = &dev->wl->leds.led_tx;
+       if (led->wl) {
+               b43_led_turn_off(dev, led->index, led->activelow);
+               led->hw_state = 0;
+               atomic_set(&led->state, 0);
+       }
+       led = &dev->wl->leds.led_rx;
+       if (led->wl) {
+               b43_led_turn_off(dev, led->index, led->activelow);
+               led->hw_state = 0;
+               atomic_set(&led->state, 0);
+       }
+       led = &dev->wl->leds.led_assoc;
+       if (led->wl) {
+               b43_led_turn_off(dev, led->index, led->activelow);
+               led->hw_state = 0;
+               atomic_set(&led->state, 0);
+       }
+
+       /* Initialize other LED states. */
+       for (i = 0; i < B43_MAX_NR_LEDS; i++) {
+               b43_led_get_sprominfo(dev, i, &behaviour, &activelow);
+               switch (behaviour) {
+               case B43_LED_OFF:
+                       b43_led_turn_off(dev, i, activelow);
+                       break;
+               case B43_LED_ON:
+                       b43_led_turn_on(dev, i, activelow);
+                       break;
+               default:
+                       /* Leave others as-is. */
+                       break;
+               }
+       }
+
+       dev->wl->leds.stop = 0;
 }
 
 void b43_leds_exit(struct b43_wldev *dev)
 {
-       b43_unregister_led(&dev->led_tx);
-       b43_unregister_led(&dev->led_rx);
-       b43_unregister_led(&dev->led_assoc);
-       b43_unregister_led(&dev->led_radio);
+       struct b43_leds *leds = &dev->wl->leds;
+
+       b43_led_turn_off(dev, leds->led_tx.index, leds->led_tx.activelow);
+       b43_led_turn_off(dev, leds->led_rx.index, leds->led_rx.activelow);
+       b43_led_turn_off(dev, leds->led_assoc.index, leds->led_assoc.activelow);
+       b43_led_turn_off(dev, leds->led_radio.index, leds->led_radio.activelow);
+}
+
+void b43_leds_stop(struct b43_wldev *dev)
+{
+       struct b43_leds *leds = &dev->wl->leds;
+
+       leds->stop = 1;
+       cancel_work_sync(&leds->work);
+}
+
+void b43_leds_register(struct b43_wldev *dev)
+{
+       unsigned int i;
+       enum b43_led_behaviour behaviour;
+       bool activelow;
+
+       INIT_WORK(&dev->wl->leds.work, b43_leds_work);
+
+       /* Register the LEDs to the LED subsystem. */
+       for (i = 0; i < B43_MAX_NR_LEDS; i++) {
+               b43_led_get_sprominfo(dev, i, &behaviour, &activelow);
+               b43_map_led(dev, i, behaviour, activelow);
+       }
+}
+
+void b43_leds_unregister(struct b43_wldev *dev)
+{
+       struct b43_leds *leds = &dev->wl->leds;
+
+       b43_unregister_led(&leds->led_tx);
+       b43_unregister_led(&leds->led_rx);
+       b43_unregister_led(&leds->led_assoc);
+       b43_unregister_led(&leds->led_radio);
 }
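
[Editor's note] The LED rework above trades the old leds_lock spinlock for a lockless handoff: the LED-class brightness callback, which may run in atomic context, only records the requested state and queues work; the GPIO MMIO happens later in a worker that can sleep and take wl->mutex. A minimal sketch of the pattern (hypothetical demo_* names; the leds.stop check is omitted for brevity):

    static void demo_brightness_set(struct led_classdev *led_dev,
                                    enum led_brightness brightness)
    {
            struct b43_led *led =
                    container_of(led_dev, struct b43_led, led_dev);

            atomic_set(&led->state, brightness);    /* lockless handoff */
            ieee80211_queue_work(led->wl->hw, &led->wl->leds.work);
    }

    static void demo_leds_work(struct work_struct *work)
    {
            struct b43_leds *leds = container_of(work, struct b43_leds, work);
            struct b43_wl *wl = container_of(leds, struct b43_wl, leds);

            mutex_lock(&wl->mutex);         /* worker context: may sleep */
            /* ... read the atomic led->state and program the GPIO,
             *     as b43_led_update() does above ... */
            mutex_unlock(&wl->mutex);
    }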
index b8b1dd5212434d1afda597c4ab1d5624ee9dcfbe..9592e4c5a5f5596ee8c4e041307f2d137067dc15 100644 (file)
@@ -7,12 +7,13 @@ struct b43_wldev;
 
 #include <linux/types.h>
 #include <linux/leds.h>
+#include <linux/workqueue.h>
 
 
 #define B43_LED_MAX_NAME_LEN   31
 
 struct b43_led {
-       struct b43_wldev *dev;
+       struct b43_wl *wl;
        /* The LED class device */
        struct led_classdev led_dev;
        /* The index number of the LED. */
@@ -22,8 +23,24 @@ struct b43_led {
        bool activelow;
        /* The unique name string for this LED device. */
        char name[B43_LED_MAX_NAME_LEN + 1];
+       /* The current status of the LED. This is updated locklessly. */
+       atomic_t state;
+       /* The active state in hardware. */
+       bool hw_state;
 };
 
+struct b43_leds {
+       struct b43_led led_tx;
+       struct b43_led led_rx;
+       struct b43_led led_radio;
+       struct b43_led led_assoc;
+
+       bool stop;
+       struct work_struct work;
+};
+
+#define B43_MAX_NR_LEDS                        4
+
 #define B43_LED_BEHAVIOUR              0x7F
 #define B43_LED_ACTIVELOW              0x80
 /* LED behaviour values */
@@ -42,23 +59,35 @@ enum b43_led_behaviour {
        B43_LED_INACTIVE,
 };
 
+void b43_leds_register(struct b43_wldev *dev);
+void b43_leds_unregister(struct b43_wldev *dev);
 void b43_leds_init(struct b43_wldev *dev);
 void b43_leds_exit(struct b43_wldev *dev);
+void b43_leds_stop(struct b43_wldev *dev);
 
 
 #else /* CONFIG_B43_LEDS */
 /* LED support disabled */
 
-struct b43_led {
+struct b43_leds {
        /* empty */
 };
 
+static inline void b43_leds_register(struct b43_wldev *dev)
+{
+}
+static inline void b43_leds_unregister(struct b43_wldev *dev)
+{
+}
 static inline void b43_leds_init(struct b43_wldev *dev)
 {
 }
 static inline void b43_leds_exit(struct b43_wldev *dev)
 {
 }
+static inline void b43_leds_stop(struct b43_wldev *dev)
+{
+}
 #endif /* CONFIG_B43_LEDS */
 
 #endif /* B43_LEDS_H_ */
index e789792a36bc468f88d688f984c299a78927424c..9b907a36bb8c90608170d1505428c34f898e6164 100644 (file)
@@ -8,6 +8,9 @@
   Copyright (c) 2005 Danny van Dyk <kugelfang@gentoo.org>
   Copyright (c) 2005 Andreas Jaggi <andreas.jaggi@waterwave.ch>
 
+  SDIO support
+  Copyright (c) 2009 Albert Herranz <albert_herranz@yahoo.es>
+
   Some parts of the code in this file are derived from the ipw2200
   driver  Copyright(c) 2003 - 2004 Intel Corporation.
 
@@ -53,6 +56,8 @@
 #include "xmit.h"
 #include "lo.h"
 #include "pcmcia.h"
+#include "sdio.h"
+#include <linux/mmc/sdio_func.h>
 
 MODULE_DESCRIPTION("Broadcom B43 wireless driver");
 MODULE_AUTHOR("Martin Langer");
@@ -1587,7 +1592,7 @@ static void b43_beacon_update_trigger_work(struct work_struct *work)
        mutex_lock(&wl->mutex);
        dev = wl->current_dev;
        if (likely(dev && (b43_status(dev) >= B43_STAT_INITIALIZED))) {
-               if (0 /*FIXME dev->dev->bus->bustype == SSB_BUSTYPE_SDIO*/) {
+               if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
                        /* wl->mutex is enough. */
                        b43_do_beacon_update_trigger_work(dev);
                        mmiowb();
@@ -1825,6 +1830,16 @@ static void b43_do_interrupt_thread(struct b43_wldev *dev)
 
        /* Re-enable interrupts on the device by restoring the current interrupt mask. */
        b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, dev->irq_mask);
+
+#if B43_DEBUG
+       if (b43_debug(dev, B43_DBG_VERBOSESTATS)) {
+               dev->irq_count++;
+               for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) {
+                       if (reason & (1 << i))
+                               dev->irq_bit_count[i]++;
+               }
+       }
+#endif
 }
 
 /* Interrupt thread handler. Handles device interrupts in thread context. */
@@ -1905,6 +1920,21 @@ static irqreturn_t b43_interrupt_handler(int irq, void *dev_id)
        return ret;
 }
 
+/* SDIO interrupt handler. This runs in process context. */
+static void b43_sdio_interrupt_handler(struct b43_wldev *dev)
+{
+       struct b43_wl *wl = dev->wl;
+       irqreturn_t ret;
+
+       mutex_lock(&wl->mutex);
+
+       ret = b43_do_interrupt(dev);
+       if (ret == IRQ_WAKE_THREAD)
+               b43_do_interrupt_thread(dev);
+
+       mutex_unlock(&wl->mutex);
+}
+
 void b43_do_release_fw(struct b43_firmware_file *fw)
 {
        release_firmware(fw->data);
@@ -2645,6 +2675,20 @@ static void b43_adjust_opmode(struct b43_wldev *dev)
                        cfp_pretbtt = 50;
        }
        b43_write16(dev, 0x612, cfp_pretbtt);
+
+       /* FIXME: We don't currently implement the PMQ mechanism,
+        *        so always disable it. If we want to implement PMQ,
+        *        we need to enable it here (clear DISCPMQ) in AP mode.
+        */
+       if (0  /* ctl & B43_MACCTL_AP */) {
+               b43_write32(dev, B43_MMIO_MACCTL,
+                           b43_read32(dev, B43_MMIO_MACCTL)
+                           & ~B43_MACCTL_DISCPMQ);
+       } else {
+               b43_write32(dev, B43_MMIO_MACCTL,
+                           b43_read32(dev, B43_MMIO_MACCTL)
+                           | B43_MACCTL_DISCPMQ);
+       }
 }
 
 static void b43_rate_memory_write(struct b43_wldev *dev, u16 rate, int is_ofdm)
@@ -2873,6 +2917,27 @@ static void b43_periodic_every15sec(struct b43_wldev *dev)
 
        atomic_set(&phy->txerr_cnt, B43_PHY_TX_BADNESS_LIMIT);
        wmb();
+
+#if B43_DEBUG
+       if (b43_debug(dev, B43_DBG_VERBOSESTATS)) {
+               unsigned int i;
+
+               b43dbg(dev->wl, "Stats: %7u IRQs/sec, %7u TX/sec, %7u RX/sec\n",
+                      dev->irq_count / 15,
+                      dev->tx_count / 15,
+                      dev->rx_count / 15);
+               dev->irq_count = 0;
+               dev->tx_count = 0;
+               dev->rx_count = 0;
+               for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) {
+                       if (dev->irq_bit_count[i]) {
+                               b43dbg(dev->wl, "Stats: %7u IRQ-%02u/sec (0x%08X)\n",
+                                      dev->irq_bit_count[i] / 15, i, (1 << i));
+                               dev->irq_bit_count[i] = 0;
+                       }
+               }
+       }
+#endif
 }
 
 static void do_periodic_work(struct b43_wldev *dev)
@@ -3002,14 +3067,18 @@ static void b43_security_init(struct b43_wldev *dev)
 static int b43_rng_read(struct hwrng *rng, u32 *data)
 {
        struct b43_wl *wl = (struct b43_wl *)rng->priv;
+       struct b43_wldev *dev;
+       int count = -ENODEV;
 
-       /* FIXME: We need to take wl->mutex here to make sure the device
-        * is not going away from under our ass. However it could deadlock
-        * with hwrng internal locking. */
-
-       *data = b43_read16(wl->current_dev, B43_MMIO_RNG);
+       mutex_lock(&wl->mutex);
+       dev = wl->current_dev;
+       if (likely(dev && b43_status(dev) >= B43_STAT_INITIALIZED)) {
+               *data = b43_read16(dev, B43_MMIO_RNG);
+               count = sizeof(u16);
+       }
+       mutex_unlock(&wl->mutex);
 
-       return (sizeof(u16));
+       return count;
 }
 #endif /* CONFIG_B43_HWRNG */
 
@@ -3068,6 +3137,9 @@ static void b43_tx_work(struct work_struct *work)
                        dev_kfree_skb(skb); /* Drop it */
        }
 
+#if B43_DEBUG
+       dev->tx_count++;
+#endif
        mutex_unlock(&wl->mutex);
 }
 
@@ -3820,7 +3892,7 @@ redo:
 
        /* Disable interrupts on the device. */
        b43_set_status(dev, B43_STAT_INITIALIZED);
-       if (0 /*FIXME dev->dev->bus->bustype == SSB_BUSTYPE_SDIO*/) {
+       if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
                /* wl->mutex is locked. That is enough. */
                b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, 0);
                b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); /* Flush */
@@ -3830,10 +3902,15 @@ redo:
                b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); /* Flush */
                spin_unlock_irq(&wl->hardirq_lock);
        }
-       /* Synchronize the interrupt handlers. Unlock to avoid deadlocks. */
+       /* Synchronize and free the interrupt handlers. Unlock to avoid deadlocks. */
        orig_dev = dev;
        mutex_unlock(&wl->mutex);
-       synchronize_irq(dev->dev->irq);
+       if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
+               b43_sdio_free_irq(dev);
+       } else {
+               synchronize_irq(dev->dev->irq);
+               free_irq(dev->dev->irq, dev);
+       }
        mutex_lock(&wl->mutex);
        dev = wl->current_dev;
        if (!dev)
@@ -3850,7 +3927,7 @@ redo:
                dev_kfree_skb(skb_dequeue(&wl->tx_queue));
 
        b43_mac_suspend(dev);
-       free_irq(dev->dev->irq, dev);
+       b43_leds_exit(dev);
        b43dbg(wl, "Wireless interface stopped\n");
 
        return dev;
@@ -3864,12 +3941,20 @@ static int b43_wireless_core_start(struct b43_wldev *dev)
        B43_WARN_ON(b43_status(dev) != B43_STAT_INITIALIZED);
 
        drain_txstatus_queue(dev);
-       err = request_threaded_irq(dev->dev->irq, b43_interrupt_handler,
-                                  b43_interrupt_thread_handler,
-                                  IRQF_SHARED, KBUILD_MODNAME, dev);
-       if (err) {
-               b43err(dev->wl, "Cannot request IRQ-%d\n", dev->dev->irq);
-               goto out;
+       if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
+               err = b43_sdio_request_irq(dev, b43_sdio_interrupt_handler);
+               if (err) {
+                       b43err(dev->wl, "Cannot request SDIO IRQ\n");
+                       goto out;
+               }
+       } else {
+               err = request_threaded_irq(dev->dev->irq, b43_interrupt_handler,
+                                          b43_interrupt_thread_handler,
+                                          IRQF_SHARED, KBUILD_MODNAME, dev);
+               if (err) {
+                       b43err(dev->wl, "Cannot request IRQ-%d\n", dev->dev->irq);
+                       goto out;
+               }
        }
 
        /* We are ready to run. */
@@ -3882,8 +3967,10 @@ static int b43_wireless_core_start(struct b43_wldev *dev)
        /* Start maintainance work */
        b43_periodic_tasks_setup(dev);
 
+       b43_leds_init(dev);
+
        b43dbg(dev->wl, "Wireless interface started\n");
-      out:
+out:
        return err;
 }
 
@@ -4160,10 +4247,6 @@ static void b43_wireless_core_exit(struct b43_wldev *dev)
        macctl |= B43_MACCTL_PSM_JMP0;
        b43_write32(dev, B43_MMIO_MACCTL, macctl);
 
-       if (!dev->suspend_in_progress) {
-               b43_leds_exit(dev);
-               b43_rng_exit(dev->wl);
-       }
        b43_dma_free(dev);
        b43_pio_free(dev);
        b43_chip_exit(dev);
@@ -4180,7 +4263,6 @@ static void b43_wireless_core_exit(struct b43_wldev *dev)
 /* Initialize a wireless core */
 static int b43_wireless_core_init(struct b43_wldev *dev)
 {
-       struct b43_wl *wl = dev->wl;
        struct ssb_bus *bus = dev->dev->bus;
        struct ssb_sprom *sprom = &bus->sprom;
        struct b43_phy *phy = &dev->phy;
@@ -4264,7 +4346,9 @@ static int b43_wireless_core_init(struct b43_wldev *dev)
        /* Maximum Contention Window */
        b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_MAXCONT, 0x3FF);
 
-       if ((dev->dev->bus->bustype == SSB_BUSTYPE_PCMCIA) || B43_FORCE_PIO) {
+       if ((dev->dev->bus->bustype == SSB_BUSTYPE_PCMCIA) ||
+           (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) ||
+           B43_FORCE_PIO) {
                dev->__using_pio_transfers = 1;
                err = b43_pio_init(dev);
        } else {
@@ -4280,15 +4364,13 @@ static int b43_wireless_core_init(struct b43_wldev *dev)
        ssb_bus_powerup(bus, !(sprom->boardflags_lo & B43_BFL_XTAL_NOSLOW));
        b43_upload_card_macaddress(dev);
        b43_security_init(dev);
-       if (!dev->suspend_in_progress)
-               b43_rng_init(wl);
 
        ieee80211_wake_queues(dev->wl->hw);
 
        b43_set_status(dev, B43_STAT_INITIALIZED);
 
-       if (!dev->suspend_in_progress)
-               b43_leds_init(dev);
 out:
        return err;
 
@@ -4837,7 +4919,6 @@ static int b43_wireless_init(struct ssb_device *dev)
 
        /* Initialize struct b43_wl */
        wl->hw = hw;
-       spin_lock_init(&wl->leds_lock);
        mutex_init(&wl->mutex);
        spin_lock_init(&wl->hardirq_lock);
        INIT_LIST_HEAD(&wl->devlist);
@@ -4878,6 +4959,8 @@ static int b43_probe(struct ssb_device *dev, const struct ssb_device_id *id)
                err = ieee80211_register_hw(wl->hw);
                if (err)
                        goto err_one_core_detach;
+               b43_leds_register(wl->current_dev);
+               b43_rng_init(wl);
        }
 
       out:
@@ -4906,12 +4989,15 @@ static void b43_remove(struct ssb_device *dev)
                 * might have modified it. Restoring is important, so the networking
                 * stack can properly free resources. */
                wl->hw->queues = wl->mac80211_initially_registered_queues;
+               b43_leds_stop(wldev);
                ieee80211_unregister_hw(wl->hw);
        }
 
        b43_one_core_detach(dev);
 
        if (list_empty(&wl->devlist)) {
+               b43_rng_exit(wl);
+               b43_leds_unregister(wldev);
                /* Last core on the chip unregistered.
                 * We can destroy common struct b43_wl.
                 */
@@ -4929,80 +5015,17 @@ void b43_controller_restart(struct b43_wldev *dev, const char *reason)
        ieee80211_queue_work(dev->wl->hw, &dev->restart_work);
 }
 
-#ifdef CONFIG_PM
-
-static int b43_suspend(struct ssb_device *dev, pm_message_t state)
-{
-       struct b43_wldev *wldev = ssb_get_drvdata(dev);
-       struct b43_wl *wl = wldev->wl;
-
-       b43dbg(wl, "Suspending...\n");
-
-       mutex_lock(&wl->mutex);
-       wldev->suspend_in_progress = true;
-       wldev->suspend_init_status = b43_status(wldev);
-       if (wldev->suspend_init_status >= B43_STAT_STARTED)
-               wldev = b43_wireless_core_stop(wldev);
-       if (wldev && wldev->suspend_init_status >= B43_STAT_INITIALIZED)
-               b43_wireless_core_exit(wldev);
-       mutex_unlock(&wl->mutex);
-
-       b43dbg(wl, "Device suspended.\n");
-
-       return 0;
-}
-
-static int b43_resume(struct ssb_device *dev)
-{
-       struct b43_wldev *wldev = ssb_get_drvdata(dev);
-       struct b43_wl *wl = wldev->wl;
-       int err = 0;
-
-       b43dbg(wl, "Resuming...\n");
-
-       mutex_lock(&wl->mutex);
-       if (wldev->suspend_init_status >= B43_STAT_INITIALIZED) {
-               err = b43_wireless_core_init(wldev);
-               if (err) {
-                       b43err(wl, "Resume failed at core init\n");
-                       goto out;
-               }
-       }
-       if (wldev->suspend_init_status >= B43_STAT_STARTED) {
-               err = b43_wireless_core_start(wldev);
-               if (err) {
-                       b43_leds_exit(wldev);
-                       b43_rng_exit(wldev->wl);
-                       b43_wireless_core_exit(wldev);
-                       b43err(wl, "Resume failed at core start\n");
-                       goto out;
-               }
-       }
-       b43dbg(wl, "Device resumed.\n");
- out:
-       wldev->suspend_in_progress = false;
-       mutex_unlock(&wl->mutex);
-       return err;
-}
-
-#else /* CONFIG_PM */
-# define b43_suspend   NULL
-# define b43_resume    NULL
-#endif /* CONFIG_PM */
-
 static struct ssb_driver b43_ssb_driver = {
        .name           = KBUILD_MODNAME,
        .id_table       = b43_ssb_tbl,
        .probe          = b43_probe,
        .remove         = b43_remove,
-       .suspend        = b43_suspend,
-       .resume         = b43_resume,
 };
 
 static void b43_print_driverinfo(void)
 {
        const char *feat_pci = "", *feat_pcmcia = "", *feat_nphy = "",
-                  *feat_leds = "";
+                  *feat_leds = "", *feat_sdio = "";
 
 #ifdef CONFIG_B43_PCI_AUTOSELECT
        feat_pci = "P";
@@ -5015,12 +5038,15 @@ static void b43_print_driverinfo(void)
 #endif
 #ifdef CONFIG_B43_LEDS
        feat_leds = "L";
+#endif
+#ifdef CONFIG_B43_SDIO
+       feat_sdio = "S";
 #endif
        printk(KERN_INFO "Broadcom 43xx driver loaded "
-              "[ Features: %s%s%s%s, Firmware-ID: "
+              "[ Features: %s%s%s%s%s, Firmware-ID: "
               B43_SUPPORTED_FIRMWARE_ID " ]\n",
               feat_pci, feat_pcmcia, feat_nphy,
-              feat_leds);
+              feat_leds, feat_sdio);
 }
 
 static int __init b43_init(void)
@@ -5031,13 +5057,18 @@ static int __init b43_init(void)
        err = b43_pcmcia_init();
        if (err)
                goto err_dfs_exit;
-       err = ssb_driver_register(&b43_ssb_driver);
+       err = b43_sdio_init();
        if (err)
                goto err_pcmcia_exit;
+       err = ssb_driver_register(&b43_ssb_driver);
+       if (err)
+               goto err_sdio_exit;
        b43_print_driverinfo();
 
        return err;
 
+err_sdio_exit:
+       b43_sdio_exit();
 err_pcmcia_exit:
        b43_pcmcia_exit();
 err_dfs_exit:
@@ -5048,6 +5079,7 @@ err_dfs_exit:
 static void __exit b43_exit(void)
 {
        ssb_driver_unregister(&b43_ssb_driver);
+       b43_sdio_exit();
        b43_pcmcia_exit();
        b43_debugfs_exit();
 }
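
[Editor's note] b43_init() now registers three backends and unwinds them in strict reverse order on failure. The sketch below isolates that goto-unwind idiom; the b43_debugfs_exit() call under err_dfs_exit is implied by the label (its body is cut off in the hunk above) and is assumed here.

    static int __init demo_init(void)
    {
            int err;

            /* b43_debugfs_init() is assumed to have run already. */
            err = b43_pcmcia_init();
            if (err)
                    goto err_dfs_exit;
            err = b43_sdio_init();
            if (err)
                    goto err_pcmcia_exit;
            err = ssb_driver_register(&b43_ssb_driver);
            if (err)
                    goto err_sdio_exit;
            return 0;

    err_sdio_exit:                  /* undo in reverse registration order */
            b43_sdio_exit();
    err_pcmcia_exit:
            b43_pcmcia_exit();
    err_dfs_exit:
            b43_debugfs_exit();
            return err;
    }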
index 3e02d969f6839b70416da8cc1abdb9f6d11fd9d0..1e318d815a5b97b4a98d6c66304e2a2e65dcd439 100644 (file)
@@ -2228,6 +2228,16 @@ static enum b43_txpwr_result b43_lpphy_op_recalc_txpower(struct b43_wldev *dev,
        return B43_TXPWR_RES_DONE;
 }
 
+void b43_lpphy_op_switch_analog(struct b43_wldev *dev, bool on)
+{
+       if (on) {
+               b43_phy_mask(dev, B43_LPPHY_AFE_CTL_OVR, 0xfff8);
+       } else {
+               b43_phy_set(dev, B43_LPPHY_AFE_CTL_OVRVAL, 0x0007);
+               b43_phy_set(dev, B43_LPPHY_AFE_CTL_OVR, 0x0007);
+       }
+}
+
 const struct b43_phy_operations b43_phyops_lp = {
        .allocate               = b43_lpphy_op_allocate,
        .free                   = b43_lpphy_op_free,
@@ -2239,7 +2249,7 @@ const struct b43_phy_operations b43_phyops_lp = {
        .radio_read             = b43_lpphy_op_radio_read,
        .radio_write            = b43_lpphy_op_radio_write,
        .software_rfkill        = b43_lpphy_op_software_rfkill,
-       .switch_analog          = b43_phyop_switch_analog_generic,
+       .switch_analog          = b43_lpphy_op_switch_analog,
        .switch_channel         = b43_lpphy_op_switch_channel,
        .get_default_chan       = b43_lpphy_op_get_default_chan,
        .set_rx_antenna         = b43_lpphy_op_set_rx_antenna,
index 3498b68385e78086e5d60d20cd4dca84cb2251ad..e96091b314995ec0ebd2f66bfc860a61ca255ac9 100644 (file)
@@ -574,7 +574,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev,
        q->buffer_used -= total_len;
        q->free_packet_slots += 1;
 
-       ieee80211_tx_status_irqsafe(dev->wl->hw, pack->skb);
+       ieee80211_tx_status(dev->wl->hw, pack->skb);
        pack->skb = NULL;
        list_add(&pack->list, &q->packets_list);
 
index 31e55999893f7084803221af05cb741e443ecf11..7a3218c5ba7dd69085afbddd65d38d6d40a08b08 100644 (file)
@@ -28,7 +28,7 @@
 /* Returns TRUE, if the radio is enabled in hardware. */
 bool b43_is_hw_radio_enabled(struct b43_wldev *dev)
 {
-       if (dev->phy.rev >= 3) {
+       if (dev->phy.rev >= 3 || dev->phy.type == B43_PHYTYPE_LP) {
                if (!(b43_read32(dev, B43_MMIO_RADIO_HWENABLED_HI)
                      & B43_MMIO_RADIO_HWENABLED_HI_MASK))
                        return 1;
diff --git a/drivers/net/wireless/b43/sdio.c b/drivers/net/wireless/b43/sdio.c
new file mode 100644 (file)
index 0000000..0d3ac64
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * Broadcom B43 wireless driver
+ *
+ * SDIO over Sonics Silicon Backplane bus glue for b43.
+ *
+ * Copyright (C) 2009 Albert Herranz
+ * Copyright (C) 2009 Michael Buesch <mb@bu3sch.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/sdio_func.h>
+#include <linux/mmc/sdio_ids.h>
+#include <linux/ssb/ssb.h>
+
+#include "sdio.h"
+#include "b43.h"
+
+
+#define HNBU_CHIPID            0x01    /* vendor & device id */
+
+#define B43_SDIO_BLOCK_SIZE    64      /* rx fifo max size in bytes */
+
+
+static const struct b43_sdio_quirk {
+       u16 vendor;
+       u16 device;
+       unsigned int quirks;
+} b43_sdio_quirks[] = {
+       { 0x14E4, 0x4318, SSB_QUIRK_SDIO_READ_AFTER_WRITE32, },
+       { },
+};
+
+
+static unsigned int b43_sdio_get_quirks(u16 vendor, u16 device)
+{
+       const struct b43_sdio_quirk *q;
+
+       for (q = b43_sdio_quirks; q->quirks; q++) {
+               if (vendor == q->vendor && device == q->device)
+                       return q->quirks;
+       }
+
+       return 0;
+}
+
+static void b43_sdio_interrupt_dispatcher(struct sdio_func *func)
+{
+       struct b43_sdio *sdio = sdio_get_drvdata(func);
+       struct b43_wldev *dev = sdio->irq_handler_opaque;
+
+       if (unlikely(b43_status(dev) < B43_STAT_STARTED))
+               return;
+
+       sdio_release_host(func);
+       sdio->irq_handler(dev);
+       sdio_claim_host(func);
+}
+
+int b43_sdio_request_irq(struct b43_wldev *dev,
+                        void (*handler)(struct b43_wldev *dev))
+{
+       struct ssb_bus *bus = dev->dev->bus;
+       struct sdio_func *func = bus->host_sdio;
+       struct b43_sdio *sdio = sdio_get_drvdata(func);
+       int err;
+
+       sdio->irq_handler_opaque = dev;
+       sdio->irq_handler = handler;
+       sdio_claim_host(func);
+       err = sdio_claim_irq(func, b43_sdio_interrupt_dispatcher);
+       sdio_release_host(func);
+
+       return err;
+}
+
+void b43_sdio_free_irq(struct b43_wldev *dev)
+{
+       struct ssb_bus *bus = dev->dev->bus;
+       struct sdio_func *func = bus->host_sdio;
+       struct b43_sdio *sdio = sdio_get_drvdata(func);
+
+       sdio_claim_host(func);
+       sdio_release_irq(func);
+       sdio_release_host(func);
+       sdio->irq_handler_opaque = NULL;
+       sdio->irq_handler = NULL;
+}
+
+static int b43_sdio_probe(struct sdio_func *func,
+                         const struct sdio_device_id *id)
+{
+       struct b43_sdio *sdio;
+       struct sdio_func_tuple *tuple;
+       u16 vendor = 0, device = 0;
+       int error;
+
+       /* Look for the card chip identifier. */
+       tuple = func->tuples;
+       while (tuple) {
+               switch (tuple->code) {
+               case 0x80:
+                       switch (tuple->data[0]) {
+                       case HNBU_CHIPID:
+                               if (tuple->size != 5)
+                                       break;
+                               vendor = tuple->data[1] | (tuple->data[2]<<8);
+                               device = tuple->data[3] | (tuple->data[4]<<8);
+                               dev_info(&func->dev, "Chip ID %04x:%04x\n",
+                                        vendor, device);
+                               break;
+                       default:
+                               break;
+                       }
+                       break;
+               default:
+                       break;
+               }
+               tuple = tuple->next;
+       }
+       if (!vendor || !device) {
+               error = -ENODEV;
+               goto out;
+       }
+
+       sdio_claim_host(func);
+       error = sdio_set_block_size(func, B43_SDIO_BLOCK_SIZE);
+       if (error) {
+               dev_err(&func->dev, "failed to set block size to %u bytes,"
+                       " error %d\n", B43_SDIO_BLOCK_SIZE, error);
+               goto err_release_host;
+       }
+       error = sdio_enable_func(func);
+       if (error) {
+               dev_err(&func->dev, "failed to enable func, error %d\n", error);
+               goto err_release_host;
+       }
+       sdio_release_host(func);
+
+       sdio = kzalloc(sizeof(*sdio), GFP_KERNEL);
+       if (!sdio) {
+               error = -ENOMEM;
+               dev_err(&func->dev, "failed to allocate ssb bus\n");
+               goto err_disable_func;
+       }
+       error = ssb_bus_sdiobus_register(&sdio->ssb, func,
+                                        b43_sdio_get_quirks(vendor, device));
+       if (error) {
+               dev_err(&func->dev, "failed to register ssb sdio bus,"
+                       " error %d\n", error);
+               goto err_free_ssb;
+       }
+       sdio_set_drvdata(func, sdio);
+
+       return 0;
+
+err_free_ssb:
+       kfree(sdio);
+err_disable_func:
+       sdio_disable_func(func);
+err_release_host:
+       sdio_release_host(func);
+out:
+       return error;
+}
+
+static void b43_sdio_remove(struct sdio_func *func)
+{
+       struct b43_sdio *sdio = sdio_get_drvdata(func);
+
+       ssb_bus_unregister(&sdio->ssb);
+       sdio_disable_func(func);
+       kfree(sdio);
+       sdio_set_drvdata(func, NULL);
+}
+
+static const struct sdio_device_id b43_sdio_ids[] = {
+       { SDIO_DEVICE(0x02d0, 0x044b) }, /* Nintendo Wii WLAN daughter card */
+       { },
+};
+
+static struct sdio_driver b43_sdio_driver = {
+       .name           = "b43-sdio",
+       .id_table       = b43_sdio_ids,
+       .probe          = b43_sdio_probe,
+       .remove         = b43_sdio_remove,
+};
+
+int b43_sdio_init(void)
+{
+       return sdio_register_driver(&b43_sdio_driver);
+}
+
+void b43_sdio_exit(void)
+{
+       sdio_unregister_driver(&b43_sdio_driver);
+}
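
[Editor's note] One subtlety in b43_sdio_interrupt_dispatcher() above: SDIO IRQ callbacks registered via sdio_claim_irq() are invoked by the MMC core with the host already claimed, and the dispatcher drops that claim around the call into b43. The diff does not state why; a plausible reading is that the b43 handler runs for a while under wl->mutex and performs its own SDIO transfers, so holding the claim across it would stall other users of the host. Sketch of the contract (hypothetical demo_ name):

    static void demo_dispatcher(struct sdio_func *func)
    {
            struct b43_sdio *sdio = sdio_get_drvdata(func);
            struct b43_wldev *dev = sdio->irq_handler_opaque;

            sdio_release_host(func);        /* let the handler do SDIO I/O */
            sdio->irq_handler(dev);
            sdio_claim_host(func);          /* restore the caller's claim */
    }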
diff --git a/drivers/net/wireless/b43/sdio.h b/drivers/net/wireless/b43/sdio.h
new file mode 100644 (file)
index 0000000..fb63309
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef B43_SDIO_H_
+#define B43_SDIO_H_
+
+#include <linux/ssb/ssb.h>
+
+struct b43_wldev;
+
+
+#ifdef CONFIG_B43_SDIO
+
+struct b43_sdio {
+       struct ssb_bus ssb;
+       void *irq_handler_opaque;
+       void (*irq_handler)(struct b43_wldev *dev);
+};
+
+int b43_sdio_request_irq(struct b43_wldev *dev,
+                        void (*handler)(struct b43_wldev *dev));
+void b43_sdio_free_irq(struct b43_wldev *dev);
+
+int b43_sdio_init(void);
+void b43_sdio_exit(void);
+
+
+#else /* CONFIG_B43_SDIO */
+
+
+static inline int b43_sdio_request_irq(struct b43_wldev *dev,
+                                      void (*handler)(struct b43_wldev *dev))
+{
+       return -ENODEV;
+}
+static inline void b43_sdio_free_irq(struct b43_wldev *dev)
+{
+}
+static inline int b43_sdio_init(void)
+{
+       return 0;
+}
+static inline void b43_sdio_exit(void)
+{
+}
+
+#endif /* CONFIG_B43_SDIO */
+#endif /* B43_SDIO_H_ */
index 14f541248b5c13e885cae1cf94add88854653822..ac9f600995e406ab4804b0b775eda826bfbc8dae 100644 (file)
@@ -690,8 +690,11 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr)
        }
 
        memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
-       ieee80211_rx_irqsafe(dev->wl->hw, skb);
+       ieee80211_rx(dev->wl->hw, skb);
 
+#if B43_DEBUG
+       dev->rx_count++;
+#endif
        return;
 drop:
        b43dbg(dev->wl, "RX: Packet dropped\n");
index ca61d3796cef644798a1d81e67e52c734f4854d7..3259b88415445ca0428bc03c41fe4e5110f7b015 100644 (file)
@@ -2021,6 +2021,12 @@ static int iwl4965_tx_status_reply_tx(struct iwl_priv *priv,
                                           agg->frame_count, txq_id, idx);
 
                        hdr = iwl_tx_queue_get_hdr(priv, txq_id, idx);
+                       if (!hdr) {
+                               IWL_ERR(priv,
+                                       "BUG_ON idx doesn't point to valid skb"
+                                       " idx=%d, txq_id=%d\n", idx, txq_id);
+                               return -1;
+                       }
 
                        sc = le16_to_cpu(hdr->seq_ctrl);
                        if (idx != (SEQ_TO_SN(sc) & 0xff)) {
index 1d539e3b8db1d77eae65ff56392b39a19afa615e..a6391c7fea532eb7e0d73b59beffec10e2abc322 100644 (file)
@@ -1163,6 +1163,12 @@ static int iwl5000_tx_status_reply_tx(struct iwl_priv *priv,
                                           agg->frame_count, txq_id, idx);
 
                        hdr = iwl_tx_queue_get_hdr(priv, txq_id, idx);
+                       if (!hdr) {
+                               IWL_ERR(priv,
+                                       "BUG_ON idx doesn't point to valid skb"
+                                       " idx=%d, txq_id=%d\n", idx, txq_id);
+                               return -1;
+                       }
 
                        sc = le16_to_cpu(hdr->seq_ctrl);
                        if (idx != (SEQ_TO_SN(sc) & 0xff)) {
index b90adcb73b061d53dfe745112ce6c80e95e9cd1d..8e1bb53c0aa3ffe1fa14291c88a697670f89836c 100644 (file)
@@ -250,12 +250,20 @@ void iwl_rx_allocate(struct iwl_priv *priv, gfp_t priority)
                }
                spin_unlock_irqrestore(&rxq->lock, flags);
 
+               if (rxq->free_count > RX_LOW_WATERMARK)
+                       priority |= __GFP_NOWARN;
                /* Alloc a new receive buffer */
                skb = alloc_skb(priv->hw_params.rx_buf_size + 256,
                                                priority);
 
                if (!skb) {
-                       IWL_CRIT(priv, "Can not allocate SKB buffers\n");
+                       if (net_ratelimit())
+                               IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n");
+                       if ((rxq->free_count <= RX_LOW_WATERMARK) &&
+                           net_ratelimit())
+                               IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n",
+                                        priority == GFP_ATOMIC ?  "GFP_ATOMIC" : "GFP_KERNEL",
+                                        rxq->free_count);
                        /* We don't reschedule replenish work here -- we will
                         * call the restock method and if it still needs
                         * more buffers it will schedule replenish */
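
[Editor's note] The allocation change above encodes a simple policy: while the RX free pool is comfortably above RX_LOW_WATERMARK, an atomic allocation failure is tolerable and __GFP_NOWARN suppresses the allocator's backtrace; only when the pool is actually running dry does the driver complain, and then only at net_ratelimit() pace. A condensed sketch of the same pattern (hypothetical demo_ name):

    static struct sk_buff *demo_rx_alloc(struct iwl_priv *priv, gfp_t priority)
    {
            struct iwl_rx_queue *rxq = &priv->rxq;
            struct sk_buff *skb;

            if (rxq->free_count > RX_LOW_WATERMARK)
                    priority |= __GFP_NOWARN;   /* failure is tolerable */

            skb = alloc_skb(priv->hw_params.rx_buf_size + 256, priority);
            if (!skb && rxq->free_count <= RX_LOW_WATERMARK &&
                net_ratelimit())
                    IWL_CRIT(priv, "RX pool low: %u buffers left\n",
                             rxq->free_count);
            return skb;
    }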
index a2b9ec82b9650da71b0fad38e1e6783299f65266..c6633fec8216f5f6551e9eba141cc0966494e004 100644 (file)
@@ -520,7 +520,7 @@ int iwl_send_static_wepkey_cmd(struct iwl_priv *priv, u8 send_if_empty)
        struct iwl_host_cmd cmd = {
                .id = REPLY_WEPKEY,
                .data = wep_cmd,
-               .flags = CMD_SYNC,
+               .flags = CMD_ASYNC,
        };
 
        memset(wep_cmd, 0, cmd_size +
index 090966837f3cc1e6d37b9bd2117a47fdaa0b7c7b..4f2d43937283ed02c5a21b8dc4338b81eb6f3de7 100644 (file)
@@ -1146,11 +1146,18 @@ static void iwl3945_rx_allocate(struct iwl_priv *priv, gfp_t priority)
                }
                spin_unlock_irqrestore(&rxq->lock, flags);
 
+               if (rxq->free_count > RX_LOW_WATERMARK)
+                       priority |= __GFP_NOWARN;
                /* Alloc a new receive buffer */
                skb = alloc_skb(priv->hw_params.rx_buf_size, priority);
                if (!skb) {
                        if (net_ratelimit())
-                               IWL_CRIT(priv, ": Can not allocate SKB buffers\n");
+                               IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n");
+                       if ((rxq->free_count <= RX_LOW_WATERMARK) &&
+                           net_ratelimit())
+                               IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n",
+                                        priority == GFP_ATOMIC ?  "GFP_ATOMIC" : "GFP_KERNEL",
+                                        rxq->free_count);
                        /* We don't reschedule replenish work here -- we will
                         * call the restock method and if it still needs
                         * more buffers it will schedule replenish */
index 5462cb5ad994b099d30e06c7de2638a764eaa012..567f029a8cda1fff4c9d9e7896b6068ef9f9a676 100644 (file)
@@ -380,7 +380,7 @@ static inline void rt2x00crypto_tx_insert_iv(struct sk_buff *skb,
 {
 }
 
-static inline void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, bool l2pad,
+static inline void rt2x00crypto_rx_insert_iv(struct sk_buff *skb,
                                             unsigned int header_length,
                                             struct rxdone_entry_desc *rxdesc)
 {
index 7b14d5bc63d60f38f0ac1400f6e2461ee2a5640c..88060e117541d64e651fcc441aa84ec664f7621e 100644 (file)
@@ -1,5 +1,5 @@
 menuconfig WL12XX
-       boolean "TI wl12xx driver support"
+       tristate "TI wl12xx driver support"
        depends on MAC80211 && WLAN_80211 && EXPERIMENTAL
        ---help---
          This will enable TI wl12xx driver support. The drivers make
index 38688847d5683f3a2a96bac07a6fef4286508733..23a6a6d4863bd8c3468ac2b21276125201419faa 100644 (file)
@@ -1070,7 +1070,7 @@ static int eject_installer(struct usb_interface *intf)
 
        /* Find bulk out endpoint */
        endpoint = &iface_desc->endpoint[1].desc;
-       if ((endpoint->bEndpointAddress & USB_TYPE_MASK) == USB_DIR_OUT &&
+       if (usb_endpoint_dir_out(endpoint) &&
            usb_endpoint_xfer_bulk(endpoint)) {
                bulk_out_ep = endpoint->bEndpointAddress;
        } else {
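 
The zd1211rw fix replaces a hand-rolled direction test with usb_endpoint_dir_out(). The old expression masked bEndpointAddress with USB_TYPE_MASK (0x60), a bmRequestType constant unrelated to endpoint addressing, so it accepted IN endpoints as well; direction actually lives in bit 7 of the address. A standalone sketch of the two tests, with constant values per the USB spec:

    #include <stdio.h>
    #include <stdint.h>

    #define USB_ENDPOINT_DIR_MASK   0x80    /* bit 7 of bEndpointAddress */
    #define USB_DIR_OUT             0x00
    #define USB_TYPE_MASK           0x60    /* bmRequestType bits, wrong field */

    static int endpoint_dir_out(uint8_t bEndpointAddress)
    {
            return (bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT;
    }

    int main(void)
    {
            uint8_t out_ep = 0x02, in_ep = 0x82;

            /* The old test passed for both endpoints, IN included. */
            printf("old: %d %d\n", (out_ep & USB_TYPE_MASK) == USB_DIR_OUT,
                                   (in_ep  & USB_TYPE_MASK) == USB_DIR_OUT);
            /* The helper-style test distinguishes them correctly. */
            printf("new: %d %d\n", endpoint_dir_out(out_ep),
                                   endpoint_dir_out(in_ep));
            return 0;
    }
 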
index dc22782633a5b4ac58eb04c285055ccaa6f9c867..83a044dbd1d775dc9a3c88d39c2091c42b01ff36 100644 (file)
@@ -134,18 +134,15 @@ static void xemaclite_enable_interrupts(struct net_local *drvdata)
        }
 
        /* Enable the Rx interrupts for the first buffer */
-       reg_data = in_be32(drvdata->base_addr + XEL_RSR_OFFSET);
        out_be32(drvdata->base_addr + XEL_RSR_OFFSET,
-                reg_data | XEL_RSR_RECV_IE_MASK);
+                XEL_RSR_RECV_IE_MASK);
 
        /* Enable the Rx interrupts for the second Buffer if
         * configured in HW */
        if (drvdata->rx_ping_pong != 0) {
-               reg_data = in_be32(drvdata->base_addr + XEL_BUFFER_OFFSET +
-                                  XEL_RSR_OFFSET);
                out_be32(drvdata->base_addr + XEL_BUFFER_OFFSET +
                         XEL_RSR_OFFSET,
-                        reg_data | XEL_RSR_RECV_IE_MASK);
+                        XEL_RSR_RECV_IE_MASK);
        }
 
        /* Enable the Global Interrupt Enable */
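 
The xemaclite hunk drops two read-modify-write sequences and writes the receive-interrupt-enable mask to the RSR directly. A sketch of the two access patterns follows; the register value is illustrative, and the reading that clearing the remaining RSR bits is intended here is an assumption, not a statement from the commit.

    #include <stdio.h>
    #include <stdint.h>

    #define RECV_IE_MASK 0x08u

    static volatile uint32_t rsr;   /* stand-in for the MMIO register */

    static void enable_rx_irq_rmw(void)
    {
            rsr |= RECV_IE_MASK;    /* read-modify-write: keeps other bits */
    }

    static void enable_rx_irq_direct(void)
    {
            rsr = RECV_IE_MASK;     /* direct write: other bits end up 0 */
    }

    int main(void)
    {
            rsr = 0xf0;             /* pretend status bits are set */
            enable_rx_irq_rmw();
            printf("rmw:    0x%02x\n", (unsigned)rsr);   /* 0xf8 */
            rsr = 0xf0;
            enable_rx_irq_direct();
            printf("direct: 0x%02x\n", (unsigned)rsr);   /* 0x08 */
            return 0;
    }
 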
index 8574622e36a51abec01aaa05d9f509b00b8ad9f0..c9e2ae90f19508db8f74f91440636689d50321d8 100644 (file)
@@ -154,9 +154,8 @@ int sync_start(void)
 {
        int err;
 
-       if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
                return -ENOMEM;
-       cpumask_clear(marked_cpus);
 
        start_cpu_work();
 
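 
The oprofile hunk folds an allocate-then-clear pair into one call: zalloc_cpumask_var() returns an already-zeroed mask, so the separate cpumask_clear() goes away. A userspace analogue using calloc(); the size is illustrative:

    #include <stdlib.h>
    #include <string.h>

    #define MASK_BYTES 128          /* illustrative size */

    static unsigned char *mask_alloc_then_clear(void)
    {
            unsigned char *m = malloc(MASK_BYTES);

            if (m)
                    memset(m, 0, MASK_BYTES);   /* second step, easy to miss */
            return m;
    }

    static unsigned char *mask_zalloc(void)
    {
            return calloc(1, MASK_BYTES);       /* allocated and zeroed */
    }

    int main(void)
    {
            free(mask_alloc_then_clear());
            free(mask_zalloc());
            return 0;
    }
 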
index 554e11f9e1cea9705ab4b54fa43ea9c60f33d558..8eefe56f1cbe8d470b580f2f5ca1dfe2f97027e2 100644 (file)
@@ -31,7 +31,7 @@
 #define PARPORT_MIN_SPINTIME_VALUE 1
 #define PARPORT_MAX_SPINTIME_VALUE 1000
 
-static int do_active_device(ctl_table *table, int write, struct file *filp,
+static int do_active_device(ctl_table *table, int write,
                      void __user *result, size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
@@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write, struct file *filp,
 }
 
 #ifdef CONFIG_PARPORT_1284
-static int do_autoprobe(ctl_table *table, int write, struct file *filp,
+static int do_autoprobe(ctl_table *table, int write,
                        void __user *result, size_t *lenp, loff_t *ppos)
 {
        struct parport_device_info *info = table->extra2;
@@ -111,7 +111,7 @@ static int do_autoprobe(ctl_table *table, int write, struct file *filp,
 #endif /* IEEE1284.3 support. */
 
 static int do_hardware_base_addr (ctl_table *table, int write,
-                                 struct file *filp, void __user *result,
+                                 void __user *result,
                                  size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
@@ -139,7 +139,7 @@ static int do_hardware_base_addr (ctl_table *table, int write,
 }
 
 static int do_hardware_irq (ctl_table *table, int write,
-                           struct file *filp, void __user *result,
+                           void __user *result,
                            size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
@@ -167,7 +167,7 @@ static int do_hardware_irq (ctl_table *table, int write,
 }
 
 static int do_hardware_dma (ctl_table *table, int write,
-                           struct file *filp, void __user *result,
+                           void __user *result,
                            size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
@@ -195,7 +195,7 @@ static int do_hardware_dma (ctl_table *table, int write,
 }
 
 static int do_hardware_modes (ctl_table *table, int write,
-                             struct file *filp, void __user *result,
+                             void __user *result,
                              size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
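 
Every handler in this file loses its struct file * argument, matching the tree-wide change of this cycle that dropped the unused parameter from the proc_handler prototype. A kernel-context sketch, not standalone, of a handler written against the five-argument form; the handler name and its read-only policy are hypothetical.

    /* Hypothetical handler using the new five-argument prototype,
     * with the struct file * parameter gone. */
    static int example_handler(ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos)
    {
            if (write)
                    return -EACCES;         /* reject writes to this entry */
            return proc_dostring(table, write, buffer, lenp, ppos);
    }
 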
index 36faa9a8e18fe15ca6aff9242fc77349694fbae5..3070f77eb56ad3c4e53bf4369fcad7e40993ed3b 100644 (file)
@@ -72,15 +72,9 @@ do {                                                                 \
 
 #define SLOT_NAME_SIZE 10
 struct slot {
-       u8 bus;
-       u8 device;
        u8 state;
-       u8 hp_slot;
-       u32 number;
        struct controller *ctrl;
-       struct hpc_ops *hpc_ops;
        struct hotplug_slot *hotplug_slot;
-       struct list_head        slot_list;
        struct delayed_work work;       /* work for button event */
        struct mutex lock;
 };
@@ -92,18 +86,10 @@ struct event_info {
 };
 
 struct controller {
-       struct mutex crit_sect;         /* critical section mutex */
        struct mutex ctrl_lock;         /* controller lock */
-       int num_slots;                  /* Number of slots on ctlr */
-       int slot_num_inc;               /* 1 or -1 */
-       struct pci_dev *pci_dev;
        struct pcie_device *pcie;       /* PCI Express port service */
-       struct list_head slot_list;
-       struct hpc_ops *hpc_ops;
+       struct slot *slot;
        wait_queue_head_t queue;        /* sleep & wake process */
-       u8 slot_device_offset;
-       u32 first_slot;         /* First physical slot number */  /* PCIE only has 1 slot */
-       u8 slot_bus;            /* Bus where the slots handled by this controller sit */
        u32 slot_cap;
        u8 cap_base;
        struct timer_list poll_timer;
@@ -131,40 +117,20 @@ struct controller {
 #define POWERON_STATE                  3
 #define POWEROFF_STATE                 4
 
-/* Error messages */
-#define INTERLOCK_OPEN                 0x00000002
-#define ADD_NOT_SUPPORTED              0x00000003
-#define CARD_FUNCTIONING               0x00000005
-#define ADAPTER_NOT_SAME               0x00000006
-#define NO_ADAPTER_PRESENT             0x00000009
-#define NOT_ENOUGH_RESOURCES           0x0000000B
-#define DEVICE_TYPE_NOT_SUPPORTED      0x0000000C
-#define WRONG_BUS_FREQUENCY            0x0000000D
-#define POWER_FAILURE                  0x0000000E
-
-/* Field definitions in Slot Capabilities Register */
-#define ATTN_BUTTN_PRSN        0x00000001
-#define        PWR_CTRL_PRSN   0x00000002
-#define MRL_SENS_PRSN  0x00000004
-#define ATTN_LED_PRSN  0x00000008
-#define PWR_LED_PRSN   0x00000010
-#define HP_SUPR_RM_SUP 0x00000020
-#define EMI_PRSN       0x00020000
-#define NO_CMD_CMPL_SUP        0x00040000
-
-#define ATTN_BUTTN(ctrl)       ((ctrl)->slot_cap & ATTN_BUTTN_PRSN)
-#define POWER_CTRL(ctrl)       ((ctrl)->slot_cap & PWR_CTRL_PRSN)
-#define MRL_SENS(ctrl)         ((ctrl)->slot_cap & MRL_SENS_PRSN)
-#define ATTN_LED(ctrl)         ((ctrl)->slot_cap & ATTN_LED_PRSN)
-#define PWR_LED(ctrl)          ((ctrl)->slot_cap & PWR_LED_PRSN)
-#define HP_SUPR_RM(ctrl)       ((ctrl)->slot_cap & HP_SUPR_RM_SUP)
-#define EMI(ctrl)              ((ctrl)->slot_cap & EMI_PRSN)
-#define NO_CMD_CMPL(ctrl)      ((ctrl)->slot_cap & NO_CMD_CMPL_SUP)
+#define ATTN_BUTTN(ctrl)       ((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP)
+#define POWER_CTRL(ctrl)       ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP)
+#define MRL_SENS(ctrl)         ((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP)
+#define ATTN_LED(ctrl)         ((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP)
+#define PWR_LED(ctrl)          ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP)
+#define HP_SUPR_RM(ctrl)       ((ctrl)->slot_cap & PCI_EXP_SLTCAP_HPS)
+#define EMI(ctrl)              ((ctrl)->slot_cap & PCI_EXP_SLTCAP_EIP)
+#define NO_CMD_CMPL(ctrl)      ((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS)
+#define PSN(ctrl)              ((ctrl)->slot_cap >> 19)
 
 extern int pciehp_sysfs_enable_slot(struct slot *slot);
 extern int pciehp_sysfs_disable_slot(struct slot *slot);
 extern u8 pciehp_handle_attention_button(struct slot *p_slot);
-  extern u8 pciehp_handle_switch_change(struct slot *p_slot);
+extern u8 pciehp_handle_switch_change(struct slot *p_slot);
 extern u8 pciehp_handle_presence_change(struct slot *p_slot);
 extern u8 pciehp_handle_power_fault(struct slot *p_slot);
 extern int pciehp_configure_device(struct slot *p_slot);
@@ -175,45 +141,30 @@ int pcie_init_notification(struct controller *ctrl);
 int pciehp_enable_slot(struct slot *p_slot);
 int pciehp_disable_slot(struct slot *p_slot);
 int pcie_enable_notification(struct controller *ctrl);
+int pciehp_power_on_slot(struct slot *slot);
+int pciehp_power_off_slot(struct slot *slot);
+int pciehp_get_power_status(struct slot *slot, u8 *status);
+int pciehp_get_attention_status(struct slot *slot, u8 *status);
+
+int pciehp_set_attention_status(struct slot *slot, u8 status);
+int pciehp_get_latch_status(struct slot *slot, u8 *status);
+int pciehp_get_adapter_status(struct slot *slot, u8 *status);
+int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *speed);
+int pciehp_get_max_link_width(struct slot *slot, enum pcie_link_width *val);
+int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *speed);
+int pciehp_get_cur_link_width(struct slot *slot, enum pcie_link_width *val);
+int pciehp_query_power_fault(struct slot *slot);
+void pciehp_green_led_on(struct slot *slot);
+void pciehp_green_led_off(struct slot *slot);
+void pciehp_green_led_blink(struct slot *slot);
+int pciehp_check_link_status(struct controller *ctrl);
+void pciehp_release_ctrl(struct controller *ctrl);
 
 static inline const char *slot_name(struct slot *slot)
 {
        return hotplug_slot_name(slot->hotplug_slot);
 }
 
-static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
-{
-       struct slot *slot;
-
-       list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
-               if (slot->device == device)
-                       return slot;
-       }
-
-       ctrl_err(ctrl, "Slot (device=0x%02x) not found\n", device);
-       return NULL;
-}
-
-struct hpc_ops {
-       int (*power_on_slot)(struct slot *slot);
-       int (*power_off_slot)(struct slot *slot);
-       int (*get_power_status)(struct slot *slot, u8 *status);
-       int (*get_attention_status)(struct slot *slot, u8 *status);
-       int (*set_attention_status)(struct slot *slot, u8 status);
-       int (*get_latch_status)(struct slot *slot, u8 *status);
-       int (*get_adapter_status)(struct slot *slot, u8 *status);
-       int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
-       int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
-       int (*get_max_lnk_width)(struct slot *slot, enum pcie_link_width *val);
-       int (*get_cur_lnk_width)(struct slot *slot, enum pcie_link_width *val);
-       int (*query_power_fault)(struct slot *slot);
-       void (*green_led_on)(struct slot *slot);
-       void (*green_led_off)(struct slot *slot);
-       void (*green_led_blink)(struct slot *slot);
-       void (*release_ctlr)(struct controller *ctrl);
-       int (*check_lnk_status)(struct controller *ctrl);
-};
-
 #ifdef CONFIG_ACPI
 #include <acpi/acpi.h>
 #include <acpi/acpi_bus.h>
index 7163e6a6cfaedaa199cd5813fcf73365c5ce6a00..37c8d3d0323e60602d6a6b4296e5e6668139c696 100644 (file)
 #define PCIEHP_DETECT_AUTO     (2)
 #define PCIEHP_DETECT_DEFAULT  PCIEHP_DETECT_AUTO
 
+struct dummy_slot {
+       u32 number;
+       struct list_head list;
+};
+
 static int slot_detection_mode;
 static char *pciehp_detect_mode;
 module_param(pciehp_detect_mode, charp, 0444);
@@ -77,7 +82,7 @@ static int __init dummy_probe(struct pcie_device *dev)
        int pos;
        u32 slot_cap;
        acpi_handle handle;
-       struct slot *slot, *tmp;
+       struct dummy_slot *slot, *tmp;
        struct pci_dev *pdev = dev->port;
        /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
        if (pciehp_get_hp_hw_control_from_firmware(pdev))
@@ -89,11 +94,11 @@ static int __init dummy_probe(struct pcie_device *dev)
        if (!slot)
                return -ENOMEM;
        slot->number = slot_cap >> 19;
-       list_for_each_entry(tmp, &dummy_slots, slot_list) {
+       list_for_each_entry(tmp, &dummy_slots, list) {
                if (tmp->number == slot->number)
                        dup_slot_id++;
        }
-       list_add_tail(&slot->slot_list, &dummy_slots);
+       list_add_tail(&slot->list, &dummy_slots);
        handle = DEVICE_ACPI_HANDLE(&pdev->dev);
        if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
                acpi_slot_detected = 1;
@@ -109,11 +114,11 @@ static struct pcie_port_service_driver __initdata dummy_driver = {
 
 static int __init select_detection_mode(void)
 {
-       struct slot *slot, *tmp;
+       struct dummy_slot *slot, *tmp;
        pcie_port_service_register(&dummy_driver);
        pcie_port_service_unregister(&dummy_driver);
-       list_for_each_entry_safe(slot, tmp, &dummy_slots, slot_list) {
-               list_del(&slot->slot_list);
+       list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
+               list_del(&slot->list);
                kfree(slot);
        }
        if (acpi_slot_detected && dup_slot_id)
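 
The teardown loop above depends on the _safe list iterator: plain list_for_each_entry() would read slot->list.next after kfree(slot), a use-after-free, while the _safe variant caches the successor before the body runs. A kernel-context sketch, not standalone:

    /* tmp caches the next node before the loop body runs, so the
     * current entry can be unlinked and freed without touching freed
     * memory on the next iteration. */
    struct dummy_slot *slot, *tmp;

    list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
            list_del(&slot->list);
            kfree(slot);            /* safe: tmp already points past slot */
    }
 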
index 2317557fdee637df956ebf957eaf0d09055f46e9..bc234719b1df7208f8f12003fe3f99b8997cb0ee 100644 (file)
@@ -99,65 +99,55 @@ static void release_slot(struct hotplug_slot *hotplug_slot)
        kfree(hotplug_slot);
 }
 
-static int init_slots(struct controller *ctrl)
+static int init_slot(struct controller *ctrl)
 {
-       struct slot *slot;
-       struct hotplug_slot *hotplug_slot;
-       struct hotplug_slot_info *info;
+       struct slot *slot = ctrl->slot;
+       struct hotplug_slot *hotplug = NULL;
+       struct hotplug_slot_info *info = NULL;
        char name[SLOT_NAME_SIZE];
        int retval = -ENOMEM;
 
-       list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
-               hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL);
-               if (!hotplug_slot)
-                       goto error;
-
-               info = kzalloc(sizeof(*info), GFP_KERNEL);
-               if (!info)
-                       goto error_hpslot;
-
-               /* register this slot with the hotplug pci core */
-               hotplug_slot->info = info;
-               hotplug_slot->private = slot;
-               hotplug_slot->release = &release_slot;
-               hotplug_slot->ops = &pciehp_hotplug_slot_ops;
-               slot->hotplug_slot = hotplug_slot;
-               snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
-
-               ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x "
-                        "hp_slot=%x sun=%x slot_device_offset=%x\n",
-                        pci_domain_nr(ctrl->pci_dev->subordinate),
-                        slot->bus, slot->device, slot->hp_slot, slot->number,
-                        ctrl->slot_device_offset);
-               retval = pci_hp_register(hotplug_slot,
-                                        ctrl->pci_dev->subordinate,
-                                        slot->device,
-                                        name);
-               if (retval) {
-                       ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
-                                retval);
-                       goto error_info;
-               }
-               get_power_status(hotplug_slot, &info->power_status);
-               get_attention_status(hotplug_slot, &info->attention_status);
-               get_latch_status(hotplug_slot, &info->latch_status);
-               get_adapter_status(hotplug_slot, &info->adapter_status);
+       hotplug = kzalloc(sizeof(*hotplug), GFP_KERNEL);
+       if (!hotplug)
+               goto out;
+
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               goto out;
+
+       /* register this slot with the hotplug pci core */
+       hotplug->info = info;
+       hotplug->private = slot;
+       hotplug->release = &release_slot;
+       hotplug->ops = &pciehp_hotplug_slot_ops;
+       slot->hotplug_slot = hotplug;
+       snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
+
+       ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:00 sun=%x\n",
+                pci_domain_nr(ctrl->pcie->port->subordinate),
+                ctrl->pcie->port->subordinate->number, PSN(ctrl));
+       retval = pci_hp_register(hotplug,
+                                ctrl->pcie->port->subordinate, 0, name);
+       if (retval) {
+               ctrl_err(ctrl,
+                        "pci_hp_register failed with error %d\n", retval);
+               goto out;
+       }
+       get_power_status(hotplug, &info->power_status);
+       get_attention_status(hotplug, &info->attention_status);
+       get_latch_status(hotplug, &info->latch_status);
+       get_adapter_status(hotplug, &info->adapter_status);
+out:
+       if (retval) {
+               kfree(info);
+               kfree(hotplug);
        }
-
-       return 0;
-error_info:
-       kfree(info);
-error_hpslot:
-       kfree(hotplug_slot);
-error:
        return retval;
 }
 
-static void cleanup_slots(struct controller *ctrl)
+static void cleanup_slot(struct controller *ctrl)
 {
-       struct slot *slot;
-       list_for_each_entry(slot, &ctrl->slot_list, slot_list)
-               pci_hp_deregister(slot->hotplug_slot);
+       pci_hp_deregister(ctrl->slot->hotplug_slot);
 }
 
 /*
@@ -173,7 +163,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
        hotplug_slot->info->attention_status = status;
 
        if (ATTN_LED(slot->ctrl))
-               slot->hpc_ops->set_attention_status(slot, status);
+               pciehp_set_attention_status(slot, status);
 
        return 0;
 }
@@ -208,7 +198,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                  __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_power_status(slot, value);
+       retval = pciehp_get_power_status(slot, value);
        if (retval < 0)
                *value = hotplug_slot->info->power_status;
 
@@ -223,7 +213,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                  __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_attention_status(slot, value);
+       retval = pciehp_get_attention_status(slot, value);
        if (retval < 0)
                *value = hotplug_slot->info->attention_status;
 
@@ -238,7 +228,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                 __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_latch_status(slot, value);
+       retval = pciehp_get_latch_status(slot, value);
        if (retval < 0)
                *value = hotplug_slot->info->latch_status;
 
@@ -253,7 +243,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                 __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_adapter_status(slot, value);
+       retval = pciehp_get_adapter_status(slot, value);
        if (retval < 0)
                *value = hotplug_slot->info->adapter_status;
 
@@ -269,7 +259,7 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                 __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_max_bus_speed(slot, value);
+       retval = pciehp_get_max_link_speed(slot, value);
        if (retval < 0)
                *value = PCI_SPEED_UNKNOWN;
 
@@ -284,7 +274,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                 __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
+       retval = pciehp_get_cur_link_speed(slot, value);
        if (retval < 0)
                *value = PCI_SPEED_UNKNOWN;
 
@@ -295,7 +285,7 @@ static int pciehp_probe(struct pcie_device *dev)
 {
        int rc;
        struct controller *ctrl;
-       struct slot *t_slot;
+       struct slot *slot;
        u8 value;
        struct pci_dev *pdev = dev->port;
 
@@ -314,7 +304,7 @@ static int pciehp_probe(struct pcie_device *dev)
        set_service_data(dev, ctrl);
 
        /* Setup the slot information structures */
-       rc = init_slots(ctrl);
+       rc = init_slot(ctrl);
        if (rc) {
                if (rc == -EBUSY)
                        ctrl_warn(ctrl, "Slot already registered by another "
@@ -332,15 +322,15 @@ static int pciehp_probe(struct pcie_device *dev)
        }
 
        /* Check if slot is occupied */
-       t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-       t_slot->hpc_ops->get_adapter_status(t_slot, &value);
+       slot = ctrl->slot;
+       pciehp_get_adapter_status(slot, &value);
        if (value) {
                if (pciehp_force)
-                       pciehp_enable_slot(t_slot);
+                       pciehp_enable_slot(slot);
        } else {
                /* Power off slot if not occupied */
                if (POWER_CTRL(ctrl)) {
-                       rc = t_slot->hpc_ops->power_off_slot(t_slot);
+                       rc = pciehp_power_off_slot(slot);
                        if (rc)
                                goto err_out_free_ctrl_slot;
                }
@@ -349,19 +339,19 @@ static int pciehp_probe(struct pcie_device *dev)
        return 0;
 
 err_out_free_ctrl_slot:
-       cleanup_slots(ctrl);
+       cleanup_slot(ctrl);
 err_out_release_ctlr:
-       ctrl->hpc_ops->release_ctlr(ctrl);
+       pciehp_release_ctrl(ctrl);
 err_out_none:
        return -ENODEV;
 }
 
-static void pciehp_remove (struct pcie_device *dev)
+static void pciehp_remove(struct pcie_device *dev)
 {
        struct controller *ctrl = get_service_data(dev);
 
-       cleanup_slots(ctrl);
-       ctrl->hpc_ops->release_ctlr(ctrl);
+       cleanup_slot(ctrl);
+       pciehp_release_ctrl(ctrl);
 }
 
 #ifdef CONFIG_PM
@@ -376,20 +366,20 @@ static int pciehp_resume (struct pcie_device *dev)
        dev_info(&dev->device, "%s ENTRY\n", __func__);
        if (pciehp_force) {
                struct controller *ctrl = get_service_data(dev);
-               struct slot *t_slot;
+               struct slot *slot;
                u8 status;
 
                /* reinitialize the chipset's event detection logic */
                pcie_enable_notification(ctrl);
 
-               t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
+               slot = ctrl->slot;
 
                /* Check if slot is occupied */
-               t_slot->hpc_ops->get_adapter_status(t_slot, &status);
+               pciehp_get_adapter_status(slot, &status);
                if (status)
-                       pciehp_enable_slot(t_slot);
+                       pciehp_enable_slot(slot);
                else
-                       pciehp_disable_slot(t_slot);
+                       pciehp_disable_slot(slot);
        }
        return 0;
 }
index b97cb4c3e0fe13258de0ce801aff6bf81c512c6a..84487d126e4d7098c4010425b3dcfebe6a6122cf 100644 (file)
@@ -82,7 +82,7 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
        /* Switch Change */
        ctrl_dbg(ctrl, "Switch interrupt received\n");
 
-       p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+       pciehp_get_latch_status(p_slot, &getstatus);
        if (getstatus) {
                /*
                 * Switch opened
@@ -114,7 +114,7 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
        /* Switch is open, assume a presence change
         * Save the presence state
         */
-       p_slot->hpc_ops->get_adapter_status(p_slot, &presence_save);
+       pciehp_get_adapter_status(p_slot, &presence_save);
        if (presence_save) {
                /*
                 * Card Present
@@ -143,7 +143,7 @@ u8 pciehp_handle_power_fault(struct slot *p_slot)
        /* power fault */
        ctrl_dbg(ctrl, "Power fault interrupt received\n");
 
-       if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
+       if (!pciehp_query_power_fault(p_slot)) {
                /*
                 * power fault Cleared
                 */
@@ -172,7 +172,7 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 {
        /* turn off slot, turn on Amber LED, turn off Green LED if supported*/
        if (POWER_CTRL(ctrl)) {
-               if (pslot->hpc_ops->power_off_slot(pslot)) {
+               if (pciehp_power_off_slot(pslot)) {
                        ctrl_err(ctrl,
                                 "Issue of Slot Power Off command failed\n");
                        return;
@@ -186,10 +186,10 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
        }
 
        if (PWR_LED(ctrl))
-               pslot->hpc_ops->green_led_off(pslot);
+               pciehp_green_led_off(pslot);
 
        if (ATTN_LED(ctrl)) {
-               if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
+               if (pciehp_set_attention_status(pslot, 1)) {
                        ctrl_err(ctrl,
                                 "Issue of Set Attention Led command failed\n");
                        return;
@@ -208,24 +208,20 @@ static int board_added(struct slot *p_slot)
 {
        int retval = 0;
        struct controller *ctrl = p_slot->ctrl;
-       struct pci_bus *parent = ctrl->pci_dev->subordinate;
-
-       ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d, %d\n",
-                __func__, p_slot->device, ctrl->slot_device_offset,
-                p_slot->hp_slot);
+       struct pci_bus *parent = ctrl->pcie->port->subordinate;
 
        if (POWER_CTRL(ctrl)) {
                /* Power on slot */
-               retval = p_slot->hpc_ops->power_on_slot(p_slot);
+               retval = pciehp_power_on_slot(p_slot);
                if (retval)
                        return retval;
        }
 
        if (PWR_LED(ctrl))
-               p_slot->hpc_ops->green_led_blink(p_slot);
+               pciehp_green_led_blink(p_slot);
 
        /* Check link training status */
-       retval = p_slot->hpc_ops->check_lnk_status(ctrl);
+       retval = pciehp_check_link_status(ctrl);
        if (retval) {
                ctrl_err(ctrl, "Failed to check link status\n");
                set_slot_off(ctrl, p_slot);
@@ -233,21 +229,21 @@ static int board_added(struct slot *p_slot)
        }
 
        /* Check for a power fault */
-       if (p_slot->hpc_ops->query_power_fault(p_slot)) {
+       if (pciehp_query_power_fault(p_slot)) {
                ctrl_dbg(ctrl, "Power fault detected\n");
-               retval = POWER_FAILURE;
+               retval = -EIO;
                goto err_exit;
        }
 
        retval = pciehp_configure_device(p_slot);
        if (retval) {
-               ctrl_err(ctrl, "Cannot add device at %04x:%02x:%02x\n",
-                        pci_domain_nr(parent), p_slot->bus, p_slot->device);
+               ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
+                        pci_domain_nr(parent), parent->number);
                goto err_exit;
        }
 
        if (PWR_LED(ctrl))
-               p_slot->hpc_ops->green_led_on(p_slot);
+               pciehp_green_led_on(p_slot);
 
        return 0;
 
@@ -269,11 +265,9 @@ static int remove_board(struct slot *p_slot)
        if (retval)
                return retval;
 
-       ctrl_dbg(ctrl, "%s: hp_slot = %d\n", __func__, p_slot->hp_slot);
-
        if (POWER_CTRL(ctrl)) {
                /* power off slot */
-               retval = p_slot->hpc_ops->power_off_slot(p_slot);
+               retval = pciehp_power_off_slot(p_slot);
                if (retval) {
                        ctrl_err(ctrl,
                                 "Issue of Slot Disable command failed\n");
@@ -287,9 +281,9 @@ static int remove_board(struct slot *p_slot)
                msleep(1000);
        }
 
+       /* turn off Green LED */
        if (PWR_LED(ctrl))
-               /* turn off Green LED */
-               p_slot->hpc_ops->green_led_off(p_slot);
+               pciehp_green_led_off(p_slot);
 
        return 0;
 }
@@ -317,18 +311,17 @@ static void pciehp_power_thread(struct work_struct *work)
        case POWEROFF_STATE:
                mutex_unlock(&p_slot->lock);
                ctrl_dbg(p_slot->ctrl,
-                        "Disabling domain:bus:device=%04x:%02x:%02x\n",
-                        pci_domain_nr(p_slot->ctrl->pci_dev->subordinate),
-                        p_slot->bus, p_slot->device);
+                        "Disabling domain:bus:device=%04x:%02x:00\n",
+                        pci_domain_nr(p_slot->ctrl->pcie->port->subordinate),
+                        p_slot->ctrl->pcie->port->subordinate->number);
                pciehp_disable_slot(p_slot);
                mutex_lock(&p_slot->lock);
                p_slot->state = STATIC_STATE;
                break;
        case POWERON_STATE:
                mutex_unlock(&p_slot->lock);
-               if (pciehp_enable_slot(p_slot) &&
-                   PWR_LED(p_slot->ctrl))
-                       p_slot->hpc_ops->green_led_off(p_slot);
+               if (pciehp_enable_slot(p_slot) && PWR_LED(p_slot->ctrl))
+                       pciehp_green_led_off(p_slot);
                mutex_lock(&p_slot->lock);
                p_slot->state = STATIC_STATE;
                break;
@@ -379,10 +372,10 @@ static int update_slot_info(struct slot *slot)
        if (!info)
                return -ENOMEM;
 
-       slot->hpc_ops->get_power_status(slot, &(info->power_status));
-       slot->hpc_ops->get_attention_status(slot, &(info->attention_status));
-       slot->hpc_ops->get_latch_status(slot, &(info->latch_status));
-       slot->hpc_ops->get_adapter_status(slot, &(info->adapter_status));
+       pciehp_get_power_status(slot, &info->power_status);
+       pciehp_get_attention_status(slot, &info->attention_status);
+       pciehp_get_latch_status(slot, &info->latch_status);
+       pciehp_get_adapter_status(slot, &info->adapter_status);
 
        result = pci_hp_change_slot_info(slot->hotplug_slot, info);
        kfree (info);
@@ -399,7 +392,7 @@ static void handle_button_press_event(struct slot *p_slot)
 
        switch (p_slot->state) {
        case STATIC_STATE:
-               p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+               pciehp_get_power_status(p_slot, &getstatus);
                if (getstatus) {
                        p_slot->state = BLINKINGOFF_STATE;
                        ctrl_info(ctrl,
@@ -413,9 +406,9 @@ static void handle_button_press_event(struct slot *p_slot)
                }
                /* blink green LED and turn off amber */
                if (PWR_LED(ctrl))
-                       p_slot->hpc_ops->green_led_blink(p_slot);
+                       pciehp_green_led_blink(p_slot);
                if (ATTN_LED(ctrl))
-                       p_slot->hpc_ops->set_attention_status(p_slot, 0);
+                       pciehp_set_attention_status(p_slot, 0);
 
                schedule_delayed_work(&p_slot->work, 5*HZ);
                break;
@@ -430,13 +423,13 @@ static void handle_button_press_event(struct slot *p_slot)
                cancel_delayed_work(&p_slot->work);
                if (p_slot->state == BLINKINGOFF_STATE) {
                        if (PWR_LED(ctrl))
-                               p_slot->hpc_ops->green_led_on(p_slot);
+                               pciehp_green_led_on(p_slot);
                } else {
                        if (PWR_LED(ctrl))
-                               p_slot->hpc_ops->green_led_off(p_slot);
+                               pciehp_green_led_off(p_slot);
                }
                if (ATTN_LED(ctrl))
-                       p_slot->hpc_ops->set_attention_status(p_slot, 0);
+                       pciehp_set_attention_status(p_slot, 0);
                ctrl_info(ctrl, "PCI slot #%s - action canceled "
                          "due to button press\n", slot_name(p_slot));
                p_slot->state = STATIC_STATE;
@@ -474,7 +467,7 @@ static void handle_surprise_event(struct slot *p_slot)
        info->p_slot = p_slot;
        INIT_WORK(&info->work, pciehp_power_thread);
 
-       p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+       pciehp_get_adapter_status(p_slot, &getstatus);
        if (!getstatus)
                p_slot->state = POWEROFF_STATE;
        else
@@ -498,9 +491,9 @@ static void interrupt_event_handler(struct work_struct *work)
                if (!POWER_CTRL(ctrl))
                        break;
                if (ATTN_LED(ctrl))
-                       p_slot->hpc_ops->set_attention_status(p_slot, 1);
+                       pciehp_set_attention_status(p_slot, 1);
                if (PWR_LED(ctrl))
-                       p_slot->hpc_ops->green_led_off(p_slot);
+                       pciehp_green_led_off(p_slot);
                break;
        case INT_PRESENCE_ON:
        case INT_PRESENCE_OFF:
@@ -525,45 +518,38 @@ int pciehp_enable_slot(struct slot *p_slot)
        int rc;
        struct controller *ctrl = p_slot->ctrl;
 
-       /* Check to see if (latch closed, card present, power off) */
-       mutex_lock(&p_slot->ctrl->crit_sect);
-
-       rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+       rc = pciehp_get_adapter_status(p_slot, &getstatus);
        if (rc || !getstatus) {
                ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot));
-               mutex_unlock(&p_slot->ctrl->crit_sect);
                return -ENODEV;
        }
        if (MRL_SENS(p_slot->ctrl)) {
-               rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+               rc = pciehp_get_latch_status(p_slot, &getstatus);
                if (rc || getstatus) {
                        ctrl_info(ctrl, "Latch open on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
        }
 
        if (POWER_CTRL(p_slot->ctrl)) {
-               rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+               rc = pciehp_get_power_status(p_slot, &getstatus);
                if (rc || getstatus) {
                        ctrl_info(ctrl, "Already enabled on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -EINVAL;
                }
        }
 
-       p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+       pciehp_get_latch_status(p_slot, &getstatus);
 
        rc = board_added(p_slot);
        if (rc) {
-               p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+               pciehp_get_latch_status(p_slot, &getstatus);
        }
 
        update_slot_info(p_slot);
 
-       mutex_unlock(&p_slot->ctrl->crit_sect);
        return rc;
 }
 
@@ -577,35 +563,29 @@ int pciehp_disable_slot(struct slot *p_slot)
        if (!p_slot->ctrl)
                return 1;
 
-       /* Check to see if (latch closed, card present, power on) */
-       mutex_lock(&p_slot->ctrl->crit_sect);
-
        if (!HP_SUPR_RM(p_slot->ctrl)) {
-               ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+               ret = pciehp_get_adapter_status(p_slot, &getstatus);
                if (ret || !getstatus) {
                        ctrl_info(ctrl, "No adapter on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
        }
 
        if (MRL_SENS(p_slot->ctrl)) {
-               ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+               ret = pciehp_get_latch_status(p_slot, &getstatus);
                if (ret || getstatus) {
                        ctrl_info(ctrl, "Latch open on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
        }
 
        if (POWER_CTRL(p_slot->ctrl)) {
-               ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+               ret = pciehp_get_power_status(p_slot, &getstatus);
                if (ret || !getstatus) {
                        ctrl_info(ctrl, "Already disabled on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -EINVAL;
                }
        }
@@ -613,7 +593,6 @@ int pciehp_disable_slot(struct slot *p_slot)
        ret = remove_board(p_slot);
        update_slot_info(p_slot);
 
-       mutex_unlock(&p_slot->ctrl->crit_sect);
        return ret;
 }
 
index 271f917b6f2c4a72a8352fc6be686cf616eadc8f..9ef4605c1ef6471fa6beddd55ccdb4465a2d7952 100644 (file)
@@ -44,25 +44,25 @@ static atomic_t pciehp_num_controllers = ATOMIC_INIT(0);
 
 static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        return pci_read_config_word(dev, ctrl->cap_base + reg, value);
 }
 
 static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        return pci_read_config_dword(dev, ctrl->cap_base + reg, value);
 }
 
 static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        return pci_write_config_word(dev, ctrl->cap_base + reg, value);
 }
 
 static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        return pci_write_config_dword(dev, ctrl->cap_base + reg, value);
 }
 
@@ -266,7 +266,7 @@ static void pcie_wait_link_active(struct controller *ctrl)
        ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n");
 }
 
-static int hpc_check_lnk_status(struct controller *ctrl)
+int pciehp_check_link_status(struct controller *ctrl)
 {
        u16 lnk_status;
        int retval = 0;
@@ -305,7 +305,7 @@ static int hpc_check_lnk_status(struct controller *ctrl)
        return retval;
 }
 
-static int hpc_get_attention_status(struct slot *slot, u8 *status)
+int pciehp_get_attention_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_ctrl;
@@ -344,7 +344,7 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
        return 0;
 }
 
-static int hpc_get_power_status(struct slot *slot, u8 *status)
+int pciehp_get_power_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_ctrl;
@@ -376,7 +376,7 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
        return retval;
 }
 
-static int hpc_get_latch_status(struct slot *slot, u8 *status)
+int pciehp_get_latch_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_status;
@@ -392,7 +392,7 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
        return 0;
 }
 
-static int hpc_get_adapter_status(struct slot *slot, u8 *status)
+int pciehp_get_adapter_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_status;
@@ -408,7 +408,7 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
        return 0;
 }
 
-static int hpc_query_power_fault(struct slot *slot)
+int pciehp_query_power_fault(struct slot *slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_status;
@@ -422,7 +422,7 @@ static int hpc_query_power_fault(struct slot *slot)
        return !!(slot_status & PCI_EXP_SLTSTA_PFD);
 }
 
-static int hpc_set_attention_status(struct slot *slot, u8 value)
+int pciehp_set_attention_status(struct slot *slot, u8 value)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -450,7 +450,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
        return rc;
 }
 
-static void hpc_set_green_led_on(struct slot *slot)
+void pciehp_green_led_on(struct slot *slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -463,7 +463,7 @@ static void hpc_set_green_led_on(struct slot *slot)
                 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
-static void hpc_set_green_led_off(struct slot *slot)
+void pciehp_green_led_off(struct slot *slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -476,7 +476,7 @@ static void hpc_set_green_led_off(struct slot *slot)
                 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
-static void hpc_set_green_led_blink(struct slot *slot)
+void pciehp_green_led_blink(struct slot *slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -489,7 +489,7 @@ static void hpc_set_green_led_blink(struct slot *slot)
                 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
-static int hpc_power_on_slot(struct slot * slot)
+int pciehp_power_on_slot(struct slot * slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -497,8 +497,6 @@ static int hpc_power_on_slot(struct slot * slot)
        u16 slot_status;
        int retval = 0;
 
-       ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
-
        /* Clear sticky power-fault bit from previous power failures */
        retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
        if (retval) {
@@ -539,7 +537,7 @@ static int hpc_power_on_slot(struct slot * slot)
 
 static inline int pcie_mask_bad_dllp(struct controller *ctrl)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        int pos;
        u32 reg;
 
@@ -556,7 +554,7 @@ static inline int pcie_mask_bad_dllp(struct controller *ctrl)
 
 static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        u32 reg;
        int pos;
 
@@ -570,7 +568,7 @@ static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
        pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg);
 }
 
-static int hpc_power_off_slot(struct slot * slot)
+int pciehp_power_off_slot(struct slot * slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -578,8 +576,6 @@ static int hpc_power_off_slot(struct slot * slot)
        int retval = 0;
        int changed;
 
-       ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
-
        /*
         * Set Bad DLLP Mask bit in Correctable Error Mask
         * Register. This is the workaround against Bad DLLP error
@@ -614,8 +610,8 @@ static int hpc_power_off_slot(struct slot * slot)
 static irqreturn_t pcie_isr(int irq, void *dev_id)
 {
        struct controller *ctrl = (struct controller *)dev_id;
+       struct slot *slot = ctrl->slot;
        u16 detected, intr_loc;
-       struct slot *p_slot;
 
        /*
         * In order to guarantee that all interrupt events are
@@ -656,29 +652,27 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
        if (!(intr_loc & ~PCI_EXP_SLTSTA_CC))
                return IRQ_HANDLED;
 
-       p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-
        /* Check MRL Sensor Changed */
        if (intr_loc & PCI_EXP_SLTSTA_MRLSC)
-               pciehp_handle_switch_change(p_slot);
+               pciehp_handle_switch_change(slot);
 
        /* Check Attention Button Pressed */
        if (intr_loc & PCI_EXP_SLTSTA_ABP)
-               pciehp_handle_attention_button(p_slot);
+               pciehp_handle_attention_button(slot);
 
        /* Check Presence Detect Changed */
        if (intr_loc & PCI_EXP_SLTSTA_PDC)
-               pciehp_handle_presence_change(p_slot);
+               pciehp_handle_presence_change(slot);
 
        /* Check Power Fault Detected */
        if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
                ctrl->power_fault_detected = 1;
-               pciehp_handle_power_fault(p_slot);
+               pciehp_handle_power_fault(slot);
        }
        return IRQ_HANDLED;
 }
 
-static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
+int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value)
 {
        struct controller *ctrl = slot->ctrl;
        enum pcie_link_speed lnk_speed;
@@ -709,7 +703,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
        return retval;
 }
 
-static int hpc_get_max_lnk_width(struct slot *slot,
+int pciehp_get_max_link_width(struct slot *slot,
                                 enum pcie_link_width *value)
 {
        struct controller *ctrl = slot->ctrl;
@@ -759,7 +753,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
        return retval;
 }
 
-static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
+int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value)
 {
        struct controller *ctrl = slot->ctrl;
        enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN;
@@ -791,7 +785,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
        return retval;
 }
 
-static int hpc_get_cur_lnk_width(struct slot *slot,
+int pciehp_get_cur_link_width(struct slot *slot,
                                 enum pcie_link_width *value)
 {
        struct controller *ctrl = slot->ctrl;
@@ -842,30 +836,6 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
        return retval;
 }
 
-static void pcie_release_ctrl(struct controller *ctrl);
-static struct hpc_ops pciehp_hpc_ops = {
-       .power_on_slot                  = hpc_power_on_slot,
-       .power_off_slot                 = hpc_power_off_slot,
-       .set_attention_status           = hpc_set_attention_status,
-       .get_power_status               = hpc_get_power_status,
-       .get_attention_status           = hpc_get_attention_status,
-       .get_latch_status               = hpc_get_latch_status,
-       .get_adapter_status             = hpc_get_adapter_status,
-
-       .get_max_bus_speed              = hpc_get_max_lnk_speed,
-       .get_cur_bus_speed              = hpc_get_cur_lnk_speed,
-       .get_max_lnk_width              = hpc_get_max_lnk_width,
-       .get_cur_lnk_width              = hpc_get_cur_lnk_width,
-
-       .query_power_fault              = hpc_query_power_fault,
-       .green_led_on                   = hpc_set_green_led_on,
-       .green_led_off                  = hpc_set_green_led_off,
-       .green_led_blink                = hpc_set_green_led_blink,
-
-       .release_ctlr                   = pcie_release_ctrl,
-       .check_lnk_status               = hpc_check_lnk_status,
-};
-
 int pcie_enable_notification(struct controller *ctrl)
 {
        u16 cmd, mask;
@@ -930,23 +900,16 @@ static int pcie_init_slot(struct controller *ctrl)
        if (!slot)
                return -ENOMEM;
 
-       slot->hp_slot = 0;
        slot->ctrl = ctrl;
-       slot->bus = ctrl->pci_dev->subordinate->number;
-       slot->device = ctrl->slot_device_offset + slot->hp_slot;
-       slot->hpc_ops = ctrl->hpc_ops;
-       slot->number = ctrl->first_slot;
        mutex_init(&slot->lock);
        INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
-       list_add(&slot->slot_list, &ctrl->slot_list);
+       ctrl->slot = slot;
        return 0;
 }
 
 static void pcie_cleanup_slot(struct controller *ctrl)
 {
-       struct slot *slot;
-       slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list);
-       list_del(&slot->slot_list);
+       struct slot *slot = ctrl->slot;
        cancel_delayed_work(&slot->work);
        flush_scheduled_work();
        flush_workqueue(pciehp_wq);
@@ -957,7 +920,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
 {
        int i;
        u16 reg16;
-       struct pci_dev *pdev = ctrl->pci_dev;
+       struct pci_dev *pdev = ctrl->pcie->port;
 
        if (!pciehp_debug)
                return;
@@ -980,7 +943,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
                          (unsigned long long)pci_resource_start(pdev, i));
        }
        ctrl_info(ctrl, "Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
-       ctrl_info(ctrl, "  Physical Slot Number : %d\n", ctrl->first_slot);
+       ctrl_info(ctrl, "  Physical Slot Number : %d\n", PSN(ctrl));
        ctrl_info(ctrl, "  Attention Button     : %3s\n",
                  ATTN_BUTTN(ctrl) ? "yes" : "no");
        ctrl_info(ctrl, "  Power Controller     : %3s\n",
@@ -1014,10 +977,7 @@ struct controller *pcie_init(struct pcie_device *dev)
                dev_err(&dev->device, "%s: Out of memory\n", __func__);
                goto abort;
        }
-       INIT_LIST_HEAD(&ctrl->slot_list);
-
        ctrl->pcie = dev;
-       ctrl->pci_dev = pdev;
        ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
        if (!ctrl->cap_base) {
                ctrl_err(ctrl, "Cannot find PCI Express capability\n");
@@ -1029,11 +989,6 @@ struct controller *pcie_init(struct pcie_device *dev)
        }
 
        ctrl->slot_cap = slot_cap;
-       ctrl->first_slot = slot_cap >> 19;
-       ctrl->slot_device_offset = 0;
-       ctrl->num_slots = 1;
-       ctrl->hpc_ops = &pciehp_hpc_ops;
-       mutex_init(&ctrl->crit_sect);
        mutex_init(&ctrl->ctrl_lock);
        init_waitqueue_head(&ctrl->queue);
        dbg_ctrl(ctrl);
@@ -1089,7 +1044,7 @@ abort:
        return NULL;
 }
 
-void pcie_release_ctrl(struct controller *ctrl)
+void pciehp_release_ctrl(struct controller *ctrl)
 {
        pcie_shutdown_notification(ctrl);
        pcie_cleanup_slot(ctrl);
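 
Taken together, the renames in this file replace the hpc_ops function-pointer table, which only ever had one implementation, with directly called pciehp_* functions. A standalone sketch of the before-and-after calling styles; all names here are illustrative.

    #include <stdio.h>

    struct slot;                            /* opaque, as in the driver */

    /* Before: a one-implementation ops table, invoked indirectly. */
    struct hpc_ops {
            int (*power_on_slot)(struct slot *slot);
    };

    /* After: the same operation as a directly called function. */
    static int power_on_slot(struct slot *slot)
    {
            (void)slot;
            puts("power on");
            return 0;
    }

    int main(void)
    {
            struct hpc_ops ops = { .power_on_slot = power_on_slot };

            ops.power_on_slot(NULL);        /* old style: pointer indirection */
            power_on_slot(NULL);            /* new style: link-time call */
            return 0;
    }
 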
index 02e24d63b3eec78fcdb67c1274d64c6fda953640..21733108addefe3aefc208dd646ae91340903883 100644 (file)
@@ -63,27 +63,27 @@ static int __ref pciehp_add_bridge(struct pci_dev *dev)
 int pciehp_configure_device(struct slot *p_slot)
 {
        struct pci_dev *dev;
-       struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+       struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
        int num, fn;
        struct controller *ctrl = p_slot->ctrl;
 
-       dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
+       dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
        if (dev) {
                ctrl_err(ctrl, "Device %s already exists "
-                        "at %04x:%02x:%02x, cannot hot-add\n", pci_name(dev),
-                        pci_domain_nr(parent), p_slot->bus, p_slot->device);
+                        "at %04x:%02x:00, cannot hot-add\n", pci_name(dev),
+                        pci_domain_nr(parent), parent->number);
                pci_dev_put(dev);
                return -EINVAL;
        }
 
-       num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
+       num = pci_scan_slot(parent, PCI_DEVFN(0, 0));
        if (num == 0) {
                ctrl_err(ctrl, "No new device found\n");
                return -ENODEV;
        }
 
        for (fn = 0; fn < 8; fn++) {
-               dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, fn));
+               dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
                if (!dev)
                        continue;
                if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
@@ -111,19 +111,18 @@ int pciehp_unconfigure_device(struct slot *p_slot)
        int j;
        u8 bctl = 0;
        u8 presence = 0;
-       struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+       struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
        u16 command;
        struct controller *ctrl = p_slot->ctrl;
 
-       ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n",
-                __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device);
-       ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
+       ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
+                __func__, pci_domain_nr(parent), parent->number);
+       ret = pciehp_get_adapter_status(p_slot, &presence);
        if (ret)
                presence = 0;
 
        for (j = 0; j < 8; j++) {
-               struct pci_dev* temp = pci_get_slot(parent,
-                               (p_slot->device << 3) | j);
+               struct pci_dev* temp = pci_get_slot(parent, PCI_DEVFN(0, j));
                if (!temp)
                        continue;
                if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
index 10c0e62bd5a8c6cdcae048cb713e29228fc18ce3..2ce8f9ccc66e33f27052c1a9e09c89de7d27eea7 100644 (file)
@@ -318,6 +318,8 @@ static int __init aer_service_init(void)
 {
        if (pcie_aer_disable)
                return -ENXIO;
+       if (!pci_msi_enabled())
+               return -ENXIO;
        return pcie_port_service_register(&aerdriver);
 }
 
index f289ca9bf18d2171cda7321962488a5e36106f40..745402e8e498a35f99c4f9819518936afc34bc7b 100644 (file)
@@ -303,9 +303,6 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev,
        pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
        pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, &reg32);
        info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
-       /* 00b and 10b are defined as "Reserved". */
-       if (info->support == PCIE_LINK_STATE_L1)
-               info->support = 0;
        info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
        info->latency_encoding_l1  = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
        pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
index fb45f5ee8df12a79c49e68ca88736f31920e5d80..454970d2d701b807317f06415a8baccaf7af50fe 100644 (file)
@@ -746,7 +746,9 @@ static acpi_status WMID_set_u32(u32 value, u32 cap, struct wmi_interface *iface)
                        return AE_BAD_PARAMETER;
                if (quirks->mailled == 1) {
                        param = value ? 0x92 : 0x93;
+                       i8042_lock_chip();
                        i8042_command(&param, 0x1059);
+                       i8042_unlock_chip();
                        return 0;
                }
                break;
index c431198bdbc4a4a7a3b9714105de76cfd45e6423..82daa3c1dc9c45eb4619156f849f71902309c66d 100644 (file)
@@ -14,7 +14,6 @@
 
 #include <linux/init.h>
 #include <linux/miscdevice.h>
-#include <linux/utsname.h>
 #include <linux/debugfs.h>
 #include <asm/ipl.h>
 #include <asm/sclp.h>
index 82b34893e5b5900ed93db3cf3a41d35953bc0775..9a4dd5992f65d6e1a60f1c0f8a490ae1bfca5d85 100644 (file)
@@ -117,8 +117,6 @@ source "drivers/staging/vt6655/Kconfig"
 
 source "drivers/staging/vt6656/Kconfig"
 
-source "drivers/staging/cpc-usb/Kconfig"
-
 source "drivers/staging/udlfb/Kconfig"
 
 source "drivers/staging/hv/Kconfig"
index b1cad0d9ba729a62b806072c21e99687cb25b397..104f2f8897ecc390fb0c9c8015e33436f06198fc 100644 (file)
@@ -40,7 +40,6 @@ obj-$(CONFIG_USB_SERIAL_QUATECH_USB2) += quatech_usb2/
 obj-$(CONFIG_OCTEON_ETHERNET)  += octeon/
 obj-$(CONFIG_VT6655)           += vt6655/
 obj-$(CONFIG_VT6656)           += vt6656/
-obj-$(CONFIG_USB_CPC)          += cpc-usb/
 obj-$(CONFIG_FB_UDL)           += udlfb/
 obj-$(CONFIG_HYPERV)           += hv/
 obj-$(CONFIG_VME_BUS)          += vme/
diff --git a/drivers/staging/cpc-usb/Kconfig b/drivers/staging/cpc-usb/Kconfig
deleted file mode 100644 (file)
index 2be0bc9..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-config USB_CPC
-       tristate "CPC CAN USB driver"
-       depends on USB && PROC_FS
-       default n
diff --git a/drivers/staging/cpc-usb/Makefile b/drivers/staging/cpc-usb/Makefile
deleted file mode 100644 (file)
index 3f83170..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_USB_CPC)  += cpc-usb.o
-
-cpc-usb-y := cpc-usb_drv.o sja2m16c_2.o
diff --git a/drivers/staging/cpc-usb/TODO b/drivers/staging/cpc-usb/TODO
deleted file mode 100644 (file)
index 9b1752f..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-Things to do for this driver to get merged into the main portion of the
-kernel:
-       - checkpatch cleanups
-       - sparse clean
-       - remove proc code
-       - tie into CAN socket interfaces if possible
-       - figure out sane userspace api
-       - use linux's error codes
-
-Send patches to Greg Kroah-Hartman <greg@kroah.com>
diff --git a/drivers/staging/cpc-usb/cpc-usb_drv.c b/drivers/staging/cpc-usb/cpc-usb_drv.c
deleted file mode 100644 (file)
index c5eca46..0000000
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*
- * CPC-USB CAN Interface Kernel Driver
- *
- * Copyright (C) 2004-2009 EMS Dr. Thomas Wuensche
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published
- * by the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/poll.h>
-#include <linux/smp_lock.h>
-#include <linux/completion.h>
-#include <asm/uaccess.h>
-#include <linux/usb.h>
-
-
-#include <linux/proc_fs.h>
-
-#include "cpc.h"
-
-#include "cpc_int.h"
-#include "cpcusb.h"
-
-#include "sja2m16c.h"
-
-/* Version Information */
-#define DRIVER_AUTHOR  "Sebastian Haas <haas@ems-wuensche.com>"
-#define DRIVER_DESC    "CPC-USB Driver for Linux Kernel 2.6"
-#define DRIVER_VERSION CPC_DRIVER_VERSION
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL v2");
-
-/* Define these values to match your devices */
-#define USB_CPCUSB_VENDOR_ID   0x12D6
-
-#define USB_CPCUSB_M16C_PRODUCT_ID    0x0888
-#define USB_CPCUSB_LPC2119_PRODUCT_ID 0x0444
-
-#define CPC_USB_PROC_DIR     CPC_PROC_DIR "cpc-usb"
-
-static struct proc_dir_entry *procDir;
-static struct proc_dir_entry *procEntry;
-
-/* Module parameters */
-static int debug;
-module_param(debug, int, S_IRUGO);
-
-/* table of devices that work with this driver */
-static struct usb_device_id cpcusb_table[] = {
-       {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_M16C_PRODUCT_ID)},
-       {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_LPC2119_PRODUCT_ID)},
-       {}                      /* Terminating entry */
-};
-
-MODULE_DEVICE_TABLE(usb, cpcusb_table);
-
-/* used to prevent a kernel panic if the driver is unloaded
- * while a program still has the device open
- */
-DECLARE_WAIT_QUEUE_HEAD(rmmodWq);
-atomic_t useCount;
-
-static CPC_USB_T *CPCUSB_Table[CPC_USB_CARD_CNT] = { 0 };
-static unsigned int CPCUsbCnt;
-
-/* prevent races between open() and disconnect() */
-static DECLARE_MUTEX(disconnect_sem);
-
-/* local function prototypes */
-static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count,
-                          loff_t *ppos);
-static ssize_t cpcusb_write(struct file *file, const char *buffer,
-                           size_t count, loff_t *ppos);
-static unsigned int cpcusb_poll(struct file *file, poll_table * wait);
-static int cpcusb_open(struct inode *inode, struct file *file);
-static int cpcusb_release(struct inode *inode, struct file *file);
-
-static int cpcusb_probe(struct usb_interface *interface,
-                       const struct usb_device_id *id);
-static void cpcusb_disconnect(struct usb_interface *interface);
-
-static void cpcusb_read_bulk_callback(struct urb *urb);
-static void cpcusb_write_bulk_callback(struct urb *urb);
-static void cpcusb_read_interrupt_callback(struct urb *urb);
-
-static int cpcusb_setup_intrep(CPC_USB_T *card);
-
-static struct file_operations cpcusb_fops = {
-       /*
-        * The owner field is part of the module-locking
-        * mechanism: the kernel needs to know which module's
-        * use-counter to increment BEFORE it calls the
-        * device's open() function, so that it can decrement
-        * the counter again before calling release(), or if
-        * open() fails.
-        */
-       .owner = THIS_MODULE,
-
-       .read = cpcusb_read,
-       .write = cpcusb_write,
-       .poll = cpcusb_poll,
-       .open = cpcusb_open,
-       .release = cpcusb_release,
-};
-
-/*
- * usb class driver info in order to get a minor number from the usb core,
- * and to have the device registered with devfs and the driver core
- */
-static struct usb_class_driver cpcusb_class = {
-       .name = "usb/cpc_usb%d",
-       .fops = &cpcusb_fops,
-       .minor_base = CPC_USB_BASE_MNR,
-};
-
-/* usb specific object needed to register this driver with the usb subsystem */
-static struct usb_driver cpcusb_driver = {
-       .name = "cpc-usb",
-       .probe = cpcusb_probe,
-       .disconnect = cpcusb_disconnect,
-       .id_table = cpcusb_table,
-};
-
-static int cpcusb_create_info_output(char *buf)
-{
-       int i = 0, j;
-
-       for (j = 0; j < CPC_USB_CARD_CNT; j++) {
-               if (CPCUSB_Table[j]) {
-                       CPC_USB_T *card = CPCUSB_Table[j];
-                       CPC_CHAN_T *chan = card->chan;
-
-                       /* one line per device: MINOR SERIALNUMBER */
-                       i += sprintf(&buf[i], "%d %s\n", chan->minor,
-                                    card->serialNumber);
-               }
-       }
-
-       return i;
-}
-
-static int cpcusb_proc_read_info(char *page, char **start, off_t off,
-                                int count, int *eof, void *data)
-{
-       int len = cpcusb_create_info_output(page);
-
-       if (len <= off + count)
-               *eof = 1;
-       *start = page + off;
-       len -= off;
-       if (len > count)
-               len = count;
-       if (len < 0)
-               len = 0;
-
-       return len;
-}
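
cpcusb_proc_read_info() follows the legacy read_proc contract: the callback
fills the supplied page, and the *start/off/count/*eof bookkeeping implements
partial reads by hand. The driver's TODO asked for the proc code to go away;
had it stayed, the seq_file interface would do the same job without the
offset arithmetic. A hedged sketch, assuming the same CPCUSB_Table, not code
the driver ever shipped:

        #include <linux/seq_file.h>

        static int cpcusb_info_show(struct seq_file *m, void *v)
        {
                int j;

                for (j = 0; j < CPC_USB_CARD_CNT; j++)
                        if (CPCUSB_Table[j])
                                seq_printf(m, "%d %s\n",
                                           CPCUSB_Table[j]->chan->minor,
                                           CPCUSB_Table[j]->serialNumber);
                return 0;
        }

        static int cpcusb_info_open(struct inode *inode, struct file *file)
        {
                return single_open(file, cpcusb_info_show, NULL);
        }
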
-
-/*
- * Remove CPC-USB and cleanup
- */
-static inline void cpcusb_delete(CPC_USB_T *card)
-{
-       if (card) {
-               if (card->chan) {
-                       if (card->chan->buf)
-                               vfree(card->chan->buf);
-
-                       if (card->chan->CPCWait_q)
-                               kfree(card->chan->CPCWait_q);
-
-                       kfree(card->chan);
-               }
-
-               CPCUSB_Table[card->idx] = NULL;
-               kfree(card);
-       }
-}
-
-/*
- * setup the interrupt IN endpoint of a specific CPC-USB device
- */
-static int cpcusb_setup_intrep(CPC_USB_T *card)
-{
-       int retval = 0;
-       struct usb_endpoint_descriptor *ep;
-
-       ep = &card->interface->altsetting[0].endpoint[card->num_intr_in].desc;
-
-       card->intr_in_buffer[0] = 0;
-       card->free_slots = 15;  /* initial size */
-
-       /* setup the urb */
-       usb_fill_int_urb(card->intr_in_urb, card->udev,
-                        usb_rcvintpipe(card->udev, card->num_intr_in),
-                        card->intr_in_buffer,
-                        sizeof(card->intr_in_buffer),
-                        cpcusb_read_interrupt_callback,
-                        card,
-                        ep->bInterval);
-
-       card->intr_in_urb->status = 0;  /* needed! */
-
-       /* submit the urb */
-       retval = usb_submit_urb(card->intr_in_urb, GFP_KERNEL);
-
-       if (retval)
-               err("%s - failed submitting intr urb, error %d", __func__,
-                   retval);
-
-       return retval;
-}
-
-static int cpcusb_open(struct inode *inode, struct file *file)
-{
-       CPC_USB_T *card = NULL;
-       struct usb_interface *interface;
-       int subminor;
-       int j, retval = 0;
-
-       subminor = iminor(inode);
-
-       /* prevent disconnects */
-       down(&disconnect_sem);
-
-       interface = usb_find_interface(&cpcusb_driver, subminor);
-       if (!interface) {
-               err("%s - error, can't find device for minor %d",
-                               __func__, subminor);
-               retval = CPC_ERR_NO_INTERFACE_PRESENT;
-               goto exit_no_device;
-       }
-
-       card = usb_get_intfdata(interface);
-       if (!card) {
-               retval = CPC_ERR_NO_INTERFACE_PRESENT;
-               goto exit_no_device;
-       }
-
-       /* lock this device */
-       down(&card->sem);
-
-       /* increment our usage count for the driver */
-       if (card->open) {
-               dbg("device already opened");
-               retval = CPC_ERR_CHANNEL_ALREADY_OPEN;
-               goto exit_on_error;
-       }
-
-       /* save our object in the file's private structure */
-       file->private_data = card;
-       for (j = 0; j < CPC_USB_URB_CNT; j++) {
-               usb_fill_bulk_urb(card->urbs[j].urb, card->udev,
-                                 usb_rcvbulkpipe(card->udev, card->num_bulk_in),
-                                 card->urbs[j].buffer, card->urbs[j].size,
-                                 cpcusb_read_bulk_callback, card);
-
-               retval = usb_submit_urb(card->urbs[j].urb, GFP_KERNEL);
-
-               if (retval) {
-                       err("%s - failed submitting read urb, error %d",
-                           __func__, retval);
-                       retval = CPC_ERR_TRANSMISSION_FAILED;
-                       goto exit_on_error;
-               }
-       }
-
-       info("%s - %d URB's submitted", __func__, j);
-
-       ResetBuffer(card->chan);
-
-       cpcusb_setup_intrep(card);
-       card->open = 1;
-
-       atomic_inc(&useCount);
-
-exit_on_error:
-       /* unlock this device */
-       up(&card->sem);
-
-exit_no_device:
-       up(&disconnect_sem);
-
-       return retval;
-}
-
-static unsigned int cpcusb_poll(struct file *file, poll_table * wait)
-{
-       CPC_USB_T *card = (CPC_USB_T *) file->private_data;
-       unsigned int retval = 0;
-
-       if (!card) {
-               err("%s - device object lost", __func__);
-               return -EIO;
-       }
-
-       poll_wait(file, card->chan->CPCWait_q, wait);
-
-       if (IsBufferNotEmpty(card->chan) || !(card->present))
-               retval |= (POLLIN | POLLRDNORM);
-
-       if (card->free_slots)
-               retval |= (POLLOUT | POLLWRNORM);
-
-       return retval;
-}
-
-static int cpcusb_release(struct inode *inode, struct file *file)
-{
-       CPC_USB_T *card = (CPC_USB_T *) file->private_data;
-       int j, retval = 0;
-
-       if (card == NULL) {
-               dbg("%s - object is NULL", __func__);
-               return CPC_ERR_NO_INTERFACE_PRESENT;
-       }
-
-       /* lock our device */
-       down(&card->sem);
-
-       if (!card->open) {
-               dbg("%s - device not opened", __func__);
-               retval = CPC_ERR_NO_INTERFACE_PRESENT;
-               goto exit_not_opened;
-       }
-
-       /* if device wasn't unplugged kill all urbs */
-       if (card->present) {
-               /* kill read urbs */
-               for (j = 0; j < CPC_USB_URB_CNT; j++) {
-                       usb_kill_urb(card->urbs[j].urb);
-               }
-
-               /* kill irq urb */
-               usb_kill_urb(card->intr_in_urb);
-
-               /* kill write urbs */
-               for (j = 0; j < CPC_USB_URB_CNT; j++) {
-                       if (atomic_read(&card->wrUrbs[j].busy)) {
-                               usb_kill_urb(card->wrUrbs[j].urb);
-                               wait_for_completion(&card->wrUrbs[j].finished);
-                       }
-               }
-       }
-
-       atomic_dec(&useCount);
-
-       /* last process detached */
-       if (atomic_read(&useCount) == 0) {
-               wake_up(&rmmodWq);
-       }
-
-       if (!card->present && card->open) {
-               /* the device was unplugged before the file was released */
-               up(&card->sem);
-               cpcusb_delete(card);
-               return 0;
-       }
-
-       card->open = 0;
-
-exit_not_opened:
-       up(&card->sem);
-
-       return 0;
-}
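
release() is one half of the driver's unload guard: open() increments
useCount, release() decrements it and wakes rmmodWq, and the module exit
handler sleeps on that queue until the count reaches zero. The pattern in
isolation, as a sketch (the helper names are illustrative;
atomic_dec_and_test() also closes the small window between the separate
atomic_dec() and atomic_read() in the code above):

        static DECLARE_WAIT_QUEUE_HEAD(rmmodWq);
        static atomic_t useCount = ATOMIC_INIT(0);

        static void cpcusb_get(void)            /* from open()    */
        {
                atomic_inc(&useCount);
        }

        static void cpcusb_put(void)            /* from release() */
        {
                if (atomic_dec_and_test(&useCount))
                        wake_up(&rmmodWq);
        }

        static void cpcusb_wait_unused(void)    /* from __exit    */
        {
                wait_event(rmmodWq, !atomic_read(&useCount));
        }
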
-
-static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count,
-                          loff_t *ppos)
-{
-       CPC_USB_T *card = (CPC_USB_T *) file->private_data;
-       CPC_CHAN_T *chan;
-       int retval = 0;
-
-       if (count < sizeof(CPC_MSG_T))
-               return CPC_ERR_UNKNOWN;
-
-       /* check that we may write to the given user buffer */
-       if (!access_ok(VERIFY_WRITE, buffer, count))
-               return CPC_ERR_UNKNOWN;
-
-       /* lock this object */
-       down(&card->sem);
-
-       /* verify that the device wasn't unplugged */
-       if (!card->present) {
-               up(&card->sem);
-               return CPC_ERR_NO_INTERFACE_PRESENT;
-       }
-
-       if (IsBufferEmpty(card->chan)) {
-               retval = 0;
-       } else {
-               chan = card->chan;
-
-#if 0
-               /* convert LPC2119 params back to SJA1000 params */
-               if (card->deviceRevision >= 0x0200
-                   && chan->buf[chan->oidx].type == CPC_MSG_T_CAN_PRMS) {
-                       LPC2119_TO_SJA1000_Params(&chan->buf[chan->oidx]);
-               }
-#endif
-
-               if (copy_to_user(buffer, &chan->buf[chan->oidx], count) != 0) {
-                       retval = CPC_ERR_IO_TRANSFER;
-               } else {
-                       chan->oidx = (chan->oidx + 1) % CPC_MSG_BUF_CNT;
-                       chan->WnR = 1;
-                       retval = sizeof(CPC_MSG_T);
-               }
-       }
-/*     spin_unlock_irqrestore(&card->slock, flags); */
-
-       /* unlock the device */
-       up(&card->sem);
-
-       return retval;
-}
-
-#define SHIFT  1
-static inline void cpcusb_align_buffer_alignment(unsigned char *buf)
-{
-       /* CPC-USB uploads packed bytes. */
-       CPC_MSG_T *cpc = (CPC_MSG_T *) buf;
-       unsigned int i;
-
-       for (i = 0; i < cpc->length + (2 * sizeof(unsigned long)); i++) {
-               ((unsigned char *) &cpc->msgid)[1 + i] =
-                   ((unsigned char *) &cpc->msgid)[1 + SHIFT + i];
-       }
-}
-
-static int cpc_get_buffer_count(CPC_CHAN_T *chan)
-{
-       /* check the buffer parameters */
-       if (chan->iidx == chan->oidx)
-               return !chan->WnR ? CPC_MSG_BUF_CNT : 0;
-       else if (chan->iidx >= chan->oidx)
-               return (chan->iidx - chan->oidx) % CPC_MSG_BUF_CNT;
-
-       return (chan->iidx + CPC_MSG_BUF_CNT - chan->oidx) % CPC_MSG_BUF_CNT;
-}
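
cpc_get_buffer_count() and the WnR ("write-not-read") flag together resolve
the classic ring-buffer ambiguity: iidx == oidx means empty when the last
operation was a read (WnR set by the reader) and full when it was a write
(WnR cleared by the writer). A worked example with CPC_MSG_BUF_CNT = 1500:

        /* iidx == 10, oidx == 10, WnR == 1  ->  0 (empty)                     */
        /* iidx == 10, oidx == 10, WnR == 0  ->  1500 (full)                   */
        /* iidx == 12, oidx == 10            ->  (12 - 10) % 1500 == 2         */
        /* iidx == 3,  oidx == 1497          ->  (3 + 1500 - 1497) % 1500 == 6 */
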
-
-static ssize_t cpcusb_write(struct file *file, const char *buffer,
-                           size_t count, loff_t *ppos)
-{
-       CPC_USB_T *card = (CPC_USB_T *) file->private_data;
-       CPC_USB_WRITE_URB_T *wrUrb = NULL;
-
-       ssize_t bytes_written = 0;
-       int retval = 0;
-       int j;
-
-       unsigned char *obuf = NULL;
-       unsigned char type = 0;
-       CPC_MSG_T *info = NULL;
-
-       dbg("%s - entered minor %d, count = %zu, present = %d",
-           __func__, card->minor, count, card->present);
-
-       if (count > sizeof(CPC_MSG_T))
-               return CPC_ERR_UNKNOWN;
-
-       /* check that we may read from the given user buffer */
-       if (!access_ok(VERIFY_READ, buffer, count))
-               return CPC_ERR_UNKNOWN;
-
-       /* lock this object */
-       down(&card->sem);
-
-       /* verify that the device wasn't unplugged */
-       if (!card->present) {
-               retval = CPC_ERR_NO_INTERFACE_PRESENT;
-               goto exit;
-       }
-
-       /* verify that we actually have some data to write */
-       if (count == 0) {
-               dbg("%s - write request of 0 bytes", __func__);
-               goto exit;
-       }
-
-       if (card->free_slots <= 5) {
-               info = (CPC_MSG_T *) buffer;
-
-               if (info->type != CPC_CMD_T_CLEAR_CMD_QUEUE
-                   || card->free_slots <= 0) {
-                       dbg("%s - send buffer full please try again %d",
-                           __func__, card->free_slots);
-                       retval = CPC_ERR_CAN_NO_TRANSMIT_BUF;
-                       goto exit;
-               }
-       }
-
-       /* Find a free write urb */
-       for (j = 0; j < CPC_USB_URB_CNT; j++) {
-               if (!atomic_read(&card->wrUrbs[j].busy)) {
-                       wrUrb = &card->wrUrbs[j];       /* remember found URB */
-                       atomic_set(&wrUrb->busy, 1);    /* lock this URB      */
-                       init_completion(&wrUrb->finished);      /* init completion    */
-                       dbg("WR URB no. %d started", j);
-                       break;
-               }
-       }
-
-       /* no free write URB found, report an error */
-       if (!wrUrb) {
-               dbg("%s - no free send urb available", __func__);
-               retval = CPC_ERR_CAN_NO_TRANSMIT_BUF;
-               goto exit;
-       }
-       dbg("URB write req");
-
-       obuf = (unsigned char *) wrUrb->urb->transfer_buffer;
-
-       /* copy the data from userspace into our transfer buffer;
-        * this is the only copy required.
-        */
-       if (copy_from_user(&obuf[4], buffer, count) != 0) {
-               atomic_set(&wrUrb->busy, 0);    /* release urb */
-               retval = CPC_ERR_IO_TRANSFER;
-               goto exit;
-       }
-
-       /* check if it is a DRIVER information message, so we can
-        * respond to it ourselves instead of passing it to the USB
-        * device
-        */
-       info = (CPC_MSG_T *) &obuf[4];
-
-       bytes_written = 11 + info->length;
-       if (bytes_written >= wrUrb->size) {
-               retval = CPC_ERR_IO_TRANSFER;
-               goto exit;
-       }
-
-       switch (info->type) {
-       case CPC_CMD_T_CLEAR_MSG_QUEUE:
-               ResetBuffer(card->chan);
-               break;
-
-       case CPC_CMD_T_INQ_MSG_QUEUE_CNT:
-               retval = cpc_get_buffer_count(card->chan);
-               atomic_set(&wrUrb->busy, 0);
-
-               goto exit;
-
-       case CPC_CMD_T_INQ_INFO:
-               if (info->msg.info.source == CPC_INFOMSG_T_DRIVER) {
-                       /* release the urb because we'll answer this
-                        * driver information request ourselves
-                        */
-                       atomic_set(&wrUrb->busy, 0);
-                       if (IsBufferFull(card->chan)) {
-                               retval = CPC_ERR_IO_TRANSFER;
-                               goto exit;
-                       }
-
-                       /* it is a driver information request message and we have
-                        * free rx slots to store the response
-                        */
-                       type = info->msg.info.type;
-                       info = &card->chan->buf[card->chan->iidx];
-
-                       info->type = CPC_MSG_T_INFO;
-                       info->msg.info.source = CPC_INFOMSG_T_DRIVER;
-                       info->msg.info.type = type;
-
-                       switch (type) {
-                       case CPC_INFOMSG_T_VERSION:
-                               info->length = strlen(CPC_DRIVER_VERSION) + 2;
-                               sprintf(info->msg.info.msg, "%s\n",
-                                       CPC_DRIVER_VERSION);
-                               break;
-
-                       case CPC_INFOMSG_T_SERIAL:
-                               info->length = strlen(CPC_DRIVER_SERIAL) + 2;
-                               sprintf(info->msg.info.msg, "%s\n",
-                                       CPC_DRIVER_SERIAL);
-                               break;
-
-                       default:
-                               info->length = 2;
-                               info->msg.info.type =
-                                   CPC_INFOMSG_T_UNKNOWN_TYPE;
-                       }
-
-                       card->chan->WnR = 0;
-                       card->chan->iidx =
-                           (card->chan->iidx + 1) % CPC_MSG_BUF_CNT;
-
-                       retval = info->length;
-                       goto exit;
-               }
-               break;
-       case CPC_CMD_T_CAN_PRMS:
-               /* Check the controller type. If it's the new CPC-USB, make sure these are SJA1000 params */
-               if (info->msg.canparams.cc_type != SJA1000
-                   && info->msg.canparams.cc_type != M16C_BASIC
-                   && (card->productId == USB_CPCUSB_LPC2119_PRODUCT_ID
-                       && info->msg.canparams.cc_type != SJA1000)) {
-                       /* don't forget to release the urb */
-                       atomic_set(&wrUrb->busy, 0);
-                       retval = CPC_ERR_WRONG_CONTROLLER_TYPE;
-                       goto exit;
-               }
-               break;
-       }
-
-       /* only convert the params if it is an old CPC-USB with an M16C controller */
-       if (card->productId == USB_CPCUSB_M16C_PRODUCT_ID) {
-               /* if it is a parameter message convert it from SJA1000 controller
-                * settings to M16C Basic controller settings
-                */
-               SJA1000_TO_M16C_BASIC_Params((CPC_MSG_T *) &obuf[4]);
-       }
-
-       /* don't forget the byte alignment */
-       cpcusb_align_buffer_alignment(&obuf[4]);
-
-       /* set up the 4-byte header */
-       obuf[0] = obuf[1] = obuf[2] = obuf[3] = 0;
-
-       /* this urb was already set up, except for this write size */
-       wrUrb->urb->transfer_buffer_length = bytes_written + 4;
-
-       /* send the data out the bulk port */
-       /* a character device write uses GFP_KERNEL,
-          unless a spinlock is held */
-       retval = usb_submit_urb(wrUrb->urb, GFP_KERNEL);
-       if (retval) {
-               atomic_set(&wrUrb->busy, 0);    /* release urb */
-               err("%s - failed submitting write urb, error %d",
-                   __func__, retval);
-       } else {
-               retval = bytes_written;
-       }
-
-exit:
-       /* unlock the device */
-       up(&card->sem);
-
-       dbg("%s - leaved", __func__);
-
-       return retval;
-}
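
Seen from userspace the character device trades in whole CPC_MSG_T records:
poll() for POLLOUT before writing (the driver maps it to free_slots), write
one message, then poll() for POLLIN and read one message back. A hedged
userspace sketch; the node name follows the "usb/cpc_usb%d" template
registered above, and the message contents are purely illustrative:

        #include <fcntl.h>
        #include <poll.h>
        #include <unistd.h>
        #include "cpc.h"        /* CPC_MSG_T and friends */

        int main(void)
        {
                int fd = open("/dev/usb/cpc_usb0", O_RDWR);
                struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
                CPC_MSG_T msg = { .type = CPC_CMD_T_CAN, .length = 5 };

                if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLOUT))
                        write(fd, &msg, sizeof(msg));   /* queue a CAN frame */
                if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN))
                        read(fd, &msg, sizeof(msg));    /* fetch one message */
                return 0;
        }

Note the non-standard return convention: both paths return the driver's
negative CPC_ERR_* values, which surface in userspace as unusual errno codes.
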
-
-/*
- * callback for interrupt IN urb
- */
-static void cpcusb_read_interrupt_callback(struct urb *urb)
-{
-       CPC_USB_T *card = (CPC_USB_T *) urb->context;
-       int retval;
-       unsigned long flags;
-
-       spin_lock_irqsave(&card->slock, flags);
-
-       if (!card->present) {
-               spin_unlock_irqrestore(&card->slock, flags);
-               info("%s - no such device", __func__);
-               return;
-       }
-
-       switch (urb->status) {
-       case 0: /* success */
-               card->free_slots = card->intr_in_buffer[1];
-               break;
-       case -ECONNRESET:
-       case -ENOENT:
-       case -ESHUTDOWN:
-               /* urb was killed */
-               spin_unlock_irqrestore(&card->slock, flags);
-               dbg("%s - intr urb killed", __func__);
-               return;
-       default:
-               info("%s - nonzero urb status %d", __func__, urb->status);
-               break;
-       }
-
-       retval = usb_submit_urb(urb, GFP_ATOMIC);
-       if (retval) {
-               err("%s - failed resubmitting intr urb, error %d",
-                   __func__, retval);
-       }
-
-       spin_unlock_irqrestore(&card->slock, flags);
-       wake_up_interruptible(card->chan->CPCWait_q);
-
-       return;
-}
-
-#define UN_SHIFT  1
-#define CPCMSG_HEADER_LEN_FIRMWARE   11
-static inline int cpcusb_unalign_and_copy_buffy(unsigned char *out,
-                                               unsigned char *in)
-{
-       unsigned int i, j;
-
-       for (i = 0; i < 3; i++)
-               out[i] = in[i];
-
-       for (j = 0; j < (in[1] + (CPCMSG_HEADER_LEN_FIRMWARE - 3)); j++)
-               out[j + i + UN_SHIFT] = in[j + i];
-
-       return i + j;
-}
-
-/*
- * callback for bulk IN urb
- */
-static void cpcusb_read_bulk_callback(struct urb *urb)
-{
-       CPC_USB_T *card = (CPC_USB_T *) urb->context;
-       CPC_CHAN_T *chan;
-       unsigned char *ibuf = urb->transfer_buffer;
-       int retval, msgCnt, start, again = 0;
-       unsigned long flags;
-
-       if (!card) {
-               err("%s - device object lost", __func__);
-               return;
-       }
-
-       spin_lock_irqsave(&card->slock, flags);
-
-       if (!card->present) {
-               spin_unlock_irqrestore(&card->slock, flags);
-               info("%s - no such device", __func__);
-               return;
-       }
-
-       switch (urb->status) {
-       case 0:         /* success */
-               break;
-       case -ECONNRESET:
-       case -ENOENT:
-       case -ESHUTDOWN:
-               /* urb was killed */
-               spin_unlock_irqrestore(&card->slock, flags);
-               dbg("%s - read urb killed", __func__);
-               return;
-       default:
-               info("%s - nonzero urb status %d", __func__, urb->status);
-               break;
-       }
-
-       if (urb->actual_length) {
-               msgCnt = ibuf[0] & ~0x80;
-               again = ibuf[0] & 0x80;
-
-               /* we have a 4 byte header */
-               start = 4;
-               chan = card->chan;
-               while (msgCnt) {
-                       if (!(IsBufferFull(card->chan))) {
-                               start +=
-                                   cpcusb_unalign_and_copy_buffy((unsigned char *)
-                                                         &chan->buf[chan->iidx], &ibuf[start]);
-
-                               if (start > urb->transfer_buffer_length) {
-                                       err("%d > %d", start, urb->transfer_buffer_length);
-                                       break;
-                               }
-
-                               chan->WnR = 0;
-                               chan->iidx = (chan->iidx + 1) % CPC_MSG_BUF_CNT;
-                               msgCnt--;
-                       } else {
-                               break;
-                       }
-               }
-       }
-
-       usb_fill_bulk_urb(urb, card->udev,
-                         usb_rcvbulkpipe(card->udev, card->num_bulk_in),
-                         urb->transfer_buffer,
-                         urb->transfer_buffer_length,
-                         cpcusb_read_bulk_callback, card);
-
-       retval = usb_submit_urb(urb, GFP_ATOMIC);
-
-       if (retval) {
-               err("%s - failed resubmitting read urb, error %d", __func__, retval);
-       }
-
-       spin_unlock_irqrestore(&card->slock, flags);
-
-       wake_up_interruptible(card->chan->CPCWait_q);
-}
-
-/*
- * callback for bulk OUT (write) urb
- */
-static void cpcusb_write_bulk_callback(struct urb *urb)
-{
-       CPC_USB_T *card = (CPC_USB_T *) urb->context;
-       unsigned long flags;
-       int j;
-
-       spin_lock_irqsave(&card->slock, flags);
-
-       /* find this urb */
-       for (j = 0; j < CPC_USB_URB_CNT; j++) {
-               if (card->wrUrbs[j].urb == urb) {
-                       dbg("URB found no. %d", j);
-                       /* notify anyone waiting that the write has finished */
-                       complete(&card->wrUrbs[j].finished);
-                       atomic_set(&card->wrUrbs[j].busy, 0);
-                       break;
-               }
-       }
-
-       switch (urb->status) {
-       case 0:         /* success */
-               break;
-       case -ECONNRESET:
-       case -ENOENT:
-       case -ESHUTDOWN:
-               /* urb was killed */
-               spin_unlock_irqrestore(&card->slock, flags);
-               dbg("%s - write urb no. %d killed", __func__, j);
-               return;
-       default:
-               info("%s - nonzero urb status %d", __func__, urb->status);
-               break;
-       }
-
-       spin_unlock_irqrestore(&card->slock, flags);
-
-       wake_up_interruptible(card->chan->CPCWait_q);
-}
-
-static inline int cpcusb_get_free_slot(void)
-{
-       int i;
-
-       for (i = 0; i < CPC_USB_CARD_CNT; i++) {
-               if (!CPCUSB_Table[i])
-                       return i;
-       }
-
-       return -1;
-}
-
-/*
- * probe function for new CPC-USB devices
- */
-static int cpcusb_probe(struct usb_interface *interface,
-                       const struct usb_device_id *id)
-{
-       CPC_USB_T *card = NULL;
-       CPC_CHAN_T *chan = NULL;
-
-       struct usb_device *udev = interface_to_usbdev(interface);
-       struct usb_host_interface *iface_desc;
-       struct usb_endpoint_descriptor *endpoint;
-
-       int i, j, retval = -ENOMEM, slot;
-
-       slot = cpcusb_get_free_slot();
-       if (slot < 0) {
-               info("No more devices supported");
-               return -ENOMEM;
-       }
-
-       /* allocate memory for our device state and initialize it */
-       card = kzalloc(sizeof(CPC_USB_T), GFP_KERNEL);
-       if (!card) {
-               err("Out of memory");
-               return -ENOMEM;
-       }
-       CPCUSB_Table[slot] = card;
-
-       /* allocate and initialize the channel struct */
-       card->chan = kmalloc(sizeof(CPC_CHAN_T), GFP_KERNEL);
-       if (!card->chan) {
-               kfree(card);
-               err("Out of memory");
-               return -ENOMEM;
-       }
-
-       chan = card->chan;
-       memset(chan, 0, sizeof(CPC_CHAN_T));
-       ResetBuffer(chan);
-
-       init_MUTEX(&card->sem);
-       spin_lock_init(&card->slock);
-
-       card->udev = udev;
-       card->interface = interface;
-       if (udev->descriptor.iSerialNumber) {
-               usb_string(udev, udev->descriptor.iSerialNumber, card->serialNumber,
-                                  128);
-               info("Serial %s", card->serialNumber);
-       }
-
-       card->productId = udev->descriptor.idProduct;
-       info("Product %s",
-            card->productId == USB_CPCUSB_LPC2119_PRODUCT_ID ?
-                        "CPC-USB/ARM7" : "CPC-USB/M16C");
-
-       /*
-        * set up the endpoint information;
-        * use only the first interrupt-in, bulk-in and bulk-out endpoints
-        */
-       iface_desc = &interface->altsetting[0];
-       for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
-               endpoint = &iface_desc->endpoint[i].desc;
-
-               if (!card->num_intr_in &&
-                   (endpoint->bEndpointAddress & USB_DIR_IN) &&
-                   ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
-                    == USB_ENDPOINT_XFER_INT)) {
-                       card->intr_in_urb = usb_alloc_urb(0, GFP_KERNEL);
-                       card->num_intr_in = 1;
-
-                       if (!card->intr_in_urb) {
-                               err("No free urbs available");
-                               goto error;
-                       }
-
-                       dbg("intr_in urb %d", card->num_intr_in);
-               }
-
-               if (!card->num_bulk_in &&
-                   (endpoint->bEndpointAddress & USB_DIR_IN) &&
-                   ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
-                    == USB_ENDPOINT_XFER_BULK)) {
-                       card->num_bulk_in = 2;
-                       for (j = 0; j < CPC_USB_URB_CNT; j++) {
-                               card->urbs[j].size = endpoint->wMaxPacketSize;
-                               card->urbs[j].urb = usb_alloc_urb(0, GFP_KERNEL);
-                               if (!card->urbs[j].urb) {
-                                       err("No free urbs available");
-                                       goto error;
-                               }
-                               card->urbs[j].buffer =
-                                   usb_buffer_alloc(udev,
-                                                    card->urbs[j].size,
-                                                    GFP_KERNEL,
-                                                    &card->urbs[j].urb->transfer_dma);
-                               if (!card->urbs[j].buffer) {
-                                       err("Couldn't allocate bulk_in_buffer");
-                                       goto error;
-                               }
-                       }
-                       info("%s - %d reading URB's allocated",
-                            __func__, CPC_USB_URB_CNT);
-               }
-
-               if (!card->num_bulk_out &&
-                   !(endpoint->bEndpointAddress & USB_DIR_IN) &&
-                   ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
-                    == USB_ENDPOINT_XFER_BULK)) {
-
-                       card->num_bulk_out = 2;
-
-                       for (j = 0; j < CPC_USB_URB_CNT; j++) {
-                               card->wrUrbs[j].size =
-                                   endpoint->wMaxPacketSize;
-                               card->wrUrbs[j].urb =
-                                   usb_alloc_urb(0, GFP_KERNEL);
-                               if (!card->wrUrbs[j].urb) {
-                                       err("No free urbs available");
-                                       goto error;
-                               }
-                               card->wrUrbs[j].buffer = usb_buffer_alloc(udev,
-                                                              card->wrUrbs[j].size, GFP_KERNEL,
-                                                              &card->wrUrbs[j].urb->transfer_dma);
-
-                               if (!card->wrUrbs[j].buffer) {
-                                       err("Couldn't allocate bulk_out_buffer");
-                                       goto error;
-                               }
-
-                               usb_fill_bulk_urb(card->wrUrbs[j].urb, udev,
-                                               usb_sndbulkpipe(udev, endpoint->bEndpointAddress),
-                                               card->wrUrbs[j].buffer,
-                                               card->wrUrbs[j].size,
-                                               cpcusb_write_bulk_callback,
-                                               card);
-                       }
-
-                       info("%s - %d writing URB's allocated", __func__, CPC_USB_URB_CNT);
-               }
-       }
-
-       if (!(card->num_bulk_in && card->num_bulk_out)) {
-               err("Couldn't find both bulk-in and bulk-out endpoints");
-               goto error;
-       }
-
-       /* allow device read, write and ioctl */
-       card->present = 1;
-
-       /* we can register the device now, as it is ready */
-       usb_set_intfdata(interface, card);
-       retval = usb_register_dev(interface, &cpcusb_class);
-
-       if (retval) {
-               /* something prevented us from registering this driver */
-               err("Not able to get a minor for this device.");
-               usb_set_intfdata(interface, NULL);
-               goto error;
-       }
-
-       card->chan->minor = card->minor = interface->minor;
-
-       chan->buf = vmalloc(sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT);
-       if (chan->buf == NULL) {
-               err("Out of memory");
-               retval = -ENOMEM;
-               goto error;
-       }
-       info("Allocated memory for %d messages (%lu kbytes)",
-            CPC_MSG_BUF_CNT, (long unsigned int)(sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT) / 1000);
-       memset(chan->buf, 0, sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT);
-
-       ResetBuffer(chan);
-
-       card->chan->CPCWait_q = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
-       if (!card->chan->CPCWait_q) {
-               err("Out of memory");
-               retval = -ENOMEM;
-               goto error;
-       }
-       init_waitqueue_head(card->chan->CPCWait_q);
-
-       CPCUSB_Table[slot] = card;
-       card->idx = slot;
-       CPCUsbCnt++;
-
-       /* let the user know what node this device is now attached to */
-       info("Device now attached to USB-%d", card->minor);
-       return 0;
-
-error:
-       for (j = 0; j < CPC_USB_URB_CNT; j++) {
-               if (card->urbs[j].buffer) {
-                       usb_buffer_free(card->udev, card->urbs[j].size,
-                                       card->urbs[j].buffer,
-                                       card->urbs[j].urb->transfer_dma);
-                       card->urbs[j].buffer = NULL;
-               }
-               if (card->urbs[j].urb) {
-                       usb_free_urb(card->urbs[j].urb);
-                       card->urbs[j].urb = NULL;
-               }
-       }
-
-       cpcusb_delete(card);
-       return retval;
-}
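
The endpoint classification in the probe loop predates the
usb_endpoint_is_*() helpers from <linux/usb.h>; each of the three
direction/type tests collapses to a single call. A sketch of the equivalent
checks (the original would also want le16_to_cpu(endpoint->wMaxPacketSize)
for the buffer sizes on big-endian hosts):

        struct usb_endpoint_descriptor *ep = &iface_desc->endpoint[i].desc;

        if (!card->num_intr_in && usb_endpoint_is_int_in(ep)) {
                /* interrupt IN: free_slots notifications */
        }
        if (!card->num_bulk_in && usb_endpoint_is_bulk_in(ep)) {
                /* bulk IN: received CAN messages */
        }
        if (!card->num_bulk_out && usb_endpoint_is_bulk_out(ep)) {
                /* bulk OUT: outgoing CAN messages */
        }
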
-
-/*
- * called by the usb core when the device is removed from the system
- */
-static void cpcusb_disconnect(struct usb_interface *interface)
-{
-       CPC_USB_T *card = NULL;
-       int minor, j;
-
-       /* prevent races with open() */
-       down(&disconnect_sem);
-
-       card = usb_get_intfdata(interface);
-       usb_set_intfdata(interface, NULL);
-
-       down(&card->sem);
-
-       /* prevent device read, write and ioctl */
-       card->present = 0;
-
-       minor = card->minor;
-
-       /* kill and free all write URBs and their buffers */
-       for (j = 0; j < CPC_USB_URB_CNT; j++) {
-               /* terminate an ongoing write */
-               if (atomic_read(&card->wrUrbs[j].busy)) {
-                       usb_kill_urb(card->wrUrbs[j].urb);
-                       wait_for_completion(&card->wrUrbs[j].finished);
-               }
-               usb_buffer_free(card->udev, card->wrUrbs[j].size,
-                               card->wrUrbs[j].buffer,
-                               card->wrUrbs[j].urb->transfer_dma);
-               usb_free_urb(card->wrUrbs[j].urb);
-       }
-       info("%d write URBs freed", CPC_USB_URB_CNT);
-
-       /* free all read URBs and their buffers */
-       for (j = 0; j < CPC_USB_URB_CNT; j++) {
-               usb_buffer_free(card->udev, card->urbs[j].size,
-                               card->urbs[j].buffer,
-                               card->urbs[j].urb->transfer_dma);
-               usb_free_urb(card->urbs[j].urb);
-       }
-       info("%d read URBs freed", CPC_USB_URB_CNT);
-       usb_free_urb(card->intr_in_urb);
-
-       /* give back our minor */
-       usb_deregister_dev(interface, &cpcusb_class);
-
-       up(&card->sem);
-
-       /* if the device is opened, cpcusb_release will clean this up */
-       if (!card->open)
-               cpcusb_delete(card);
-       else
-               wake_up_interruptible(card->chan->CPCWait_q);
-
-       up(&disconnect_sem);
-
-       CPCUsbCnt--;
-       info("USB-%d now disconnected", minor);
-}
-
-static int __init CPCUsb_Init(void)
-{
-       int result, i;
-
-       info(DRIVER_DESC " v" DRIVER_VERSION);
-       info("Build on " __DATE__ " at " __TIME__);
-
-       for (i = 0; i < CPC_USB_CARD_CNT; i++)
-               CPCUSB_Table[i] = 0;
-
-       /* register this driver with the USB subsystem */
-       result = usb_register(&cpcusb_driver);
-       if (result) {
-               err("usb_register failed. Error number %d", result);
-               return result;
-       }
-
-       procDir = proc_mkdir(CPC_USB_PROC_DIR, NULL);
-       if (!procDir) {
-               err("Could not create proc entry");
-       } else {
-               procEntry = create_proc_read_entry("info", 0444, procDir,
-                                                  cpcusb_proc_read_info,
-                                                  NULL);
-               if (!procEntry) {
-                       err("Could not create proc entry %s", CPC_USB_PROC_DIR "/info");
-                       remove_proc_entry(CPC_USB_PROC_DIR, NULL);
-                       procDir = NULL;
-               }
-       }
-
-       return 0;
-}
-
-static void __exit CPCUsb_Exit(void)
-{
-       wait_event(rmmodWq, !atomic_read(&useCount));
-
-       /* deregister this driver with the USB subsystem */
-       usb_deregister(&cpcusb_driver);
-
-       if (procDir) {
-               if (procEntry)
-                       remove_proc_entry("info", procDir);
-               remove_proc_entry(CPC_USB_PROC_DIR, NULL);
-       }
-}
-
-module_init(CPCUsb_Init);
-module_exit(CPCUsb_Exit);
diff --git a/drivers/staging/cpc-usb/cpc.h b/drivers/staging/cpc-usb/cpc.h
deleted file mode 100644 (file)
index b2fda5d..0000000
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * CPC CAN Interface Definitions
- *
- * Copyright (C) 2000-2008 EMS Dr. Thomas Wuensche
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#ifndef CPC_HEADER
-#define CPC_HEADER
-
-/*
- * the maximum length of the union members within a CPC_MSG;
- * this value can be overridden by the customer, but has to be
- * >= 64 bytes; if it is not defined beforehand, we default to
- * a length of 64 bytes
- */
-#if !defined(CPC_MSG_LEN) || (CPC_MSG_LEN < 64)
-#undef CPC_MSG_LEN
-#define CPC_MSG_LEN 64
-#endif
-
-/*
- * Transmission of events from CPC interfaces to PC can be individually
- * controlled per event type. The default is not to transmit.
- * Control values are constructed by bit-or of Subject and Action
- * and passed to CPC_Control()
- */
-
-/* Control-Values for CPC_Control() Command Subject Selection */
-#define CONTR_CAN_Message 0x04
-#define CONTR_Busload    0x08
-#define        CONTR_CAN_State   0x0C
-#define        CONTR_SendAck     0x10
-#define        CONTR_Filter      0x14
-#define CONTR_CmdQueue    0x18 /* reserved, do not use */
-#define CONTR_BusError    0x1C
-
-/* Control Command Actions */
-#define CONTR_CONT_OFF    0
-#define CONTR_CONT_ON     1
-#define CONTR_SING_ON     2
-/*
- * CONTR_SING_ON doesn't change CONTR_CONT_ON state, so it should be
- * read as: transmit at least once
- */
-
-/* defines for confirmed request */
-#define DO_NOT_CONFIRM 0
-#define DO_CONFIRM     1
-
-/* event flags */
-#define EVENT_READ 0x01
-#define EVENT_WRITE 0x02
-
-/*
- * Messages from CPC to PC contain a message object type field.
- * The following message types are sent by CPC and can be used in
- * handlers, others should be ignored.
- */
-#define CPC_MSG_T_RESYNC        0 /* Normally to be ignored */
-#define CPC_MSG_T_CAN           1 /* CAN data frame */
-#define CPC_MSG_T_BUSLOAD       2 /* Busload message */
-#define CPC_MSG_T_STRING        3 /* Normally to be ignored */
-#define CPC_MSG_T_CONTI         4 /* Normally to be ignored */
-#define CPC_MSG_T_MEM           7 /* Normally not to be handled */
-#define        CPC_MSG_T_RTR           8 /* CAN remote frame */
-#define CPC_MSG_T_TXACK                9 /* Send acknowledge */
-#define CPC_MSG_T_POWERUP      10 /* Power-up message */
-#define        CPC_MSG_T_CMD_NO       11 /* Normally to be ignored */
-#define        CPC_MSG_T_CAN_PRMS     12 /* Actual CAN parameters */
-#define        CPC_MSG_T_ABORTED      13 /* Command aborted message */
-#define        CPC_MSG_T_CANSTATE     14 /* CAN state message */
-#define CPC_MSG_T_RESET        15 /* used to reset CAN-Controller */
-#define        CPC_MSG_T_XCAN         16 /* XCAN data frame */
-#define CPC_MSG_T_XRTR         17 /* XCAN remote frame */
-#define CPC_MSG_T_INFO         18 /* information strings */
-#define CPC_MSG_T_CONTROL      19 /* used for control of interface/driver behaviour */
-#define CPC_MSG_T_CONFIRM      20 /* response type for confirmed requests */
-#define CPC_MSG_T_OVERRUN      21 /* response type for overrun conditions */
-#define CPC_MSG_T_KEEPALIVE    22 /* response type for keep alive conditions */
-#define CPC_MSG_T_CANERROR     23 /* response type for bus error conditions */
-#define CPC_MSG_T_DISCONNECTED 24 /* response type for a disconnected interface */
-#define CPC_MSG_T_ERR_COUNTER  25 /* RX/TX error counter of CAN controller */
-
-#define CPC_MSG_T_FIRMWARE    100 /* response type for USB firmware download */
-
-/*
- * Messages from the PC to the CPC interface contain a command field.
- * Most of the command types are wrapped by the library functions and
- * therefore normally do not have to be used directly.
- * However, programmers who wish to circumvent the library and talk directly
- * to the drivers (mainly Linux programmers) can use the following
- * command types:
- */
-#define CPC_CMD_T_CAN                 1        /* CAN data frame */
-#define CPC_CMD_T_CONTROL             3        /* used for control of interface/driver behaviour */
-#define        CPC_CMD_T_CAN_PRMS            6 /* set CAN parameters */
-#define        CPC_CMD_T_CLEARBUF            8 /* clears input queue; this is deprecated, use CPC_CMD_T_CLEAR_MSG_QUEUE instead */
-#define        CPC_CMD_T_INQ_CAN_PARMS      11 /* inquire actual CAN parameters */
-#define        CPC_CMD_T_FILTER_PRMS        12 /* set filter parameter */
-#define        CPC_CMD_T_RTR                13 /* CAN remote frame */
-#define        CPC_CMD_T_CANSTATE           14 /* CAN state message */
-#define        CPC_CMD_T_XCAN               15 /* XCAN data frame */
-#define CPC_CMD_T_XRTR               16        /* XCAN remote frame */
-#define CPC_CMD_T_RESET              17        /* used to reset CAN-Controller */
-#define CPC_CMD_T_INQ_INFO           18        /* miscellaneous information strings */
-#define CPC_CMD_T_OPEN_CHAN          19        /* open a channel */
-#define CPC_CMD_T_CLOSE_CHAN         20        /* close a channel */
-#define CPC_CMD_T_CNTBUF             21        /* this is deprecated, use CPC_CMD_T_INQ_MSG_QUEUE_CNT instead */
-#define CPC_CMD_T_CAN_EXIT          200 /* exit the CAN (disable interrupts; reset bit rate; reset output_cntr; mode = 1) */
-
-#define CPC_CMD_T_INQ_MSG_QUEUE_CNT  CPC_CMD_T_CNTBUF   /* inquires the count of elements in the message queue */
-#define CPC_CMD_T_INQ_ERR_COUNTER    25                        /* request the CAN controller's error counters */
-#define        CPC_CMD_T_CLEAR_MSG_QUEUE    CPC_CMD_T_CLEARBUF /* clear CPC_MSG queue */
-#define        CPC_CMD_T_CLEAR_CMD_QUEUE    28                 /* clear CPC_CMD queue */
-#define CPC_CMD_T_FIRMWARE          100                 /* reserved, must not be used */
-#define CPC_CMD_T_USB_RESET         101                 /* reserved, must not be used */
-#define CPC_CMD_T_WAIT_NOTIFY       102                 /* reserved, must not be used */
-#define CPC_CMD_T_WAIT_SETUP        103                 /* reserved, must not be used */
-#define        CPC_CMD_T_ABORT             255                 /* Normally not to be used */
-
-/* definitions for CPC_MSG_T_INFO information sources */
-#define CPC_INFOMSG_T_UNKNOWN_SOURCE 0
-#define CPC_INFOMSG_T_INTERFACE      1
-#define CPC_INFOMSG_T_DRIVER         2
-#define CPC_INFOMSG_T_LIBRARY        3
-
-/* information types */
-#define CPC_INFOMSG_T_UNKNOWN_TYPE   0
-#define CPC_INFOMSG_T_VERSION        1
-#define CPC_INFOMSG_T_SERIAL         2
-
-/* definitions for controller types */
-#define PCA82C200   1 /* Philips basic CAN controller, replaced by SJA1000 */
-#define SJA1000     2 /* Philips basic CAN controller */
-#define AN82527     3 /* Intel full CAN controller */
-#define M16C_BASIC  4 /* M16C controller running in basic CAN (not full CAN) mode */
-
-/* channel open error codes */
-#define CPC_ERR_NO_FREE_CHANNEL            -1  /* no more free space within the channel array */
-#define CPC_ERR_CHANNEL_ALREADY_OPEN       -2  /* the channel is already open */
-#define CPC_ERR_CHANNEL_NOT_ACTIVE         -3  /* access to a channel not active failed */
-#define CPC_ERR_NO_DRIVER_PRESENT          -4  /* no driver at the location searched by the library */
-#define CPC_ERR_NO_INIFILE_PRESENT         -5  /* the library could not find the inifile */
-#define CPC_ERR_WRONG_PARAMETERS           -6  /* wrong parameters in the inifile */
-#define CPC_ERR_NO_INTERFACE_PRESENT       -7  /* 1. The specified interface is not connected */
-                                               /* 2. The interface (mostly CPC-USB) was disconnected upon operation */
-#define CPC_ERR_NO_MATCHING_CHANNEL        -8  /* the driver couldn't find a matching channel */
-#define CPC_ERR_NO_BUFFER_AVAILABLE        -9  /* the driver couldn't allocate buffer for messages */
-#define CPC_ERR_NO_INTERRUPT               -10 /* the requested interrupt couldn't be claimed */
-#define CPC_ERR_NO_MATCHING_INTERFACE      -11 /* no interface type related to this channel was found */
-#define CPC_ERR_NO_RESOURCES               -12 /* the requested resources could not be claimed */
-#define CPC_ERR_SOCKET                     -13 /* error concerning TCP sockets */
-
-/* init error codes */
-#define CPC_ERR_WRONG_CONTROLLER_TYPE      -14 /* wrong CAN controller type within initialization */
-#define CPC_ERR_NO_RESET_MODE              -15 /* the controller could not be set into reset mode */
-#define CPC_ERR_NO_CAN_ACCESS              -16 /* the CAN controller could not be accessed */
-
-/* transmit error codes */
-#define CPC_ERR_CAN_WRONG_ID               -20 /* the provided CAN id is too big */
-#define CPC_ERR_CAN_WRONG_LENGTH           -21 /* the provided CAN length is too long */
-#define CPC_ERR_CAN_NO_TRANSMIT_BUF        -22 /* the transmit buffer was occupied */
-#define CPC_ERR_CAN_TRANSMIT_TIMEOUT       -23 /* The message could not be sent within a */
-                                               /* specified time */
-
-/* other error codes */
-#define CPC_ERR_SERVICE_NOT_SUPPORTED      -30 /* the requested service is not supported by the interface */
-#define CPC_ERR_IO_TRANSFER                -31 /* a transmission error down to the driver occurred */
-#define CPC_ERR_TRANSMISSION_FAILED        -32 /* a transmission error down to the interface occurred */
-#define CPC_ERR_TRANSMISSION_TIMEOUT       -33 /* a timeout occurred within transmission to the interface */
-#define CPC_ERR_OP_SYS_NOT_SUPPORTED       -35 /* the operating system is not supported */
-#define CPC_ERR_UNKNOWN                    -40 /* an unknown error occurred (mostly IOCTL errors) */
-
-#define CPC_ERR_LOADING_DLL                -50 /* the library 'cpcwin.dll' could not be loaded */
-#define CPC_ERR_ASSIGNING_FUNCTION         -51 /* the specified function could not be assigned */
-#define CPC_ERR_DLL_INITIALIZATION         -52 /* the DLL was not initialized correctly */
-#define CPC_ERR_MISSING_LICFILE            -55 /* the file containing the licenses does not exist */
-#define CPC_ERR_MISSING_LICENSE            -56 /* a required license was not found */
-
-/* CAN state bit values. Ignore any bits not listed */
-#define CPC_CAN_STATE_BUSOFF     0x80
-#define CPC_CAN_STATE_ERROR      0x40
-
-/* Mask to help ignore undefined bits */
-#define CPC_CAN_STATE_MASK       0xc0
-
-/*
- * CAN message representation in a CPC_MSG.
- * Message object type is CPC_MSG_T_CAN or CPC_MSG_T_RTR
- * or CPC_MSG_T_XCAN or CPC_MSG_T_XRTR
- */
-typedef struct CPC_CAN_MSG {
-       u32 id;
-       u8 length;
-       u8 msg[8];
-} CPC_CAN_MSG_T;
-
-/* representation of the CAN parameters for the PCA82C200 controller */
-typedef struct CPC_PCA82C200_PARAMS {
-       u8 acc_code;    /* Acceptance-code for receive, Standard: 0 */
-       u8 acc_mask;    /* Acceptance-mask for receive, Standard: 0xff (everything) */
-       u8 btr0;        /* Bus-timing register 0 */
-       u8 btr1;        /* Bus-timing register 1 */
-       u8 outp_contr;  /* Output-control register */
-} CPC_PCA82C200_PARAMS_T;
-
-/* representation of the CAN parameters for the SJA1000 controller */
-typedef struct CPC_SJA1000_PARAMS {
-       u8 mode;        /* enables single or dual acceptance filtering */
-       u8 acc_code0;   /* Acceptance-code for receive, Standard: 0 */
-       u8 acc_code1;
-       u8 acc_code2;
-       u8 acc_code3;
-       u8 acc_mask0;   /* Acceptance-mask for receive, Standard: 0xff (everything) */
-       u8 acc_mask1;
-       u8 acc_mask2;
-       u8 acc_mask3;
-       u8 btr0;        /* Bus-timing register 0 */
-       u8 btr1;        /* Bus-timing register 1 */
-       u8 outp_contr;  /* Output-control register */
-} CPC_SJA1000_PARAMS_T;
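
btr0/btr1 carry the raw SJA1000 bus-timing registers. As a worked example,
with the SJA1000's customary 16 MHz clock the widely published register pair
for 500 kbit/s is BTR0 = 0x00, BTR1 = 0x1c; the initialization below is a
sketch under that clock assumption, with an accept-everything filter:

        CPC_SJA1000_PARAMS_T prms = {
                .mode      = 0,                 /* single filter mode */
                .acc_code0 = 0x00, .acc_code1 = 0x00,
                .acc_code2 = 0x00, .acc_code3 = 0x00,
                .acc_mask0 = 0xff, .acc_mask1 = 0xff,   /* match anything */
                .acc_mask2 = 0xff, .acc_mask3 = 0xff,
                .btr0 = 0x00,                   /* 500 kbit/s @ 16 MHz */
                .btr1 = 0x1c,
                .outp_contr = 0xda,             /* common push-pull OCR value */
        };
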
-
-/*
- * representation of the CAN parameters for the M16C controller
- * in basic CAN mode (means no full CAN)
- */
-typedef struct CPC_M16C_BASIC_PARAMS {
-       u8 con0;
-       u8 con1;
-       u8 ctlr0;
-       u8 ctlr1;
-       u8 clk;
-       u8 acc_std_code0;
-       u8 acc_std_code1;
-       u8 acc_ext_code0;
-       u8 acc_ext_code1;
-       u8 acc_ext_code2;
-       u8 acc_ext_code3;
-       u8 acc_std_mask0;
-       u8 acc_std_mask1;
-       u8 acc_ext_mask0;
-       u8 acc_ext_mask1;
-       u8 acc_ext_mask2;
-       u8 acc_ext_mask3;
-} CPC_M16C_BASIC_PARAMS_T;
-
-/* CAN params message representation */
-typedef struct CPC_CAN_PARAMS {
-       u8 cc_type;     /* represents the controller type */
-       union {
-               CPC_M16C_BASIC_PARAMS_T m16c_basic;
-               CPC_SJA1000_PARAMS_T sja1000;
-               CPC_PCA82C200_PARAMS_T pca82c200;
-       } cc_params;
-} CPC_CAN_PARAMS_T;
-
-/* CHAN init params representation */
-typedef struct CPC_CHAN_PARAMS {
-       int fd;
-} CPC_CHAN_PARAMS_T;
-
-/* CAN init params message representation */
-typedef struct CPC_INIT_PARAMS {
-       CPC_CHAN_PARAMS_T chanparams;
-       CPC_CAN_PARAMS_T canparams;
-} CPC_INIT_PARAMS_T;
-
-/* structure for confirmed message handling */
-typedef struct CPC_CONFIRM {
-       u8 result; /* error code */
-} CPC_CONFIRM_T;
-
-/* structure for information requests */
-typedef struct CPC_INFO {
-       u8 source;                 /* interface, driver or library */
-       u8 type;                   /* version or serial number */
-       char msg[CPC_MSG_LEN - 2]; /* string holding the requested information */
-} CPC_INFO_T;
-
-/*
- * OVERRUN
- * In general two types of overrun may occur.
- * A hardware overrun, where the CAN controller
- * lost a message because the interrupt was
- * not handled before the next message came in;
- * or a software overrun, where e.g. a received
- * message could not be stored in the CPC_MSG
- * buffer.
- */
-
-/* After a software overrun has occurred
- * we wait until we have CPC_OVR_GAP slots
- * free in the CPC_MSG buffer.
- */
-#define CPC_OVR_GAP               10
-
-/*
- * Two types of software overrun may occur.
- * A received CAN message or a CAN state event
- * can cause an overrun.
- * Note: A CPC_CMD which would normally store
- * its result immediately in the CPC_MSG
- * queue may fail because the message queue is full.
- * This will not generate an overrun message, but
- * will halt command execution until the command
- * is able to store its message in the message queue.
- */
-#define CPC_OVR_EVENT_CAN       0x01
-#define CPC_OVR_EVENT_CANSTATE  0x02
-#define CPC_OVR_EVENT_BUSERROR  0x04
-
-/*
- * If the CAN controller lost a message
- * we indicate it with the highest bit
- * set in the count field.
- */
-#define CPC_OVR_HW              0x80
-
-/* structure for overrun conditions */
-typedef struct {
-       u8 event;
-       u8 count;
-} CPC_OVERRUN_T;
-
-/*
- * CAN errors
- * Each CAN controller type has different
- * registers to record errors.
- * Therefore a structure containing the specific
- * errors is set up for each controller here.
- */
-
-/*
- * SJA1000 error structure
- * see the SJA1000 datasheet for detailed
- * explanation of the registers
- */
-typedef struct CPC_SJA1000_CAN_ERROR {
-       u8 ecc;   /* error capture code register */
-       u8 rxerr; /* RX error counter register */
-       u8 txerr; /* TX error counter register */
-} CPC_SJA1000_CAN_ERROR_T;
-
-/*
- * M16C error structure
- * see the M16C datasheet for detailed
- * explanation of the registers
- */
-typedef struct CPC_M16C_CAN_ERROR {
-       u8 tbd; /* to be defined */
-} CPC_M16C_CAN_ERROR_T;
-
-/* structure for CAN error conditions */
-#define  CPC_CAN_ECODE_ERRFRAME   0x01
-typedef struct CPC_CAN_ERROR {
-       u8 ecode;
-       struct {
-               u8 cc_type; /* CAN controller type */
-               union {
-                       CPC_SJA1000_CAN_ERROR_T sja1000;
-                       CPC_M16C_CAN_ERROR_T m16c;
-               } regs;
-       } cc;
-} CPC_CAN_ERROR_T;
-
-/*
- * Structure containing RX/TX error counter.
- * This structure is used to request the
- * values of the CAN controllers TX and RX
- * error counter.
- */
-typedef struct CPC_CAN_ERR_COUNTER {
-       u8 rx;
-       u8 tx;
-} CPC_CAN_ERR_COUNTER_T;
-
-/* If this flag is set, transmissions from PC to CPC are protected against loss */
-#define CPC_SECURE_TO_CPC      0x01
-
-/* If this flag is set, transmissions from CPC to PC are protected against loss */
-#define CPC_SECURE_TO_PC       0x02
-
-/* If this flag is set, the CAN-transmit buffer is checked to be free before sending a message */
-#define CPC_SECURE_SEND                0x04
-
-/*
- * If this flag is set, the transmission complete flag is checked
- * after sending a message
- * THIS IS CURRENTLY ONLY IMPLEMENTED IN THE PASSIVE INTERFACE DRIVERS
- */
-#define CPC_SECURE_TRANSMIT    0x08
-
-/* main message type used between library and application */
-typedef struct CPC_MSG {
-       u8 type;        /* type of message */
-       u8 length;      /* length of data within union 'msg' */
-       u8 msgid;       /* confirmation handle */
-       u32 ts_sec;     /* timestamp in seconds */
-       u32 ts_nsec;    /* timestamp in nano seconds */
-       union {
-               u8 generic[CPC_MSG_LEN];
-               CPC_CAN_MSG_T canmsg;
-               CPC_CAN_PARAMS_T canparams;
-               CPC_CONFIRM_T confirmation;
-               CPC_INFO_T info;
-               CPC_OVERRUN_T overrun;
-               CPC_CAN_ERROR_T error;
-               CPC_CAN_ERR_COUNTER_T err_counter;
-               u8 busload;
-               u8 canstate;
-       } msg;
-} CPC_MSG_T;
-
-#endif /* CPC_HEADER */
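
The CPC_MSG_T removed above is the single framing type exchanged between library, driver and firmware: `type` selects the union member and `length` covers only the used payload. A minimal user-space sketch of building such a message follows; it is a stand-alone approximation with trimmed-down structures and invented constant values, not the EMS library API:

    #include <stdint.h>
    #include <string.h>

    typedef uint8_t u8;
    typedef uint32_t u32;

    #define CPC_MSG_LEN        63      /* assumed payload capacity */
    #define CPC_CMD_T_CAN_PRMS 2       /* hypothetical command code */

    /* trimmed stand-ins for the structures deleted above */
    typedef struct { u8 mode, btr0, btr1, outp_contr; } sja1000_params_t;
    typedef struct {
            u8 type;                   /* selects the union member */
            u8 length;                 /* length of the used payload */
            u8 msgid;                  /* confirmation handle */
            u32 ts_sec, ts_nsec;       /* timestamp */
            union {
                    u8 generic[CPC_MSG_LEN];
                    sja1000_params_t sja1000;
            } msg;
    } cpc_msg_t;

    int main(void)
    {
            cpc_msg_t m;

            memset(&m, 0, sizeof(m));
            m.type = CPC_CMD_T_CAN_PRMS;
            m.msg.sja1000.btr0 = 0x00;  /* standard SJA1000 timing for */
            m.msg.sja1000.btr1 = 0x14;  /* 1 MBit/s at a 16 MHz clock  */
            m.length = sizeof(m.msg.sja1000);
            return 0;
    }
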
diff --git a/drivers/staging/cpc-usb/cpc_int.h b/drivers/staging/cpc-usb/cpc_int.h
deleted file mode 100644 (file)
index 38674e9..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * CPCLIB
- *
- * Copyright (C) 2000-2008 EMS Dr. Thomas Wuensche
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-#ifndef CPC_INT_H
-#define CPC_INT_H
-
-#include <linux/wait.h>
-
-#define CPC_MSG_BUF_CNT        1500
-
-#define CPC_PROC_DIR "driver/"
-
-#undef dbg
-#undef err
-#undef info
-
-/* Use our own dbg macro */
-#define dbg(format, arg...)  do { if (debug) printk(KERN_INFO format "\n", ## arg); } while (0)
-#define err(format, arg...)  do { printk(KERN_INFO "ERROR " format "\n", ## arg); } while (0)
-#define info(format, arg...) do { printk(KERN_INFO format "\n", ## arg); } while (0)
-
-/* Helper macros for our buffers */
-#define IsBufferFull(x)     ((!(x)->WnR) && ((x)->iidx == (x)->oidx))
-#define IsBufferEmpty(x)    (((x)->WnR) && ((x)->iidx == (x)->oidx))
-#define IsBufferNotEmpty(x) ((!(x)->WnR) || ((x)->iidx != (x)->oidx))
-#define ResetBuffer(x)      do { (x)->oidx = (x)->iidx = 0; (x)->WnR = 1; } while (0)
-
-#define CPC_BufWriteAllowed ((chan->oidx != chan->iidx) || chan->WnR)
-
-typedef void (*chan_write_byte_t) (void *chan, unsigned int reg,
-                                  unsigned char val);
-typedef unsigned char (*chan_read_byte_t) (void *chan, unsigned int reg);
-
-typedef struct CPC_CHAN {
-       void __iomem * canBase; /* base address of SJA1000 */
-       chan_read_byte_t read_byte;     /* CAN controller read access routine */
-       chan_write_byte_t write_byte;   /* CAN controller write access routine */
-       CPC_MSG_T *buf;         /* buffer for CPC msg */
-       unsigned int iidx;
-       unsigned int oidx;
-       unsigned int WnR;
-       unsigned int minor;
-       unsigned int locked;
-       unsigned int irqDisabled;
-
-       unsigned char cpcCtrlCANMessage;
-       unsigned char cpcCtrlCANState;
-       unsigned char cpcCtrlBUSState;
-
-       unsigned char controllerType;
-
-       unsigned long ovrTimeSec;
-       unsigned long ovrTimeNSec;
-       unsigned long ovrLockedBuffer;
-       CPC_OVERRUN_T ovr;
-
-       /* for debugging only */
-       unsigned int handledIrqs;
-       unsigned int lostMessages;
-
-       unsigned int sentStdCan;
-       unsigned int sentExtCan;
-       unsigned int sentStdRtr;
-       unsigned int sentExtRtr;
-
-       unsigned int recvStdCan;
-       unsigned int recvExtCan;
-       unsigned int recvStdRtr;
-       unsigned int recvExtRtr;
-
-       wait_queue_head_t *CPCWait_q;
-
-       void *private;
-} CPC_CHAN_T;
-
-#endif
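
The IsBufferFull/IsBufferEmpty macros above resolve the classic ring-buffer ambiguity, where equal indices can mean either full or empty, with the WnR ("write, not read") flag: equal indices mean empty after a read or reset and full after a write. A self-contained sketch of that discipline as I read the macros (illustrative, not EMS code):

    #include <stdio.h>

    #define SLOTS 4

    struct ring {
            unsigned iidx, oidx;    /* in (write) and out (read) indices */
            unsigned WnR;           /* 1: equal indices mean empty; 0: full */
    };

    static int put(struct ring *r)
    {
            if (!r->WnR && r->iidx == r->oidx)      /* IsBufferFull() */
                    return -1;
            r->iidx = (r->iidx + 1) % SLOTS;
            r->WnR = 0;                             /* a write happened last */
            return 0;
    }

    static int get(struct ring *r)
    {
            if (r->WnR && r->iidx == r->oidx)       /* IsBufferEmpty() */
                    return -1;
            r->oidx = (r->oidx + 1) % SLOTS;
            r->WnR = 1;                             /* a read happened last */
            return 0;
    }

    int main(void)
    {
            struct ring r = { 0, 0, 1 };            /* ResetBuffer() state */
            int i, stored = 0;

            for (i = 0; i < 6; i++)                 /* only SLOTS writes fit */
                    stored += (put(&r) == 0);
            printf("%d stored\n", stored);          /* prints "4 stored" */
            return get(&r);                         /* drain one slot again */
    }
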
diff --git a/drivers/staging/cpc-usb/cpcusb.h b/drivers/staging/cpc-usb/cpcusb.h
deleted file mode 100644 (file)
index 6bdf30b..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-/* Header for CPC-USB Driver ********************
- * Copyright 1999, 2000, 2001
- *
- * Company:  EMS Dr. Thomas Wuensche
- *           Sonnenhang 3
- *           85304 Ilmmuenster
- *           Phone: +49-8441-490260
- *           Fax:   +49-8441-81860
- *           email: support@ems-wuensche.com
- *           WWW:   www.ems-wuensche.com
- */
-
-#ifndef CPCUSB_H
-#define CPCUSB_H
-
-#undef err
-#undef dbg
-#undef info
-
-/* Use our own dbg macro */
-#define dbg(format, arg...) do { if (debug) printk(KERN_INFO "CPC-USB: " format "\n" , ## arg); } while (0)
-#define info(format, arg...) do { printk(KERN_INFO "CPC-USB: " format "\n" , ## arg); } while (0)
-#define err(format, arg...) do { printk(KERN_INFO "CPC-USB(ERROR): " format "\n" , ## arg); } while (0)
-
-#define CPC_USB_CARD_CNT      4
-
-typedef struct CPC_USB_READ_URB {
-       unsigned char *buffer;  /* the buffer to send data */
-       size_t size;            /* the size of the send buffer */
-       struct urb *urb;        /* the urb used to send data */
-} CPC_USB_READ_URB_T;
-
-typedef struct CPC_USB_WRITE_URB {
-       unsigned char *buffer;  /* the buffer to send data */
-       size_t size;            /* the size of the send buffer */
-       struct urb *urb;        /* the urb used to send data */
-       atomic_t busy;          /* true if write urb is busy */
-       struct completion finished;     /* wait for the write to finish */
-} CPC_USB_WRITE_URB_T;
-
-#define CPC_USB_URB_CNT  10
-
-typedef struct CPC_USB {
-       struct usb_device *udev;        /* save off the usb device pointer */
-       struct usb_interface *interface;        /* the interface for this device */
-       unsigned char minor;    /* the starting minor number for this device */
-       unsigned char num_ports;        /* the number of ports this device has */
-       int num_intr_in;        /* number of interrupt in endpoints we have */
-       int num_bulk_in;        /* number of bulk in endpoints we have */
-       int num_bulk_out;       /* number of bulk out endpoints we have */
-
-       CPC_USB_READ_URB_T urbs[CPC_USB_URB_CNT];
-
-       unsigned char intr_in_buffer[4];        /* interrupt transfer buffer */
-       struct urb *intr_in_urb;        /* interrupt transfer urb */
-
-       CPC_USB_WRITE_URB_T wrUrbs[CPC_USB_URB_CNT];
-
-       int open;               /* if the port is open or not */
-       int present;            /* if the device is not disconnected */
-       struct semaphore sem;   /* locks this structure */
-
-       int free_slots;         /* free send slots of CPC-USB */
-       int idx;
-
-       spinlock_t slock;
-
-       char serialNumber[128]; /* serial number */
-       int productId;          /* product id to differ between M16C and LPC2119 */
-       CPC_CHAN_T *chan;
-} CPC_USB_T;
-
-#define CPCTable               CPCUSB_Table
-
-#define CPC_DRIVER_VERSION "0.724"
-#define CPC_DRIVER_SERIAL  "not applicable"
-
-#define OBUF_SIZE 255          /* 4096 */
-
-/* read timeouts -- RD_NAK_TIMEOUT * RD_EXPIRE = Number of seconds */
-#define RD_NAK_TIMEOUT (10*HZ) /* Default number of X seconds to wait */
-#define RD_EXPIRE 12           /* Number of attempts to wait X seconds */
-
-#define CPC_USB_BASE_MNR 0     /* CPC-USB start at minor 0  */
-
-#endif
diff --git a/drivers/staging/cpc-usb/sja2m16c.h b/drivers/staging/cpc-usb/sja2m16c.h
deleted file mode 100644 (file)
index 654bd3f..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _SJA2M16C_H
-#define _SJA2M16C_H
-
-#include "cpc.h"
-
-#define BAUDRATE_TOLERANCE_PERCENT     1
-#define SAMPLEPOINT_TOLERANCE_PERCENT  5
-#define SAMPLEPOINT_UPPER_LIMIT                88
-
-/* M16C parameters */
-struct FIELD_C0CONR {
-       unsigned int brp:4;
-       unsigned int sam:1;
-       unsigned int pr:3;
-       unsigned int dummy:8;
-};
-struct FIELD_C1CONR {
-       unsigned int ph1:3;
-       unsigned int ph2:3;
-       unsigned int sjw:2;
-       unsigned int dummy:8;
-};
-typedef union C0CONR {
-       unsigned char c0con;
-       struct FIELD_C0CONR bc0con;
-} C0CONR_T;
-typedef union C1CONR {
-       unsigned char c1con;
-       struct FIELD_C1CONR bc1con;
-} C1CONR_T;
-
-#define SJA_TSEG1      ((pParams->btr1 & 0x0f)+1)
-#define SJA_TSEG2      (((pParams->btr1 & 0x70)>>4)+1)
-#define SJA_BRP                ((pParams->btr0 & 0x3f)+1)
-#define SJA_SJW                ((pParams->btr0 & 0xc0)>>6)
-#define SJA_SAM                ((pParams->btr1 & 0x80)>>7)
-int baudrate_m16c(int clk, int brp, int pr, int ph1, int ph2);
-int samplepoint_m16c(int brp, int pr, int ph1, int ph2);
-int SJA1000_TO_M16C_BASIC_Params(CPC_MSG_T *pMsg);
-
-#endif
diff --git a/drivers/staging/cpc-usb/sja2m16c_2.c b/drivers/staging/cpc-usb/sja2m16c_2.c
deleted file mode 100644 (file)
index bf0230f..0000000
+++ /dev/null
@@ -1,452 +0,0 @@
-/****************************************************************************
-*
-*      Copyright (c) 2003,2004 by EMS Dr. Thomas Wuensche
-*
-*                  - All rights reserved -
-*
-* This code is provided "as is" without warranty of any kind, either
-* expressed or implied, including but not limited to the liability
-* concerning the freedom from material defects, the fitness for parti-
-* cular purposes or the freedom of proprietary rights of third parties.
-*
-*****************************************************************************
-* Module name.: cpcusb
-*****************************************************************************
-* Include file: cpc.h
-*****************************************************************************
-* Project.....: Windows Driver Development Kit
-* Filename....: sja2m16c.cpp
-* Authors.....: (GU) Gerhard Uttenthaler
-*               (CS) Christian Schoett
-*****************************************************************************
-* Short descr.: converts baudrate between SJA1000 and M16C
-*****************************************************************************
-* Description.: handles the baudrate conversion from SJA1000 parameters to
-*               M16C parameters
-*****************************************************************************
-* Address     : EMS Dr. Thomas Wuensche
-*               Sonnenhang 3
-*               D-85304 Ilmmuenster
-*               Tel. : +49-8441-490260
-*               Fax. : +49-8441-81860
-*               email: support@ems-wuensche.com
-*****************************************************************************
-*                            History
-*****************************************************************************
-* Version  Date        Auth Remark
-*
-* 01.00    ??          GU   - initial release
-* 01.10    ??????????  CS   - adapted to fit into the USB Windows driver
-* 02.00    18.08.2004  GU   - improved the baudrate calculating algorithm
-*                           - implemented acceptance filtering
-* 02.10    10.09.2004  CS   - adapted to fit into the USB Windows driver
-*****************************************************************************
-*                            ToDo's
-*****************************************************************************
-*/
-
-/****************************************************************************/
-/*     I N C L U D E S
-*/
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/poll.h>
-#include <linux/smp_lock.h>
-#include <linux/completion.h>
-#include <asm/uaccess.h>
-#include <linux/usb.h>
-
-#include "cpc.h"
-#include "cpc_int.h"
-#include "cpcusb.h"
-
-#include "sja2m16c.h"
-
-/*********************************************************************/
-int baudrate_m16c(int clk, int brp, int pr, int ph1, int ph2)
-{
-       return (16000000 / (1 << clk)) / 2 / (brp + 1) / (1 + pr + 1 +
-                                                           ph1 + 1 + ph2 +
-                                                           1);
-}
-
-
-/*********************************************************************/
-int samplepoint_m16c(int brp, int pr, int ph1, int ph2)
-{
-       return (100 * (1 + pr + 1 + ph1 + 1)) / (1 + pr + 1 + ph1 + 1 +
-                                                 ph2 + 1);
-}
-
-
-/****************************************************************************
-* Function.....: SJA1000_TO_M16C_BASIC_Params
-*
-* Task.........: This routine converts SJA1000 CAN btr parameters into M16C
-*                parameters based on the sample point and the error. In
-*                addition it converts the acceptance filter parameters to
-*                suit the M16C parameters
-*
-* Parameters...: pointer to the CPC_MSG_T to convert (modified in place)
-*
-* Return values: 0 on success, negative error code on failure
-*
-* Comments.....:
-*****************************************************************************
-*                History
-*****************************************************************************
-* 19.01.2005  CS   - modified the conversion of SJA1000 filter params into
-*                    M16C params. Due to compatibility reasons with the
-*                    older 82C200 CAN controller the SJA1000
-****************************************************************************/
-int SJA1000_TO_M16C_BASIC_Params(CPC_MSG_T * in)
-{
-       int sjaBaudrate;
-       int sjaSamplepoint;
-       int *baudrate_error;    // BRP[0..15], PR[0..7], PH1[0..7], PH2[0..7]
-       int *samplepoint_error; // BRP[0..15], PR[0..7], PH1[0..7], PH2[0..7]
-       int baudrate_error_merk;
-       int clk, brp, pr, ph1, ph2;
-       int clk_merk, brp_merk, pr_merk, ph1_merk, ph2_merk;
-       int index;
-       unsigned char acc_code0, acc_code1, acc_code2, acc_code3;
-       unsigned char acc_mask0, acc_mask1, acc_mask2, acc_mask3;
-       CPC_MSG_T * out;
-       C0CONR_T c0con;
-       C1CONR_T c1con;
-       int tmpAccCode;
-       int tmpAccMask;
-
-       /* we have to convert the parameters into M16C parameters */
-       CPC_SJA1000_PARAMS_T *pParams;
-
-       /* check if the type is CAN parameters and if we have to convert the given params */
-       if (in->type != CPC_CMD_T_CAN_PRMS
-           || in->msg.canparams.cc_type != SJA1000)
-               return 0;
-       pParams = (CPC_SJA1000_PARAMS_T *)&in->msg.canparams.cc_params.sja1000;
-       acc_code0 = pParams->acc_code0;
-       acc_code1 = pParams->acc_code1;
-       acc_code2 = pParams->acc_code2;
-       acc_code3 = pParams->acc_code3;
-       acc_mask0 = pParams->acc_mask0;
-       acc_mask1 = pParams->acc_mask1;
-       acc_mask2 = pParams->acc_mask2;
-       acc_mask3 = pParams->acc_mask3;
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
-       info("acc_code0: %2.2Xh\n", acc_code0);
-       info("acc_code1: %2.2Xh\n", acc_code1);
-       info("acc_code2: %2.2Xh\n", acc_code2);
-       info("acc_code3: %2.2Xh\n", acc_code3);
-       info("acc_mask0: %2.2Xh\n", acc_mask0);
-       info("acc_mask1: %2.2Xh\n", acc_mask1);
-       info("acc_mask2: %2.2Xh\n", acc_mask2);
-       info("acc_mask3: %2.2Xh\n", acc_mask3);
-#endif
-       baudrate_error = vmalloc(sizeof(int) * 16 * 8 * 8 * 8 * 5);
-       if (!baudrate_error) {
-               err("Could not allocate memory\n");
-               return -3;
-       }
-       samplepoint_error = vmalloc(sizeof(int) * 16 * 8 * 8 * 8 * 5);
-       if (!samplepoint_error) {
-               err("Could not allocate memory\n");
-               vfree(baudrate_error);
-               return -3;
-       }
-       /* fill the whole tables, not just sizeof(pointer) bytes */
-       memset(baudrate_error, 0xff, sizeof(int) * 16 * 8 * 8 * 8 * 5);
-       memset(samplepoint_error, 0xff, sizeof(int) * 16 * 8 * 8 * 8 * 5);
-       sjaBaudrate =
-           16000000 / 2 / SJA_BRP / (1 + SJA_TSEG1 + SJA_TSEG2);
-       sjaSamplepoint =
-           100 * (1 + SJA_TSEG1) / (1 + SJA_TSEG1 + SJA_TSEG2);
-       if (sjaBaudrate == 0) {
-               vfree(baudrate_error);
-               vfree(samplepoint_error);
-               return -2;
-       }
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
-       info("\nStarting SJA CAN params\n");
-       info("-------------------------\n");
-       info("TS1     : %2.2Xh TS2 : %2.2Xh\n", SJA_TSEG1, SJA_TSEG2);
-       info("BTR0    : %2.2Xh BTR1: %2.2Xh\n", pParams->btr0,
-            pParams->btr1);
-       info("Baudrate: %d.%dkBaud\n", sjaBaudrate / 1000,
-            sjaBaudrate % 1000);
-       info("Sample P: 0.%d\n", sjaSamplepoint);
-       info("\n");
-#endif
-       c0con.bc0con.sam = SJA_SAM;
-       c1con.bc1con.sjw = SJA_SJW;
-
-       /* calculate errors for all baudrates */
-       index = 0;
-       for (clk = 0; clk < 5; clk++) {
-               for (brp = 0; brp < 16; brp++) {
-                       for (pr = 0; pr < 8; pr++) {
-                               for (ph1 = 0; ph1 < 8; ph1++) {
-                                       for (ph2 = 0; ph2 < 8; ph2++) {
-                                               baudrate_error[index] =
-                                                       100 * abs(baudrate_m16c(clk, brp, pr, ph1, ph2)
-                                                                 - sjaBaudrate) / sjaBaudrate;
-                                               samplepoint_error[index] =
-                                                       abs(samplepoint_m16c(brp, pr, ph1, ph2)
-                                                           - sjaSamplepoint);
-#if 0
-                                               info("Baudrate      : %d kBaud\n",
-                                                    baudrate_m16c(clk, brp, pr, ph1, ph2));
-                                               info("Baudrate Error: %d\n", baudrate_error[index]);
-                                               info("Sample P Error: %d\n", samplepoint_error[index]);
-                                               info("clk           : %d\n", clk);
-#endif
-                                               index++;
-                                       }
-                               }
-                       }
-               }
-       }
-
-       /* mark all baudrate_error entries which are out of limits */
-       index = 0;
-       for (clk = 0; clk < 5; clk++) {
-               for (brp = 0; brp < 16; brp++) {
-                       for (pr = 0; pr < 8; pr++) {
-                               for (ph1 = 0; ph1 < 8; ph1++) {
-                                       for (ph2 = 0; ph2 < 8; ph2++) {
-                                               if ((baudrate_error[index] > BAUDRATE_TOLERANCE_PERCENT)
-                                                   || (samplepoint_error[index] > SAMPLEPOINT_TOLERANCE_PERCENT)
-                                                   || (samplepoint_m16c(brp, pr, ph1, ph2) > SAMPLEPOINT_UPPER_LIMIT)) {
-                                                       baudrate_error[index] = -1;
-                                               } else if (((1 + pr + 1 + ph1 + 1 + ph2 + 1) < 8)
-                                                          || ((1 + pr + 1 + ph1 + 1 + ph2 + 1) > 25)) {
-                                                       baudrate_error[index] = -1;
-                                               }
-#if 0
-                                               else {
-                                                       info("Baudrate      : %d kBaud\n",
-                                                            baudrate_m16c(clk, brp, pr, ph1, ph2));
-                                                       info("Baudrate Error: %d\n", baudrate_error[index]);
-                                                       info("Sample P Error: %d\n", samplepoint_error[index]);
-                                               }
-#endif
-                                               index++;
-                                       }
-                               }
-                       }
-               }
-       }
-
-       /* find the minimum baudrate_error among the unmarked entries */
-       clk_merk = brp_merk = pr_merk = ph1_merk = ph2_merk = 0;
-       baudrate_error_merk = 100;
-       index = 0;
-       for (clk = 0; clk < 5; clk++) {
-               for (brp = 0; brp < 16; brp++) {
-                       for (pr = 0; pr < 8; pr++) {
-                               for (ph1 = 0; ph1 < 8; ph1++) {
-                                       for (ph2 = 0; ph2 < 8; ph2++) {
-                                               if (baudrate_error[index] != -1
-                                                   && baudrate_error[index] < baudrate_error_merk) {
-                                                       baudrate_error_merk = baudrate_error[index];
-                                                       brp_merk = brp;
-                                                       pr_merk = pr;
-                                                       ph1_merk = ph1;
-                                                       ph2_merk = ph2;
-                                                       clk_merk = clk;
-#if 0
-                                                       info("brp: %2.2Xh pr: %2.2Xh ph1: %2.2Xh ph2: %2.2Xh\n",
-                                                            brp, pr, ph1, ph2);
-                                                       info("Baudrate      : %d kBaud\n",
-                                                            baudrate_m16c(clk, brp, pr, ph1, ph2));
-                                                       info("Baudrate Error: %d\n", baudrate_error[index]);
-                                                       info("Sample P Error: %d\n", samplepoint_error[index]);
-#endif
-                                               }
-                                               index++;
-                                       }
-                               }
-                       }
-               }
-       }
-       if (baudrate_error_merk == 100) {
-               info("ERROR: Could not convert CAN init parameter\n");
-               vfree(baudrate_error);
-               vfree(samplepoint_error);
-               return -1;
-       }
-
-       /* setting m16c CAN parameter */
-       c0con.bc0con.brp = brp_merk;
-       c0con.bc0con.pr = pr_merk;
-       c1con.bc1con.ph1 = ph1_merk;
-       c1con.bc1con.ph2 = ph2_merk;
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
-       info("\nResulting M16C CAN params\n");
-       info("-------------------------\n");
-       info("clk     : %2.2Xh\n", clk_merk);
-       info("ph1     : %2.2Xh ph2: %2.2Xh\n", c1con.bc1con.ph1 + 1,
-            c1con.bc1con.ph2 + 1);
-       info("pr      : %2.2Xh brp: %2.2Xh\n", c0con.bc0con.pr + 1,
-            c0con.bc0con.brp + 1);
-       info("sjw     : %2.2Xh sam: %2.2Xh\n", c1con.bc1con.sjw,
-            c0con.bc0con.sam);
-       info("co1     : %2.2Xh co0: %2.2Xh\n", c1con.c1con, c0con.c0con);
-       info("Baudrate: %d.%dBaud\n",
-            baudrate_m16c(clk_merk, c0con.bc0con.brp, c0con.bc0con.pr,
-                          c1con.bc1con.ph1, c1con.bc1con.ph2) / 1000,
-            baudrate_m16c(clk_merk, c0con.bc0con.brp, c0con.bc0con.pr,
-                          c1con.bc1con.ph1, c1con.bc1con.ph2) % 1000);
-       info("Sample P: 0.%d\n",
-            samplepoint_m16c(c0con.bc0con.brp, c0con.bc0con.pr,
-                             c1con.bc1con.ph1, c1con.bc1con.ph2));
-       info("\n");
-#endif
-       out = in;
-       out->type = 6;
-       out->length = sizeof(CPC_M16C_BASIC_PARAMS_T) + 1;
-       out->msg.canparams.cc_type = M16C_BASIC;
-       out->msg.canparams.cc_params.m16c_basic.con0 = c0con.c0con;
-       out->msg.canparams.cc_params.m16c_basic.con1 = c1con.c1con;
-       out->msg.canparams.cc_params.m16c_basic.ctlr0 = 0x4C;
-       out->msg.canparams.cc_params.m16c_basic.ctlr1 = 0x00;
-       out->msg.canparams.cc_params.m16c_basic.clk = clk_merk;
-       out->msg.canparams.cc_params.m16c_basic.acc_std_code0 = acc_code0;
-       out->msg.canparams.cc_params.m16c_basic.acc_std_code1 = acc_code1;
-
-//     info("code0: 0x%2.2X, code1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_code0, out->msg.canparams.cc_params.m16c_basic.acc_std_code1);
-       tmpAccCode = (acc_code1 >> 5) + (acc_code0 << 3);
-       out->msg.canparams.cc_params.m16c_basic.acc_std_code0 =
-           (unsigned char) tmpAccCode;
-       out->msg.canparams.cc_params.m16c_basic.acc_std_code1 =
-           (unsigned char) (tmpAccCode >> 8);
-
-//     info("code0: 0x%2.2X, code1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_code0, out->msg.canparams.cc_params.m16c_basic.acc_std_code1);
-       out->msg.canparams.cc_params.m16c_basic.acc_std_mask0 = ~acc_mask0;
-       out->msg.canparams.cc_params.m16c_basic.acc_std_mask1 = ~acc_mask1;
-
-//     info("mask0: 0x%2.2X, mask1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_mask0, out->msg.canparams.cc_params.m16c_basic.acc_std_mask1);
-       tmpAccMask = ((acc_mask1) >> 5) + ((acc_mask0) << 3);
-
-//     info("tmpAccMask: 0x%4.4X\n", tmpAccMask);
-       out->msg.canparams.cc_params.m16c_basic.acc_std_mask0 =
-           (unsigned char) ~tmpAccMask;
-       out->msg.canparams.cc_params.m16c_basic.acc_std_mask1 =
-           (unsigned char) ~(tmpAccMask >> 8);
-
-//     info("mask0: 0x%2.2X, mask1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_mask0, out->msg.canparams.cc_params.m16c_basic.acc_std_mask1);
-       out->msg.canparams.cc_params.m16c_basic.acc_ext_code0 =
-           (unsigned char) tmpAccCode;
-       out->msg.canparams.cc_params.m16c_basic.acc_ext_code1 =
-           (unsigned char) (tmpAccCode >> 8);
-       out->msg.canparams.cc_params.m16c_basic.acc_ext_code2 = acc_code2;
-       out->msg.canparams.cc_params.m16c_basic.acc_ext_code3 = acc_code3;
-       out->msg.canparams.cc_params.m16c_basic.acc_ext_mask0 =
-           (unsigned char) ~tmpAccMask;
-       out->msg.canparams.cc_params.m16c_basic.acc_ext_mask1 =
-           (unsigned char) ~(tmpAccMask >> 8);
-       out->msg.canparams.cc_params.m16c_basic.acc_ext_mask2 = ~acc_mask2;
-       out->msg.canparams.cc_params.m16c_basic.acc_ext_mask3 = ~acc_mask3;
-       vfree(baudrate_error);
-       vfree(samplepoint_error);
-       return 0;
-}
-
-
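
The arithmetic the deleted converter is built on is compact: the M16C bit rate is the 16 MHz CAN clock, divided down by 1 << clk and a fixed /2, then by the prescaler and by the number of time quanta per bit, and the sample point is the percentage of those quanta spent before PH2. A standalone check of one operating point (the parameter values are chosen here purely for illustration):

    #include <assert.h>
    #include <stdio.h>

    /* same formulas as the deleted baudrate_m16c()/samplepoint_m16c() */
    static int baudrate(int clk, int brp, int pr, int ph1, int ph2)
    {
            return (16000000 / (1 << clk)) / 2 / (brp + 1)
                   / (1 + (pr + 1) + (ph1 + 1) + (ph2 + 1));
    }

    static int samplepoint(int brp, int pr, int ph1, int ph2)
    {
            (void)brp;      /* kept to mirror the original signature */
            return (100 * (1 + (pr + 1) + (ph1 + 1)))
                   / (1 + (pr + 1) + (ph1 + 1) + (ph2 + 1));
    }

    int main(void)
    {
            /* clk=0 (16 MHz), brp=0, pr=2, ph1=1, ph2=1: 8 tq per bit */
            assert(baudrate(0, 0, 2, 1, 1) == 1000000);  /* 1 MBit/s */
            assert(samplepoint(0, 2, 1, 1) == 75);       /* sampled at 75% */
            printf("ok\n");
            return 0;
    }
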
index d14ea84a01f62f4ce7b82f51b22ec72381af2052..1301caa7495ddcdeb9515b2988d96be3f04e4d5f 100644 (file)
@@ -32,8 +32,3 @@ endif
 
 EXTRA_CFLAGS += -Idrivers/media/dvb/frontends
 EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core
-
-# Ubuntu 8.04 has CONFIG_SND undefined, so include lum sound/config.h too
-ifeq ($(CONFIG_SND),)
-EXTRA_CFLAGS += -include sound/config.h
-endif
index ebd7237230e34107398929f6b662a0a04f54fc40..240750881d28d0a8c2d5c3d5a10312c701afe83b 100644 (file)
@@ -22,7 +22,6 @@ config USB_ARCH_HAS_HCD
        default y if PCMCIA && !M32R                    # sl811_cs
        default y if ARM                                # SL-811
        default y if SUPERH                             # r8a66597-hcd
-       default y if MICROBLAZE
        default PCI
 
 # many non-PCI SOC chips embed OHCI
index eb6ddfc20857741354ca959702dfbdae0f55103f..6cb29d3df575b3757abb639de812d0cd42eb8daf 100644 (file)
@@ -22,7 +22,6 @@
 /* #define VERBOSE_DEBUG */
 
 #include <linux/kernel.h>
-#include <linux/utsname.h>
 #include <linux/device.h>
 
 #include "g_zero.h"
index 46d6266f30ec6f788b0adcf491d162de6f02c80f..b4a3ba654ea53def501a4b7b6befdf83ba78bad1 100644 (file)
@@ -24,7 +24,6 @@
 /* #define VERBOSE_DEBUG */
 
 #include <linux/kernel.h>
-#include <linux/utsname.h>
 #include <linux/device.h>
 
 #include "u_serial.h"
index bffe91d525f977ba2fa32dec574996e99f67ad72..09cba273d2dbda8c7fd346745410c9717fc87df2 100644 (file)
@@ -22,7 +22,6 @@
 /* #define VERBOSE_DEBUG */
 
 #include <linux/kernel.h>
-#include <linux/utsname.h>
 #include <linux/device.h>
 
 #include "g_zero.h"
index b5200d551458d9dc6a75fe88c9c1ac014e588d13..8252595d619d1a456e8e4f72db4096b996a1286e 100644 (file)
@@ -10,7 +10,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/utsname.h>
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/ctype.h>
index f8751ff863cdfa0711e661812f4ed0bc066aae59..2fc02bd9584882b3cf523864f33dc0f412af1471 100644 (file)
@@ -23,7 +23,6 @@
 /* #define VERBOSE_DEBUG */
 
 #include <linux/kernel.h>
-#include <linux/utsname.h>
 #include <linux/device.h>
 #include <linux/ctype.h>
 #include <linux/etherdevice.h>
index 68fa0e43b78107fd7b02bac3d6ede9049edcbf01..8c075b2416bb6237402326dcd92d9d00dc454b7c 100644 (file)
@@ -912,6 +912,7 @@ static void sierra_release(struct usb_serial *serial)
        }
 }
 
+#ifdef CONFIG_PM
 static void stop_read_write_urbs(struct usb_serial *serial)
 {
        int i, j;
@@ -988,6 +989,10 @@ static int sierra_resume(struct usb_serial *serial)
 
        return ec ? -EIO : 0;
 }
+#else
+#define sierra_suspend NULL
+#define sierra_resume NULL
+#endif
 
 static struct usb_serial_driver sierra_device = {
        .driver = {
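
The sierra hunk above illustrates a standard kernel idiom: the suspend/resume implementations are compiled only under CONFIG_PM, and the names are #defined to NULL otherwise so that the ops structure still builds. A toy stand-alone rendering of the pattern (not the sierra code itself):

    #include <stdio.h>

    #define MY_CONFIG_PM 1          /* flip to 0 to mimic a !CONFIG_PM build */

    #if MY_CONFIG_PM
    static int drv_suspend(void) { return 0; }     /* real work elided */
    static int drv_resume(void)  { return 0; }
    #else
    #define drv_suspend NULL        /* struct field stays valid, just unset */
    #define drv_resume  NULL
    #endif

    struct driver_ops {
            int (*suspend)(void);
            int (*resume)(void);
    };

    int main(void)
    {
            struct driver_ops ops = {
                    .suspend = drv_suspend,
                    .resume  = drv_resume,
            };
            printf("suspend is %s\n", ops.suspend ? "available" : "stubbed out");
            return 0;
    }
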
index ba3d71f5c7d09a1464a3bfdaf09dc6bf646e80b0..9554ad5f9af799641e6eda438f136886704a061a 100644 (file)
@@ -702,7 +702,7 @@ static int vlynq_probe(struct platform_device *pdev)
        dev->mem_start = mem_res->start;
        dev->mem_end = mem_res->end;
 
-       len = regs_res->end - regs_res->start;
+       len = resource_size(regs_res);
        if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) {
                printk(KERN_ERR "%s: Can't request vlynq registers\n",
                       dev_name(&dev->dev));
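
This hunk is more than a cosmetic helper swap: struct resource ranges are inclusive, so resource_size() evaluates to end - start + 1, while the replaced open-coded subtraction requested one byte too few. A stand-alone illustration (struct and helper re-declared here to match the kernel definitions):

    #include <assert.h>

    struct resource { unsigned long start, end; };   /* inclusive range */

    /* same definition as the kernel helper */
    static unsigned long resource_size(const struct resource *r)
    {
            return r->end - r->start + 1;
    }

    int main(void)
    {
            struct resource regs = { 0x1000, 0x1fff };  /* a 4 KiB window */

            assert(regs.end - regs.start == 0xfff);     /* old, short value */
            assert(resource_size(&regs) == 0x1000);     /* correct length   */
            return 0;
    }
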
index 74e0723e90bcf8f15d6c9c51ae0c5dc0c45073fb..795233702a4ee07e9db56c8939a06691be7ae665 100644 (file)
@@ -8,3 +8,12 @@ config 9P_FS
          See <http://v9fs.sf.net> for more information.
 
          If unsure, say N.
+
+config 9P_FSCACHE
+       bool "Enable 9P client caching support (EXPERIMENTAL)"
+       depends on EXPERIMENTAL
+       depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y
+       help
+         Choose Y here to enable persistent, read-only local
+         caching support for 9p clients using FS-Cache.
+
index bc7f0d1551e68f3efacd09795433886f318f239f..1a940ec7af611859aa01b012e866c66fdc8cf9e0 100644 (file)
@@ -8,5 +8,6 @@ obj-$(CONFIG_9P_FS) := 9p.o
        vfs_dir.o \
        vfs_dentry.o \
        v9fs.o \
-       fid.o \
+       fid.o
 
+9p-$(CONFIG_9P_FSCACHE) += cache.o
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
new file mode 100644 (file)
index 0000000..51c94e2
--- /dev/null
@@ -0,0 +1,474 @@
+/*
+ * V9FS cache definitions.
+ *
+ *  Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/jiffies.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <net/9p/9p.h>
+
+#include "v9fs.h"
+#include "cache.h"
+
+#define CACHETAG_LEN  11
+
+struct kmem_cache *vcookie_cache;
+
+struct fscache_netfs v9fs_cache_netfs = {
+       .name           = "9p",
+       .version        = 0,
+};
+
+static void init_once(void *foo)
+{
+       struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
+       vcookie->fscache = NULL;
+       vcookie->qid = NULL;
+       inode_init_once(&vcookie->inode);
+}
+
+/**
+ * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
+ *                         vcookie to inode mapping
+ *
+ * Returns 0 on success.
+ */
+
+static int v9fs_init_vcookiecache(void)
+{
+       vcookie_cache = kmem_cache_create("vcookie_cache",
+                                         sizeof(struct v9fs_cookie),
+                                         0, (SLAB_RECLAIM_ACCOUNT|
+                                             SLAB_MEM_SPREAD),
+                                         init_once);
+       if (!vcookie_cache)
+               return -ENOMEM;
+
+       return 0;
+}
+
+/**
+ * v9fs_destroy_vcookiecache - destroy the cache of vcookies
+ *
+ */
+
+static void v9fs_destroy_vcookiecache(void)
+{
+       kmem_cache_destroy(vcookie_cache);
+}
+
+int __v9fs_cache_register(void)
+{
+       int ret;
+       ret = v9fs_init_vcookiecache();
+       if (ret < 0)
+               return ret;
+
+       return fscache_register_netfs(&v9fs_cache_netfs);
+}
+
+void __v9fs_cache_unregister(void)
+{
+       v9fs_destroy_vcookiecache();
+       fscache_unregister_netfs(&v9fs_cache_netfs);
+}
+
+/**
+ * v9fs_random_cachetag - Generate a random tag to be associated
+ *                       with a new cache session.
+ *
+ * The value of jiffies is used as a fairly random cache tag.
+ */
+
+static
+int v9fs_random_cachetag(struct v9fs_session_info *v9ses)
+{
+       v9ses->cachetag = kmalloc(CACHETAG_LEN, GFP_KERNEL);
+       if (!v9ses->cachetag)
+               return -ENOMEM;
+
+       return scnprintf(v9ses->cachetag, CACHETAG_LEN, "%lu", jiffies);
+}
+
+static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
+                                          void *buffer, uint16_t bufmax)
+{
+       struct v9fs_session_info *v9ses;
+       uint16_t klen = 0;
+
+       v9ses = (struct v9fs_session_info *)cookie_netfs_data;
+       P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses,
+                  buffer, bufmax);
+
+       if (v9ses->cachetag)
+               klen = strlen(v9ses->cachetag);
+
+       if (klen > bufmax)
+               return 0;
+
+       memcpy(buffer, v9ses->cachetag, klen);
+       P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag);
+       return klen;
+}
+
+const struct fscache_cookie_def v9fs_cache_session_index_def = {
+       .name           = "9P.session",
+       .type           = FSCACHE_COOKIE_TYPE_INDEX,
+       .get_key        = v9fs_cache_session_get_key,
+};
+
+void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
+{
+       /* If no cache session tag was specified, we generate a random one. */
+       if (!v9ses->cachetag)
+               v9fs_random_cachetag(v9ses);
+
+       v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
+                                               &v9fs_cache_session_index_def,
+                                               v9ses);
+       P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses,
+                  v9ses->fscache);
+}
+
+void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
+{
+       P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses,
+                  v9ses->fscache);
+       fscache_relinquish_cookie(v9ses->fscache, 0);
+       v9ses->fscache = NULL;
+}
+
+
+static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
+                                        void *buffer, uint16_t bufmax)
+{
+       const struct v9fs_cookie *vcookie = cookie_netfs_data;
+       memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path));
+
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode,
+                  vcookie->qid->path);
+       return sizeof(vcookie->qid->path);
+}
+
+static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
+                                     uint64_t *size)
+{
+       const struct v9fs_cookie *vcookie = cookie_netfs_data;
+       *size = i_size_read(&vcookie->inode);
+
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode,
+                  *size);
+}
+
+static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
+                                        void *buffer, uint16_t buflen)
+{
+       const struct v9fs_cookie *vcookie = cookie_netfs_data;
+       memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version));
+
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode,
+                  vcookie->qid->version);
+       return sizeof(vcookie->qid->version);
+}
+
+static enum
+fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
+                                           const void *buffer,
+                                           uint16_t buflen)
+{
+       const struct v9fs_cookie *vcookie = cookie_netfs_data;
+
+       if (buflen != sizeof(vcookie->qid->version))
+               return FSCACHE_CHECKAUX_OBSOLETE;
+
+       if (memcmp(buffer, &vcookie->qid->version,
+                  sizeof(vcookie->qid->version)))
+               return FSCACHE_CHECKAUX_OBSOLETE;
+
+       return FSCACHE_CHECKAUX_OKAY;
+}
+
+static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
+{
+       struct v9fs_cookie *vcookie = cookie_netfs_data;
+       struct pagevec pvec;
+       pgoff_t first;
+       int loop, nr_pages;
+
+       pagevec_init(&pvec, 0);
+       first = 0;
+
+       for (;;) {
+               nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping,
+                                         first,
+                                         PAGEVEC_SIZE - pagevec_count(&pvec));
+               if (!nr_pages)
+                       break;
+
+               for (loop = 0; loop < nr_pages; loop++)
+                       ClearPageFsCache(pvec.pages[loop]);
+
+               first = pvec.pages[nr_pages - 1]->index + 1;
+
+               pvec.nr = nr_pages;
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+}
+
+const struct fscache_cookie_def v9fs_cache_inode_index_def = {
+       .name           = "9p.inode",
+       .type           = FSCACHE_COOKIE_TYPE_DATAFILE,
+       .get_key        = v9fs_cache_inode_get_key,
+       .get_attr       = v9fs_cache_inode_get_attr,
+       .get_aux        = v9fs_cache_inode_get_aux,
+       .check_aux      = v9fs_cache_inode_check_aux,
+       .now_uncached   = v9fs_cache_inode_now_uncached,
+};
+
+void v9fs_cache_inode_get_cookie(struct inode *inode)
+{
+       struct v9fs_cookie *vcookie;
+       struct v9fs_session_info *v9ses;
+
+       if (!S_ISREG(inode->i_mode))
+               return;
+
+       vcookie = v9fs_inode2cookie(inode);
+       if (vcookie->fscache)
+               return;
+
+       v9ses = v9fs_inode2v9ses(inode);
+       vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+                                                 &v9fs_cache_inode_index_def,
+                                                 vcookie);
+
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
+                  vcookie->fscache);
+}
+
+void v9fs_cache_inode_put_cookie(struct inode *inode)
+{
+       struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+       if (!vcookie->fscache)
+               return;
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
+                  vcookie->fscache);
+
+       fscache_relinquish_cookie(vcookie->fscache, 0);
+       vcookie->fscache = NULL;
+}
+
+void v9fs_cache_inode_flush_cookie(struct inode *inode)
+{
+       struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+       if (!vcookie->fscache)
+               return;
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
+                  vcookie->fscache);
+
+       fscache_relinquish_cookie(vcookie->fscache, 1);
+       vcookie->fscache = NULL;
+}
+
+void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
+{
+       struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+       struct p9_fid *fid;
+
+       if (!vcookie->fscache)
+               return;
+
+       spin_lock(&vcookie->lock);
+       fid = filp->private_data;
+       if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+               v9fs_cache_inode_flush_cookie(inode);
+       else
+               v9fs_cache_inode_get_cookie(inode);
+
+       spin_unlock(&vcookie->lock);
+}
+
+void v9fs_cache_inode_reset_cookie(struct inode *inode)
+{
+       struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+       struct v9fs_session_info *v9ses;
+       struct fscache_cookie *old;
+
+       if (!vcookie->fscache)
+               return;
+
+       old = vcookie->fscache;
+
+       spin_lock(&vcookie->lock);
+       fscache_relinquish_cookie(vcookie->fscache, 1);
+
+       v9ses = v9fs_inode2v9ses(inode);
+       vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+                                                 &v9fs_cache_inode_index_def,
+                                                 vcookie);
+
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
+                  inode, old, vcookie->fscache);
+
+       spin_unlock(&vcookie->lock);
+}
+
+int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+       struct inode *inode = page->mapping->host;
+       struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+       BUG_ON(!vcookie->fscache);
+
+       if (PageFsCache(page)) {
+               if (fscache_check_page_write(vcookie->fscache, page)) {
+                       if (!(gfp & __GFP_WAIT))
+                               return 0;
+                       fscache_wait_on_page_write(vcookie->fscache, page);
+               }
+
+               fscache_uncache_page(vcookie->fscache, page);
+               ClearPageFsCache(page);
+       }
+
+       return 1;
+}
+
+void __v9fs_fscache_invalidate_page(struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+       BUG_ON(!vcookie->fscache);
+
+       if (PageFsCache(page)) {
+               fscache_wait_on_page_write(vcookie->fscache, page);
+               BUG_ON(!PageLocked(page));
+               fscache_uncache_page(vcookie->fscache, page);
+               ClearPageFsCache(page);
+       }
+}
+
+static void v9fs_vfs_readpage_complete(struct page *page, void *data,
+                                      int error)
+{
+       if (!error)
+               SetPageUptodate(page);
+
+       unlock_page(page);
+}
+
+/**
+ * __v9fs_readpage_from_fscache - read a page from cache
+ *
+ * Returns 0 if the page is in cache and a BIO is submitted,
+ * 1 if the page is not in cache and -error otherwise.
+ */
+
+int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+       int ret;
+       const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+       if (!vcookie->fscache)
+               return -ENOBUFS;
+
+       ret = fscache_read_or_alloc_page(vcookie->fscache,
+                                        page,
+                                        v9fs_vfs_readpage_complete,
+                                        NULL,
+                                        GFP_KERNEL);
+       switch (ret) {
+       case -ENOBUFS:
+       case -ENODATA:
+               P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", ret);
+               return 1;
+       case 0:
+               P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted");
+               return ret;
+       default:
+               P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret);
+               return ret;
+       }
+}
+
+/**
+ * __v9fs_readpages_from_fscache - read multiple pages from cache
+ *
+ * Returns 0 if the pages are in cache and a BIO is submitted,
+ * 1 if the pages are not in cache and -error otherwise.
+ */
+
+int __v9fs_readpages_from_fscache(struct inode *inode,
+                                 struct address_space *mapping,
+                                 struct list_head *pages,
+                                 unsigned *nr_pages)
+{
+       int ret;
+       const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
+       if (!vcookie->fscache)
+               return -ENOBUFS;
+
+       ret = fscache_read_or_alloc_pages(vcookie->fscache,
+                                         mapping, pages, nr_pages,
+                                         v9fs_vfs_readpage_complete,
+                                         NULL,
+                                         mapping_gfp_mask(mapping));
+       switch (ret) {
+       case -ENOBUFS:
+       case -ENODATA:
+               P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", ret);
+               return 1;
+       case 0:
+               BUG_ON(!list_empty(pages));
+               BUG_ON(*nr_pages != 0);
+               P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted");
+               return ret;
+       default:
+               P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret);
+               return ret;
+       }
+}
+
+/**
+ * __v9fs_readpage_to_fscache - write a page to the cache
+ *
+ */
+
+void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+       int ret;
+       const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+       P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+       ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL);
+       P9_DPRINTK(P9_DEBUG_FSC, "ret =  %d", ret);
+       if (ret != 0)
+               v9fs_uncache_page(inode, page);
+}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
new file mode 100644 (file)
index 0000000..a94192b
--- /dev/null
@@ -0,0 +1,176 @@
+/*
+ * V9FS cache definitions.
+ *
+ *  Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#ifndef _9P_CACHE_H
+#ifdef CONFIG_9P_FSCACHE
+#include <linux/fscache.h>
+#include <linux/spinlock.h>
+
+extern struct kmem_cache *vcookie_cache;
+
+struct v9fs_cookie {
+       spinlock_t lock;
+       struct inode inode;
+       struct fscache_cookie *fscache;
+       struct p9_qid *qid;
+};
+
+static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
+{
+       return container_of(inode, struct v9fs_cookie, inode);
+}
+
+extern struct fscache_netfs v9fs_cache_netfs;
+extern const struct fscache_cookie_def v9fs_cache_session_index_def;
+extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
+
+extern void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses);
+extern void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses);
+
+extern void v9fs_cache_inode_get_cookie(struct inode *inode);
+extern void v9fs_cache_inode_put_cookie(struct inode *inode);
+extern void v9fs_cache_inode_flush_cookie(struct inode *inode);
+extern void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp);
+extern void v9fs_cache_inode_reset_cookie(struct inode *inode);
+
+extern int __v9fs_cache_register(void);
+extern void __v9fs_cache_unregister(void);
+
+extern int __v9fs_fscache_release_page(struct page *page, gfp_t gfp);
+extern void __v9fs_fscache_invalidate_page(struct page *page);
+extern int __v9fs_readpage_from_fscache(struct inode *inode,
+                                       struct page *page);
+extern int __v9fs_readpages_from_fscache(struct inode *inode,
+                                        struct address_space *mapping,
+                                        struct list_head *pages,
+                                        unsigned *nr_pages);
+extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
+
+
+/**
+ * v9fs_cache_register - Register v9fs file system with the cache
+ */
+static inline int v9fs_cache_register(void)
+{
+       return __v9fs_cache_register();
+}
+
+/**
+ * v9fs_cache_unregister - Unregister v9fs from the cache
+ */
+static inline void v9fs_cache_unregister(void)
+{
+       __v9fs_cache_unregister();
+}
+
+static inline int v9fs_fscache_release_page(struct page *page,
+                                           gfp_t gfp)
+{
+       return __v9fs_fscache_release_page(page, gfp);
+}
+
+static inline void v9fs_fscache_invalidate_page(struct page *page)
+{
+       __v9fs_fscache_invalidate_page(page);
+}
+
+static inline int v9fs_readpage_from_fscache(struct inode *inode,
+                                            struct page *page)
+{
+       return __v9fs_readpage_from_fscache(inode, page);
+}
+
+static inline int v9fs_readpages_from_fscache(struct inode *inode,
+                                             struct address_space *mapping,
+                                             struct list_head *pages,
+                                             unsigned *nr_pages)
+{
+       return __v9fs_readpages_from_fscache(inode, mapping, pages,
+                                            nr_pages);
+}
+
+static inline void v9fs_readpage_to_fscache(struct inode *inode,
+                                           struct page *page)
+{
+       if (PageFsCache(page))
+               __v9fs_readpage_to_fscache(inode, page);
+}
+
+static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
+{
+       struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+       fscache_uncache_page(vcookie->fscache, page);
+       BUG_ON(PageFsCache(page));
+}
+
+static inline void v9fs_vcookie_set_qid(struct inode *inode,
+                                       struct p9_qid *qid)
+{
+       struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+       spin_lock(&vcookie->lock);
+       vcookie->qid = qid;
+       spin_unlock(&vcookie->lock);
+}
+
+#else /* CONFIG_9P_FSCACHE */
+
+static inline int v9fs_cache_register(void)
+{
+       return 1;
+}
+
+static inline void v9fs_cache_unregister(void) {}
+
+static inline int v9fs_fscache_release_page(struct page *page,
+                                           gfp_t gfp) {
+       return 1;
+}
+
+static inline void v9fs_fscache_invalidate_page(struct page *page) {}
+
+static inline int v9fs_readpage_from_fscache(struct inode *inode,
+                                            struct page *page)
+{
+       return -ENOBUFS;
+}
+
+static inline int v9fs_readpages_from_fscache(struct inode *inode,
+                                             struct address_space *mapping,
+                                             struct list_head *pages,
+                                             unsigned *nr_pages)
+{
+       return -ENOBUFS;
+}
+
+static inline void v9fs_readpage_to_fscache(struct inode *inode,
+                                           struct page *page)
+{}
+
+static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
+{}
+
+static inline void v9fs_vcookie_set_qid(struct inode *inode,
+                                       struct p9_qid *qid)
+{}
+
+#endif /* CONFIG_9P_FSCACHE */
+#endif /* _9P_CACHE_H */
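
The header above uses the usual Kconfig stub pattern: the real __v9fs_*
implementations are only built when CONFIG_9P_FSCACHE is set, and the
static inline wrappers either forward to them or degrade to no-ops and
-ENOBUFS so that callers compile unchanged either way. A minimal
standalone sketch of the same pattern (CONFIG_DEMO_CACHE and the demo_*
names are illustrative, not part of the patch):

#include <errno.h>
#include <stdio.h>

/* Toggle to emulate CONFIG_9P_FSCACHE=y (1) or =n (0). */
#define CONFIG_DEMO_CACHE 1

#if CONFIG_DEMO_CACHE
/* "Real" implementation; in the kernel this lives in its own .c file. */
static int __demo_read_from_cache(int key)
{
        return key == 42 ? 0 : -ENOBUFS;        /* 0 means cache hit */
}

static inline int demo_read_from_cache(int key)
{
        return __demo_read_from_cache(key);
}
#else
/* Stub: callers compile unchanged, the cache simply always misses. */
static inline int demo_read_from_cache(int key)
{
        (void)key;
        return -ENOBUFS;
}
#endif

int main(void)
{
        printf("lookup(42) -> %d\n", demo_read_from_cache(42));
        printf("lookup(7)  -> %d\n", demo_read_from_cache(7));
        return 0;
}
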
index f7003cfac63d3307b7797f4637985872a28f9a03..cf62b05e296a67af103178e0b86e673cb1d16031 100644 (file)
 #include <net/9p/transport.h>
 #include "v9fs.h"
 #include "v9fs_vfs.h"
+#include "cache.h"
+
+static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
+static LIST_HEAD(v9fs_sessionlist);
 
 /*
 * Option Parsing (code inspired by NFS code)
 *  NOTE: each transport will parse its own options
 */
 
 enum {
        /* Options that take integer arguments */
        Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid,
        /* String options */
-       Opt_uname, Opt_remotename, Opt_trans,
+       Opt_uname, Opt_remotename, Opt_trans, Opt_cache, Opt_cachetag,
        /* Options that take no arguments */
        Opt_nodevmap,
        /* Cache options */
-       Opt_cache_loose,
+       Opt_cache_loose, Opt_fscache,
        /* Access options */
        Opt_access,
        /* Error token */
@@ -63,8 +67,10 @@ static const match_table_t tokens = {
        {Opt_uname, "uname=%s"},
        {Opt_remotename, "aname=%s"},
        {Opt_nodevmap, "nodevmap"},
-       {Opt_cache_loose, "cache=loose"},
+       {Opt_cache, "cache=%s"},
        {Opt_cache_loose, "loose"},
+       {Opt_fscache, "fscache"},
+       {Opt_cachetag, "cachetag=%s"},
        {Opt_access, "access=%s"},
        {Opt_err, NULL}
 };
@@ -89,16 +95,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
        v9ses->afid = ~0;
        v9ses->debug = 0;
        v9ses->cache = 0;
+#ifdef CONFIG_9P_FSCACHE
+       v9ses->cachetag = NULL;
+#endif
 
        if (!opts)
                return 0;
 
        options = kstrdup(opts, GFP_KERNEL);
-       if (!options) {
-               P9_DPRINTK(P9_DEBUG_ERROR,
-                          "failed to allocate copy of option string\n");
-               return -ENOMEM;
-       }
+       if (!options)
+               goto fail_option_alloc;
 
        while ((p = strsep(&options, ",")) != NULL) {
                int token;
@@ -143,16 +149,33 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
                case Opt_cache_loose:
                        v9ses->cache = CACHE_LOOSE;
                        break;
+               case Opt_fscache:
+                       v9ses->cache = CACHE_FSCACHE;
+                       break;
+               case Opt_cachetag:
+#ifdef CONFIG_9P_FSCACHE
+                       v9ses->cachetag = match_strdup(&args[0]);
+#endif
+                       break;
+               case Opt_cache:
+                       s = match_strdup(&args[0]);
+                       if (!s)
+                               goto fail_option_alloc;
+
+                       if (strcmp(s, "loose") == 0)
+                               v9ses->cache = CACHE_LOOSE;
+                       else if (strcmp(s, "fscache") == 0)
+                               v9ses->cache = CACHE_FSCACHE;
+                       else
+                               v9ses->cache = CACHE_NONE;
+                       kfree(s);
+                       break;
 
                case Opt_access:
                        s = match_strdup(&args[0]);
-                       if (!s) {
-                               P9_DPRINTK(P9_DEBUG_ERROR,
-                                          "failed to allocate copy"
-                                          " of option argument\n");
-                               ret = -ENOMEM;
-                               break;
-                       }
+                       if (!s)
+                               goto fail_option_alloc;
+
                        v9ses->flags &= ~V9FS_ACCESS_MASK;
                        if (strcmp(s, "user") == 0)
                                v9ses->flags |= V9FS_ACCESS_USER;
@@ -173,6 +196,11 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
        }
        kfree(options);
        return ret;
+
+fail_option_alloc:
+       P9_DPRINTK(P9_DEBUG_ERROR,
+                  "failed to allocate copy of option argument\n");
+       return -ENOMEM;
 }
 
 /**
@@ -200,6 +228,10 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
                return ERR_PTR(-ENOMEM);
        }
 
+       spin_lock(&v9fs_sessionlist_lock);
+       list_add(&v9ses->slist, &v9fs_sessionlist);
+       spin_unlock(&v9fs_sessionlist_lock);
+
        v9ses->flags = V9FS_EXTENDED | V9FS_ACCESS_USER;
        strcpy(v9ses->uname, V9FS_DEFUSER);
        strcpy(v9ses->aname, V9FS_DEFANAME);
@@ -249,6 +281,11 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
        else
                fid->uid = ~0;
 
+#ifdef CONFIG_9P_FSCACHE
+       /* register the session for caching */
+       v9fs_cache_session_get_cookie(v9ses);
+#endif
+
        return fid;
 
 error:
@@ -268,8 +305,18 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
                v9ses->clnt = NULL;
        }
 
+#ifdef CONFIG_9P_FSCACHE
+       if (v9ses->fscache) {
+               v9fs_cache_session_put_cookie(v9ses);
+               kfree(v9ses->cachetag);
+       }
+#endif
        __putname(v9ses->uname);
        __putname(v9ses->aname);
+
+       spin_lock(&v9fs_sessionlist_lock);
+       list_del(&v9ses->slist);
+       spin_unlock(&v9fs_sessionlist_lock);
 }
 
 /**
@@ -286,25 +333,132 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
 
 extern int v9fs_error_init(void);
 
+static struct kobject *v9fs_kobj;
+
+#ifdef CONFIG_9P_FSCACHE
 /**
- * v9fs_init - Initialize module
+ * caches_show - list the caches associated with each session
+ *
+ * Returns the number of bytes written to the buffer.
+ */
+
+static ssize_t caches_show(struct kobject *kobj,
+                          struct kobj_attribute *attr,
+                          char *buf)
+{
+       ssize_t n = 0, count = 0, limit = PAGE_SIZE;
+       struct v9fs_session_info *v9ses;
+
+       spin_lock(&v9fs_sessionlist_lock);
+       list_for_each_entry(v9ses, &v9fs_sessionlist, slist) {
+               if (v9ses->cachetag) {
+                       n = snprintf(buf + count, limit, "%s\n", v9ses->cachetag);
+                       if (n < 0) {
+                               count = n;
+                               break;
+                       }
+
+                       count += n;
+                       limit -= n;
+               }
+       }
+
+       spin_unlock(&v9fs_sessionlist_lock);
+       return count;
+}
+
+static struct kobj_attribute v9fs_attr_cache = __ATTR_RO(caches);
+#endif /* CONFIG_9P_FSCACHE */
+
+static struct attribute *v9fs_attrs[] = {
+#ifdef CONFIG_9P_FSCACHE
+       &v9fs_attr_cache.attr,
+#endif
+       NULL,
+};
+
+static struct attribute_group v9fs_attr_group = {
+       .attrs = v9fs_attrs,
+};
+
+/**
+ * v9fs_sysfs_init - Initialize the v9fs sysfs interface
+ *
+ */
+
+static int v9fs_sysfs_init(void)
+{
+       v9fs_kobj = kobject_create_and_add("9p", fs_kobj);
+       if (!v9fs_kobj)
+               return -ENOMEM;
+
+       if (sysfs_create_group(v9fs_kobj, &v9fs_attr_group)) {
+               kobject_put(v9fs_kobj);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/**
+ * v9fs_sysfs_cleanup - Unregister the v9fs sysfs interface
+ *
+ */
+
+static void v9fs_sysfs_cleanup(void)
+{
+       sysfs_remove_group(v9fs_kobj, &v9fs_attr_group);
+       kobject_put(v9fs_kobj);
+}
+
+/**
+ * init_v9fs - Initialize module
  *
  */
 
 static int __init init_v9fs(void)
 {
+       int err;
        printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
        /* TODO: Set up list of registered transport modules */
-       return register_filesystem(&v9fs_fs_type);
+       err = register_filesystem(&v9fs_fs_type);
+       if (err < 0) {
+               printk(KERN_ERR "Failed to register filesystem\n");
+               return err;
+       }
+
+       err = v9fs_cache_register();
+       if (err < 0) {
+               printk(KERN_ERR "Failed to register v9fs for caching\n");
+               goto out_fs_unreg;
+       }
+
+       err = v9fs_sysfs_init();
+       if (err < 0) {
+               printk(KERN_ERR "Failed to register with sysfs\n");
+               goto out_sysfs_cleanup;
+       }
+
+       return 0;
+
+out_sysfs_cleanup:
+       v9fs_sysfs_cleanup();
+
+out_fs_unreg:
+       unregister_filesystem(&v9fs_fs_type);
+
+       return err;
 }
 
 /**
- * v9fs_init - shutdown module
+ * exit_v9fs - shutdown module
  *
  */
 
 static void __exit exit_v9fs(void)
 {
+       v9fs_sysfs_cleanup();
+       v9fs_cache_unregister();
        unregister_filesystem(&v9fs_fs_type);
 }
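
The new Opt_cache case accepts "loose" and "fscache" and silently falls
back to CACHE_NONE for any other value. A small userspace sketch of that
fallback behaviour; strtok() stands in for the kernel's strsep() loop,
and the names below are illustrative, not 9p code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum cache_mode { CACHE_NONE, CACHE_LOOSE, CACHE_FSCACHE };

/* Scan a mount-style option string such as "uname=root,cache=fscache". */
static enum cache_mode parse_cache_opt(const char *opts)
{
        enum cache_mode mode = CACHE_NONE;
        char *dup = strdup(opts);
        char *p;

        if (!dup)
                return CACHE_NONE;
        for (p = strtok(dup, ","); p; p = strtok(NULL, ",")) {
                if (strncmp(p, "cache=", 6) != 0)
                        continue;
                if (strcmp(p + 6, "loose") == 0)
                        mode = CACHE_LOOSE;
                else if (strcmp(p + 6, "fscache") == 0)
                        mode = CACHE_FSCACHE;
                else
                        mode = CACHE_NONE;      /* unknown value: no cache */
        }
        free(dup);
        return mode;
}

int main(void)
{
        printf("%d\n", parse_cache_opt("uname=root,cache=fscache,nodevmap"));
        printf("%d\n", parse_cache_opt("cache=bogus"));
        return 0;
}
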
 
index 38762bf102a9b04d124400ddfbb0b59daa007005..019f4ccb70c1a14d302d8d8e945cdbd98455bb43 100644 (file)
@@ -51,6 +51,7 @@ enum p9_session_flags {
 enum p9_cache_modes {
        CACHE_NONE,
        CACHE_LOOSE,
+       CACHE_FSCACHE,
 };
 
 /**
@@ -60,6 +61,8 @@ enum p9_cache_modes {
  * @debug: debug level
  * @afid: authentication handle
  * @cache: cache mode of type &p9_cache_modes
+ * @cachetag: the tag of the cache associated with this session
+ * @fscache: session cookie associated with FS-Cache
  * @options: copy of options string given by user
  * @uname: string user name to mount hierarchy as
  * @aname: mount specifier for remote hierarchy
@@ -68,7 +71,7 @@ enum p9_cache_modes {
  * @dfltgid: default numeric groupid to mount hierarchy as
  * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy
  * @clnt: reference to 9P network client instantiated for this session
- * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug
+ * @slist: reference to list of registered 9p sessions
  *
  * This structure holds state for each session instance established during
  * a sys_mount() .
@@ -84,6 +87,10 @@ struct v9fs_session_info {
        unsigned short debug;
        unsigned int afid;
        unsigned int cache;
+#ifdef CONFIG_9P_FSCACHE
+       char *cachetag;
+       struct fscache_cookie *fscache;
+#endif
 
        char *uname;            /* user name to mount as */
        char *aname;            /* name of remote hierarchy being mounted */
@@ -92,11 +99,9 @@ struct v9fs_session_info {
        unsigned int dfltgid;   /* default gid for legacy support */
        u32 uid;                /* if ACCESS_SINGLE, the uid that has access */
        struct p9_client *clnt; /* 9p client */
-       struct dentry *debugfs_dir;
+       struct list_head slist; /* list of sessions registered with v9fs */
 };
 
-extern struct dentry *v9fs_debugfs_root;
-
 struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
                                                                        char *);
 void v9fs_session_close(struct v9fs_session_info *v9ses);
index f0c7de78e205b1063fa9d552617c7d17a4123997..3a7560e358657f059897e6f27aaa3ce4a20c0953 100644 (file)
@@ -44,7 +44,13 @@ extern const struct file_operations v9fs_dir_operations;
 extern const struct dentry_operations v9fs_dentry_operations;
 extern const struct dentry_operations v9fs_cached_dentry_operations;
 
+#ifdef CONFIG_9P_FSCACHE
+struct inode *v9fs_alloc_inode(struct super_block *sb);
+void v9fs_destroy_inode(struct inode *inode);
+#endif
+
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+void v9fs_clear_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
index 92828281a30b9a0a449255003d6bf4468fab0512..90e38449f4b3a7dd0ab5c642e63bc504eb5adaef 100644 (file)
@@ -38,6 +38,7 @@
 
 #include "v9fs.h"
 #include "v9fs_vfs.h"
+#include "cache.h"
 
 /**
  * v9fs_vfs_readpage - read an entire page in from 9P
@@ -52,18 +53,31 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
        int retval;
        loff_t offset;
        char *buffer;
+       struct inode *inode;
 
+       inode = page->mapping->host;
        P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+       BUG_ON(!PageLocked(page));
+
+       retval = v9fs_readpage_from_fscache(inode, page);
+       if (retval == 0)
+               return retval;
+
        buffer = kmap(page);
        offset = page_offset(page);
 
        retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset);
-       if (retval < 0)
+       if (retval < 0) {
+               v9fs_uncache_page(inode, page);
                goto done;
+       }
 
        memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
        flush_dcache_page(page);
        SetPageUptodate(page);
+
+       v9fs_readpage_to_fscache(inode, page);
        retval = 0;
 
 done:
@@ -72,6 +86,78 @@ done:
        return retval;
 }
 
+/**
+ * v9fs_vfs_readpages - read a set of pages from 9P
+ *
+ * @filp: file being read
+ * @mapping: the address space
+ * @pages: list of pages to read
+ * @nr_pages: count of pages to read
+ *
+ */
+
+static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
+                            struct list_head *pages, unsigned nr_pages)
+{
+       int ret = 0;
+       struct inode *inode;
+
+       inode = mapping->host;
+       P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp);
+
+       ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages);
+       if (ret == 0)
+               return ret;
+
+       ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp);
+       P9_DPRINTK(P9_DEBUG_VFS, "  = %d\n", ret);
+       return ret;
+}
+
+/**
+ * v9fs_release_page - release the private state associated with a page
+ *
+ * Returns 1 if the page can be released, 0 otherwise.
+ */
+
+static int v9fs_release_page(struct page *page, gfp_t gfp)
+{
+       if (PagePrivate(page))
+               return 0;
+
+       return v9fs_fscache_release_page(page, gfp);
+}
+
+/**
+ * v9fs_invalidate_page - Invalidate a page completely or partially
+ *
+ * @page: the page to invalidate
+ * @offset: offset in the page
+ */
+
+static void v9fs_invalidate_page(struct page *page, unsigned long offset)
+{
+       if (offset == 0)
+               v9fs_fscache_invalidate_page(page);
+}
+
+/**
+ * v9fs_launder_page - Write back a dirty page
+ * Since writes go directly to the server, we simply return 0
+ * here to indicate success.
+ *
+ * Returns 0 on success.
+ */
+
+static int v9fs_launder_page(struct page *page)
+{
+       return 0;
+}
+
 const struct address_space_operations v9fs_addr_operations = {
       .readpage = v9fs_vfs_readpage,
+      .readpages = v9fs_vfs_readpages,
+      .releasepage = v9fs_release_page,
+      .invalidatepage = v9fs_invalidate_page,
+      .launder_page = v9fs_launder_page,
 };
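
v9fs_vfs_readpage() above is a classic read-through cache: ask FS-Cache
for the page first, and only on a miss read from the server, then push
the freshly read page back into the cache. A compact userspace model of
that control flow (cache_read() and server_read() are stand-ins, not 9p
or FS-Cache functions):

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Pretend cache: 0 on hit (page filled), -ENOBUFS on miss. */
static int cache_read(long index, char *page)
{
        if (index == 0) {                       /* only page 0 is cached */
                memset(page, 'c', PAGE_SIZE);
                return 0;
        }
        return -ENOBUFS;
}

/* Pretend backing store ("the server"). */
static int server_read(long index, char *page)
{
        (void)index;
        memset(page, 's', PAGE_SIZE);
        return 0;
}

static int readpage(long index, char *page)
{
        int ret = cache_read(index, page);

        if (ret == 0)
                return 0;                       /* cache hit, done */
        ret = server_read(index, page);
        if (ret < 0)
                return ret;                     /* would uncache here */
        /* on success the page would now be pushed into the cache */
        return 0;
}

int main(void)
{
        char page[PAGE_SIZE];

        printf("page 0 -> %d, filled from '%c'\n", readpage(0, page), page[0]);
        printf("page 3 -> %d, filled from '%c'\n", readpage(3, page), page[0]);
        return 0;
}
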
index 68bf2af6c389568fcbbca52ed3c17a68934cc057..3902bf43a0883bfe4b92d384170362e53c910e0d 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/inet.h>
 #include <linux/list.h>
+#include <linux/pagemap.h>
 #include <asm/uaccess.h>
 #include <linux/idr.h>
 #include <net/9p/9p.h>
@@ -40,6 +41,7 @@
 #include "v9fs.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
+#include "cache.h"
 
 static const struct file_operations v9fs_cached_file_operations;
 
@@ -72,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
                        return err;
                }
                if (omode & P9_OTRUNC) {
-                       inode->i_size = 0;
+                       i_size_write(inode, 0);
                        inode->i_blocks = 0;
                }
                if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses)))
@@ -85,6 +87,10 @@ int v9fs_file_open(struct inode *inode, struct file *file)
                /* enable cached file options */
                if(file->f_op == &v9fs_file_operations)
                        file->f_op = &v9fs_cached_file_operations;
+
+#ifdef CONFIG_9P_FSCACHE
+               v9fs_cache_inode_set_cookie(inode, file);
+#endif
        }
 
        return 0;
@@ -210,6 +216,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
        struct p9_client *clnt;
        struct inode *inode = filp->f_path.dentry->d_inode;
        int origin = *offset;
+       unsigned long pg_start, pg_end;
 
        P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
                (int)count, (int)*offset);
@@ -225,7 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
                if (count < rsize)
                        rsize = count;
 
-               n = p9_client_write(fid, NULL, data+total, *offset+total,
+               n = p9_client_write(fid, NULL, data+total, origin+total,
                                                                        rsize);
                if (n <= 0)
                        break;
@@ -234,14 +241,14 @@ v9fs_file_write(struct file *filp, const char __user * data,
        } while (count > 0);
 
        if (total > 0) {
-               invalidate_inode_pages2_range(inode->i_mapping, origin,
-                                                               origin+total);
+               pg_start = origin >> PAGE_CACHE_SHIFT;
+               pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
+               if (inode->i_mapping && inode->i_mapping->nrpages)
+                       invalidate_inode_pages2_range(inode->i_mapping,
+                                                     pg_start, pg_end);
                *offset += total;
-       }
-
-       if (*offset > inode->i_size) {
-               inode->i_size = *offset;
-               inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
+               i_size_write(inode, i_size_read(inode) + total);
+               inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
        }
 
        if (n < 0)
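
The write path above also fixes a unit bug: invalidate_inode_pages2_range()
takes page indices, not byte offsets, so the dirtied byte range
[origin, origin + total) must first be converted with the page shift. A
tiny demonstration of that conversion (the 4 KiB page size is an
assumption; PAGE_CACHE_SHIFT is per-architecture):

#include <stdio.h>

#define PAGE_SHIFT 12                           /* 4 KiB pages */

/* Map a byte range [origin, origin + total) to inclusive page indices. */
static void byte_range_to_pages(unsigned long origin, unsigned long total,
                                unsigned long *pg_start, unsigned long *pg_end)
{
        *pg_start = origin >> PAGE_SHIFT;
        *pg_end = (origin + total - 1) >> PAGE_SHIFT;
}

int main(void)
{
        unsigned long s, e;

        byte_range_to_pages(4096, 1, &s, &e);   /* one byte inside page 1 */
        printf("[4096, +1)   -> pages %lu..%lu\n", s, e);
        byte_range_to_pages(4000, 200, &s, &e); /* straddles pages 0 and 1 */
        printf("[4000, +200) -> pages %lu..%lu\n", s, e);
        return 0;
}
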
index 06a223d50a8165d908e738ff293f572a896c1bca..5947628aefef142f99edb0bcc67f9bc625d125aa 100644 (file)
@@ -40,6 +40,7 @@
 #include "v9fs.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
+#include "cache.h"
 
 static const struct inode_operations v9fs_dir_inode_operations;
 static const struct inode_operations v9fs_dir_inode_operations_ext;
@@ -197,6 +198,39 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
        wstat->extension = NULL;
 }
 
+#ifdef CONFIG_9P_FSCACHE
+/**
+ * v9fs_alloc_inode - helper function to allocate an inode
+ * This callback is executed before setting up the inode so that we
+ * can associate a vcookie with each inode.
+ *
+ */
+
+struct inode *v9fs_alloc_inode(struct super_block *sb)
+{
+       struct v9fs_cookie *vcookie;
+       vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache,
+                                                        GFP_KERNEL);
+       if (!vcookie)
+               return NULL;
+
+       vcookie->fscache = NULL;
+       vcookie->qid = NULL;
+       spin_lock_init(&vcookie->lock);
+       return &vcookie->inode;
+}
+
+/**
+ * v9fs_destroy_inode - destroy an inode
+ *
+ */
+
+void v9fs_destroy_inode(struct inode *inode)
+{
+       kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
+}
+#endif
+
 /**
  * v9fs_get_inode - helper function to setup an inode
  * @sb: superblock
@@ -326,6 +360,21 @@ error:
 }
 */
 
+
+/**
+ * v9fs_clear_inode - release an inode
+ * @inode: inode to release
+ *
+ */
+void v9fs_clear_inode(struct inode *inode)
+{
+       filemap_fdatawrite(inode->i_mapping);
+
+#ifdef CONFIG_9P_FSCACHE
+       v9fs_cache_inode_put_cookie(inode);
+#endif
+}
+
 /**
  * v9fs_inode_from_fid - populate an inode by issuing a attribute request
  * @v9ses: session information
@@ -356,8 +405,14 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 
        v9fs_stat2inode(st, ret, sb);
        ret->i_ino = v9fs_qid2ino(&st->qid);
+
+#ifdef CONFIG_9P_FSCACHE
+       v9fs_vcookie_set_qid(ret, &st->qid);
+       v9fs_cache_inode_get_cookie(ret);
+#endif
        p9stat_free(st);
        kfree(st);
+
        return ret;
 
 error:
@@ -751,7 +806,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
        P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
        err = -EPERM;
        v9ses = v9fs_inode2v9ses(dentry->d_inode);
-       if (v9ses->cache == CACHE_LOOSE)
+       if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
                return simple_getattr(mnt, dentry, stat);
 
        fid = v9fs_fid_lookup(dentry);
@@ -872,10 +927,10 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
        } else
                inode->i_rdev = 0;
 
-       inode->i_size = stat->length;
+       i_size_write(inode, stat->length);
 
        /* not real number of blocks, but 512 byte ones ... */
-       inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
+       inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
 }
 
 /**
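
v9fs_alloc_inode() hands the VFS a struct inode that is embedded inside a
larger v9fs_cookie, and v9fs_inode2cookie() presumably recovers the
wrapper with the usual container_of() idiom. A self-contained sketch of
that embedding trick (struct demo_cookie is illustrative, not the 9p
structure):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct inode { long i_ino; };

/* Filesystem-private wrapper with the generic inode embedded inside. */
struct demo_cookie {
        void *fscache;          /* per-inode cache cookie would live here */
        struct inode inode;     /* the part handed back to the VFS */
};

static struct demo_cookie *inode2cookie(struct inode *inode)
{
        return container_of(inode, struct demo_cookie, inode);
}

int main(void)
{
        struct demo_cookie c = { .fscache = NULL, .inode = { .i_ino = 7 } };
        struct inode *inode = &c.inode;         /* what the VFS sees */

        printf("wrapper recovered: %s\n",
               inode2cookie(inode) == &c ? "yes" : "no");
        return 0;
}
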
index 8961f1a8f6682bc04fbb7829f783bae29ec6a580..14a86448572cc0eb0de2e472e47cf2fdd80466b5 100644 (file)
 #include "v9fs_vfs.h"
 #include "fid.h"
 
-static void v9fs_clear_inode(struct inode *);
 static const struct super_operations v9fs_super_ops;
 
-/**
- * v9fs_clear_inode - release an inode
- * @inode: inode to release
- *
- */
-
-static void v9fs_clear_inode(struct inode *inode)
-{
-       filemap_fdatawrite(inode->i_mapping);
-}
-
 /**
  * v9fs_set_super - set the superblock
  * @s: super block
@@ -220,6 +208,10 @@ v9fs_umount_begin(struct super_block *sb)
 }
 
 static const struct super_operations v9fs_super_ops = {
+#ifdef CONFIG_9P_FSCACHE
+       .alloc_inode = v9fs_alloc_inode,
+       .destroy_inode = v9fs_destroy_inode,
+#endif
        .statfs = simple_statfs,
        .clear_inode = v9fs_clear_inode,
        .show_options = generic_show_options,
index 798cb071d1329a45cb3fe95d1a84161167f4084e..3f57ce4bee5d4371649c2a67354dcc16f4eacb48 100644 (file)
@@ -19,9 +19,6 @@ static int
 adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
               int create)
 {
-       if (block < 0)
-               goto abort_negative;
-
        if (!create) {
                if (block >= inode->i_blocks)
                        goto abort_toobig;
@@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
        /* don't support allocation of blocks yet */
        return -EIO;
 
-abort_negative:
-       adfs_error(inode->i_sb, "block %d < 0", block);
-       return -EIO;
-
 abort_toobig:
        return 0;
 }
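
The deleted abort_negative path could never fire: sector_t is unsigned,
so "block < 0" is always false and the error message was dead code. A
two-line demonstration (the sector_t width used here is an assumption):

#include <stdio.h>

typedef unsigned long long sector_t;            /* sector_t is unsigned */

int main(void)
{
        sector_t block = (sector_t)-5;          /* "negative" input wraps */

        if (block < 0)                          /* always false */
                printf("negative block\n");
        else
                printf("block = %llu (wrapped, never negative)\n", block);
        return 0;
}
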
index 9fe1b1bd30a808c82b1ebfbd8f644732e4f36f3c..96d394bdaddfa29e52b74d815dd5842e87cb6820 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,7 +18,7 @@
 /* Taken over from the old code... */
 
 /* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
 {
        int retval = -EPERM;
        unsigned int ia_valid = attr->ia_valid;
@@ -60,9 +60,51 @@ fine:
 error:
        return retval;
 }
-
 EXPORT_SYMBOL(inode_change_ok);
 
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode:     the inode to be truncated
+ * @offset:    the new size to assign to the inode
+ * Returns:    0 on success, -ve errno on failure
+ *
+ * inode_newsize_ok will check filesystem limits and ulimits to check that the
+ * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
+ * when necessary. Caller must not proceed with inode size change if failure is
+ * returned. @inode must be a file (not directory), with appropriate
+ * permissions to allow truncate (inode_newsize_ok does NOT check these
+ * conditions).
+ *
+ * inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(const struct inode *inode, loff_t offset)
+{
+       if (inode->i_size < offset) {
+               unsigned long limit;
+
+               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+               if (limit != RLIM_INFINITY && offset > limit)
+                       goto out_sig;
+               if (offset > inode->i_sb->s_maxbytes)
+                       goto out_big;
+       } else {
+               /*
+                * truncation of in-use swapfiles is disallowed - it would
+                * cause subsequent swapout to scribble on the now-freed
+                * blocks.
+                */
+               if (IS_SWAPFILE(inode))
+                       return -ETXTBSY;
+       }
+
+       return 0;
+out_sig:
+       send_sig(SIGXFSZ, current, 0);
+out_big:
+       return -EFBIG;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
+
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
        unsigned int ia_valid = attr->ia_valid;
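
inode_newsize_ok() gathers the grow-side checks in one place: the new
size is compared against RLIMIT_FSIZE (raising SIGXFSZ on failure) and
against the filesystem's s_maxbytes. A userspace analogue of the rlimit
half of that check (the swapfile and s_maxbytes parts are kernel-only):

#include <stdio.h>
#include <sys/resource.h>

/* Would growing a file to `offset` bytes exceed the caller's
 * RLIMIT_FSIZE?  The kernel additionally sends SIGXFSZ and returns
 * -EFBIG; here we just report the verdict. */
static int newsize_ok(unsigned long long offset)
{
        struct rlimit rl;

        if (getrlimit(RLIMIT_FSIZE, &rl) != 0)
                return -1;
        if (rl.rlim_cur != RLIM_INFINITY && offset > rl.rlim_cur)
                return -1;                      /* -EFBIG in the kernel */
        return 0;
}

int main(void)
{
        printf("grow to 1 MiB: %s\n",
               newsize_ok(1ULL << 20) == 0 ? "ok" : "would be -EFBIG");
        return 0;
}
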
index dd376c124e71561193f0a824f407314d6edda4aa..33baf27fac78e5c4fbc36e12e3a4a16b2322afe4 100644 (file)
@@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb)
 {
        kfree(BEFS_SB(sb)->mount_opts.iocharset);
        BEFS_SB(sb)->mount_opts.iocharset = NULL;
-
-       if (BEFS_SB(sb)->nls) {
-               unload_nls(BEFS_SB(sb)->nls);
-               BEFS_SB(sb)->nls = NULL;
-       }
-
+       unload_nls(BEFS_SB(sb)->nls);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
 }
index 442d94fe255cfab4c81e26bdf0aa8cc46c3283ac..b9b3bb51b1e485ff0caa0b7555d468804b552b9a 100644 (file)
@@ -1711,42 +1711,52 @@ struct elf_note_info {
        int numnote;
 };
 
-static int fill_note_info(struct elfhdr *elf, int phdrs,
-                         struct elf_note_info *info,
-                         long signr, struct pt_regs *regs)
+static int elf_note_info_init(struct elf_note_info *info)
 {
-#define        NUM_NOTES       6
-       struct list_head *t;
-
-       info->notes = NULL;
-       info->prstatus = NULL;
-       info->psinfo = NULL;
-       info->fpu = NULL;
-#ifdef ELF_CORE_COPY_XFPREGS
-       info->xfpu = NULL;
-#endif
+       memset(info, 0, sizeof(*info));
        INIT_LIST_HEAD(&info->thread_list);
 
-       info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
-                             GFP_KERNEL);
+       /* Allocate space for six ELF notes */
+       info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
        if (!info->notes)
                return 0;
        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
        if (!info->psinfo)
-               return 0;
+               goto notes_free;
        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
        if (!info->prstatus)
-               return 0;
+               goto psinfo_free;
        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
        if (!info->fpu)
-               return 0;
+               goto prstatus_free;
 #ifdef ELF_CORE_COPY_XFPREGS
        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
        if (!info->xfpu)
-               return 0;
+               goto fpu_free;
+#endif
+       return 1;
+#ifdef ELF_CORE_COPY_XFPREGS
+ fpu_free:
+       kfree(info->fpu);
 #endif
+ prstatus_free:
+       kfree(info->prstatus);
+ psinfo_free:
+       kfree(info->psinfo);
+ notes_free:
+       kfree(info->notes);
+       return 0;
+}
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+                         struct elf_note_info *info,
+                         long signr, struct pt_regs *regs)
+{
+       struct list_head *t;
+
+       if (!elf_note_info_init(info))
+               return 0;
 
-       info->thread_status_size = 0;
        if (signr) {
                struct core_thread *ct;
                struct elf_thread_status *ets;
@@ -1806,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 #endif
 
        return 1;
-
-#undef NUM_NOTES
 }
 
 static size_t get_note_info_size(struct elf_note_info *info)
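
elf_note_info_init() replaces four leaky early returns with the standard
goto-unwind ladder: each failure label frees exactly the allocations that
succeeded before it, in reverse order. The same shape in plain userspace
C (struct note_info is trimmed down for the demo):

#include <stdio.h>
#include <stdlib.h>

struct note_info { void *notes, *psinfo, *prstatus, *fpu; };

/* Allocate all members or none. */
static int info_init(struct note_info *info)
{
        info->notes = malloc(64);
        if (!info->notes)
                return 0;
        info->psinfo = malloc(64);
        if (!info->psinfo)
                goto notes_free;
        info->prstatus = malloc(64);
        if (!info->prstatus)
                goto psinfo_free;
        info->fpu = malloc(64);
        if (!info->fpu)
                goto prstatus_free;
        return 1;

prstatus_free:
        free(info->prstatus);
psinfo_free:
        free(info->psinfo);
notes_free:
        free(info->notes);
        return 0;
}

int main(void)
{
        struct note_info info;

        printf("init: %d\n", info_init(&info));
        return 0;
}
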
index 76285471073ec18193edaf0304cb4e385affae3f..38502c67987c573541325aaf0f8a5db1ada1f756 100644 (file)
@@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
        }
 
        stack_size = exec_params.stack_size;
-       if (stack_size < interp_params.stack_size)
-               stack_size = interp_params.stack_size;
-
        if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
                executable_stack = EXSTACK_ENABLE_X;
        else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
                executable_stack = EXSTACK_DISABLE_X;
-       else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
-               executable_stack = EXSTACK_ENABLE_X;
-       else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
-               executable_stack = EXSTACK_DISABLE_X;
        else
                executable_stack = EXSTACK_DEFAULT;
 
+       if (stack_size == 0) {
+               stack_size = interp_params.stack_size;
+               if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
+                       executable_stack = EXSTACK_ENABLE_X;
+               else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
+                       executable_stack = EXSTACK_DISABLE_X;
+               else
+                       executable_stack = EXSTACK_DEFAULT;
+       }
+
        retval = -ENOEXEC;
        if (stack_size == 0)
                goto error;
index e92f229e3c6e9d994d61f5581625c35c2bde6bd7..a2796651e75690eb8e8a875146be852bfb1d637b 100644 (file)
@@ -278,8 +278,6 @@ static int decompress_exec(
                ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
                if (ret <= 0)
                        break;
-               if (ret >= (unsigned long) -4096)
-                       break;
                len -= ret;
 
                strm.next_in = buf;
@@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
                                        "(%d != %d)", (unsigned) r, curid, id);
                        goto failed;
                } else if ( ! p->lib_list[id].loaded &&
-                               load_flat_shared_library(id, p) > (unsigned long) -4096) {
+                               IS_ERR_VALUE(load_flat_shared_library(id, p))) {
                        printk("BINFMT_FLAT: failed to load library %d", id);
                        goto failed;
                }
@@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
                                  MAP_PRIVATE|MAP_EXECUTABLE, 0);
                up_write(&current->mm->mmap_sem);
-               if (!textpos  || textpos >= (unsigned long) -4096) {
+               if (!textpos || IS_ERR_VALUE(textpos)) {
                        if (!textpos)
                                textpos = (unsigned long) -ENOMEM;
                        printk("Unable to mmap process text, errno %d\n", (int)-textpos);
@@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
 
-               if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
+               if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
                        if (!realdatastart)
                                realdatastart = (unsigned long) -ENOMEM;
                        printk("Unable to allocate RAM for process data, errno %d\n",
@@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        result = bprm->file->f_op->read(bprm->file, (char *) datapos,
                                        data_len + (relocs * sizeof(unsigned long)), &fpos);
                }
-               if (result >= (unsigned long)-4096) {
+               if (IS_ERR_VALUE(result)) {
                        printk("Unable to read data+bss, errno %d\n", (int)-result);
                        do_munmap(current->mm, textpos, text_len);
                        do_munmap(current->mm, realdatastart, data_len + extra);
@@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
 
-               if (!textpos  || textpos >= (unsigned long) -4096) {
+               if (!textpos || IS_ERR_VALUE(textpos)) {
                        if (!textpos)
                                textpos = (unsigned long) -ENOMEM;
                        printk("Unable to allocate RAM for process text/data, errno %d\n",
@@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        fpos = 0;
                        result = bprm->file->f_op->read(bprm->file,
                                        (char *) textpos, text_len, &fpos);
-                       if (result < (unsigned long) -4096)
+                       if (!IS_ERR_VALUE(result))
                                result = decompress_exec(bprm, text_len, (char *) datapos,
                                                 data_len + (relocs * sizeof(unsigned long)), 0);
                }
@@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm,
                        fpos = 0;
                        result = bprm->file->f_op->read(bprm->file,
                                        (char *) textpos, text_len, &fpos);
-                       if (result < (unsigned long) -4096) {
+                       if (!IS_ERR_VALUE(result)) {
                                fpos = ntohl(hdr->data_start);
                                result = bprm->file->f_op->read(bprm->file, (char *) datapos,
                                        data_len + (relocs * sizeof(unsigned long)), &fpos);
                        }
                }
-               if (result >= (unsigned long)-4096) {
+               if (IS_ERR_VALUE(result)) {
                        printk("Unable to read code+data+bss, errno %d\n",(int)-result);
                        do_munmap(current->mm, textpos, text_len + data_len + extra +
                                MAX_SHARED_LIBS * sizeof(unsigned long));
@@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 
        res = prepare_binprm(&bprm);
 
-       if (res <= (unsigned long)-4096)
+       if (!IS_ERR_VALUE(res))
                res = load_flat_file(&bprm, libs, id, NULL);
 
        abort_creds(bprm.cred);
@@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        stack_len += FLAT_DATA_ALIGN - 1;  /* reserve for upcoming alignment */
        
        res = load_flat_file(bprm, &libinfo, 0, &stack_len);
-       if (res > (unsigned long)-4096)
+       if (IS_ERR_VALUE(res))
                return res;
        
        /* Update data segment pointers for all libraries */
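
All of the open-coded "(unsigned long)-4096" comparisons above are
replaced with IS_ERR_VALUE(). Besides being hard to read, the literal did
not match MAX_ERRNO (4095), and the boundary value was classified
inconsistently from one call site to the next (compare the old ">=" and
"<=" tests). A sketch of what the macro actually checks (MAX_ERRNO is
defined locally for the demo):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* The top MAX_ERRNO values of an unsigned long are reserved for
 * negative errno codes smuggled through pointer-sized integers. */
#define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

int main(void)
{
        unsigned long addr = 0x10000;                 /* plausible address */
        unsigned long err = (unsigned long)-ENOMEM;   /* encoded error */

        printf("0x%lx -> %s\n", addr, IS_ERR_VALUE(addr) ? "error" : "value");
        printf("0x%lx -> %s\n", err, IS_ERR_VALUE(err) ? "error" : "value");
        return 0;
}
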
index 5d1ed50bd46c591c1ef63b312c4844d013f5239b..9cf4b926f8e47a509f0994ad0e696d79205335bb 100644 (file)
@@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev);
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
  * @bdev:      blockdevice to lock
  *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
  * The reference counter (bd_fsfreeze_count) guarantees that only the last
@@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev)
        int error = 0;
 
        mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               bdev->bd_fsfreeze_count++;
+       if (++bdev->bd_fsfreeze_count > 1) {
+               /*
+                * We don't even need to grab a reference - the first call
+                * to freeze_bdev grabs an active reference and only the last
+                * thaw_bdev drops it.
+                */
                sb = get_super(bdev);
+               drop_super(sb);
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
                return sb;
        }
-       bdev->bd_fsfreeze_count++;
-
-       down(&bdev->bd_mount_sem);
-       sb = get_super(bdev);
-       if (sb && !(sb->s_flags & MS_RDONLY)) {
-               sb->s_frozen = SB_FREEZE_WRITE;
-               smp_wmb();
-
-               sync_filesystem(sb);
-
-               sb->s_frozen = SB_FREEZE_TRANS;
-               smp_wmb();
-
-               sync_blockdev(sb->s_bdev);
-
-               if (sb->s_op->freeze_fs) {
-                       error = sb->s_op->freeze_fs(sb);
-                       if (error) {
-                               printk(KERN_ERR
-                                       "VFS:Filesystem freeze failed\n");
-                               sb->s_frozen = SB_UNFROZEN;
-                               drop_super(sb);
-                               up(&bdev->bd_mount_sem);
-                               bdev->bd_fsfreeze_count--;
-                               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                               return ERR_PTR(error);
-                       }
+
+       sb = get_active_super(bdev);
+       if (!sb)
+               goto out;
+       if (sb->s_flags & MS_RDONLY) {
+               deactivate_locked_super(sb);
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               return sb;
+       }
+
+       sb->s_frozen = SB_FREEZE_WRITE;
+       smp_wmb();
+
+       sync_filesystem(sb);
+
+       sb->s_frozen = SB_FREEZE_TRANS;
+       smp_wmb();
+
+       sync_blockdev(sb->s_bdev);
+
+       if (sb->s_op->freeze_fs) {
+               error = sb->s_op->freeze_fs(sb);
+               if (error) {
+                       printk(KERN_ERR
+                               "VFS:Filesystem freeze failed\n");
+                       sb->s_frozen = SB_UNFROZEN;
+                       deactivate_locked_super(sb);
+                       bdev->bd_fsfreeze_count--;
+                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                       return ERR_PTR(error);
                }
        }
+       up_write(&sb->s_umount);
 
+ out:
        sync_blockdev(bdev);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
-       return sb;      /* thaw_bdev releases s->s_umount and bd_mount_sem */
+       return sb;      /* thaw_bdev releases s->s_umount */
 }
 EXPORT_SYMBOL(freeze_bdev);
 
@@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev);
  */
 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 {
-       int error = 0;
+       int error = -EINVAL;
 
        mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (!bdev->bd_fsfreeze_count) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return -EINVAL;
-       }
-
-       bdev->bd_fsfreeze_count--;
-       if (bdev->bd_fsfreeze_count > 0) {
-               if (sb)
-                       drop_super(sb);
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return 0;
-       }
-
-       if (sb) {
-               BUG_ON(sb->s_bdev != bdev);
-               if (!(sb->s_flags & MS_RDONLY)) {
-                       if (sb->s_op->unfreeze_fs) {
-                               error = sb->s_op->unfreeze_fs(sb);
-                               if (error) {
-                                       printk(KERN_ERR
-                                               "VFS:Filesystem thaw failed\n");
-                                       sb->s_frozen = SB_FREEZE_TRANS;
-                                       bdev->bd_fsfreeze_count++;
-                                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                                       return error;
-                               }
-                       }
-                       sb->s_frozen = SB_UNFROZEN;
-                       smp_wmb();
-                       wake_up(&sb->s_wait_unfrozen);
+       if (!bdev->bd_fsfreeze_count)
+               goto out_unlock;
+
+       error = 0;
+       if (--bdev->bd_fsfreeze_count > 0)
+               goto out_unlock;
+
+       if (!sb)
+               goto out_unlock;
+
+       BUG_ON(sb->s_bdev != bdev);
+       down_write(&sb->s_umount);
+       if (sb->s_flags & MS_RDONLY)
+               goto out_deactivate;
+
+       if (sb->s_op->unfreeze_fs) {
+               error = sb->s_op->unfreeze_fs(sb);
+               if (error) {
+                       printk(KERN_ERR
+                               "VFS:Filesystem thaw failed\n");
+                       sb->s_frozen = SB_FREEZE_TRANS;
+                       bdev->bd_fsfreeze_count++;
+                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                       return error;
                }
-               drop_super(sb);
        }
 
-       up(&bdev->bd_mount_sem);
+       sb->s_frozen = SB_UNFROZEN;
+       smp_wmb();
+       wake_up(&sb->s_wait_unfrozen);
+
+out_deactivate:
+       if (sb)
+               deactivate_locked_super(sb);
+out_unlock:
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        return 0;
 }
@@ -430,7 +437,6 @@ static void init_once(void *foo)
 
        memset(bdev, 0, sizeof(*bdev));
        mutex_init(&bdev->bd_mutex);
-       sema_init(&bdev->bd_mount_sem, 1);
        INIT_LIST_HEAD(&bdev->bd_inodes);
        INIT_LIST_HEAD(&bdev->bd_list);
 #ifdef CONFIG_SYSFS
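
freeze_bdev()/thaw_bdev() now rely entirely on bd_fsfreeze_count: only
the first freeze does real work, only the balancing thaw undoes it, and
an unbalanced thaw is rejected with -EINVAL. The counting protocol in
isolation (single-threaded demo; the kernel serializes the counter with
bd_fsfreeze_mutex):

#include <stdio.h>

static int freeze_count;

static void do_freeze(void) { printf("  -> actually freezing\n"); }
static void do_thaw(void)   { printf("  -> actually thawing\n"); }

static void freeze(void)
{
        if (++freeze_count > 1) {               /* nested: count only */
                printf("freeze (nested, count=%d)\n", freeze_count);
                return;
        }
        printf("freeze (first)\n");
        do_freeze();
}

static int thaw(void)
{
        if (!freeze_count)
                return -1;                      /* -EINVAL in the kernel */
        if (--freeze_count > 0) {
                printf("thaw (still frozen, count=%d)\n", freeze_count);
                return 0;
        }
        printf("thaw (last)\n");
        do_thaw();
        return 0;
}

int main(void)
{
        freeze();
        freeze();
        thaw();
        thaw();
        printf("unbalanced thaw: %d\n", thaw());
        return 0;
}
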
index 019e8af449abfeb1679c1a20c801d6b0082edf8b..282ca085c2fbff854bcf4a396d0773db129c5085 100644 (file)
@@ -48,6 +48,9 @@ struct btrfs_worker_thread {
        /* number of things on the pending list */
        atomic_t num_pending;
 
+       /* reference counter for this struct */
+       atomic_t refs;
+
        unsigned long sequence;
 
        /* protects the pending list. */
@@ -71,7 +74,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
                unsigned long flags;
                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 1;
-               list_move(&worker->worker_list, &worker->workers->idle_list);
+
+               /* the list may be empty if the worker is just starting */
+               if (!list_empty(&worker->worker_list)) {
+                       list_move(&worker->worker_list,
+                                &worker->workers->idle_list);
+               }
                spin_unlock_irqrestore(&worker->workers->lock, flags);
        }
 }
@@ -87,23 +95,49 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
                unsigned long flags;
                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 0;
-               list_move_tail(&worker->worker_list,
-                              &worker->workers->worker_list);
+
+               if (!list_empty(&worker->worker_list)) {
+                       list_move_tail(&worker->worker_list,
+                                     &worker->workers->worker_list);
+               }
                spin_unlock_irqrestore(&worker->workers->lock, flags);
        }
 }
 
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
-                                           struct btrfs_work *work)
+static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
 {
+       struct btrfs_workers *workers = worker->workers;
        unsigned long flags;
 
+       rmb();
+       if (!workers->atomic_start_pending)
+               return;
+
+       spin_lock_irqsave(&workers->lock, flags);
+       if (!workers->atomic_start_pending)
+               goto out;
+
+       workers->atomic_start_pending = 0;
+       if (workers->num_workers >= workers->max_workers)
+               goto out;
+
+       spin_unlock_irqrestore(&workers->lock, flags);
+       btrfs_start_workers(workers, 1);
+       return;
+
+out:
+       spin_unlock_irqrestore(&workers->lock, flags);
+}
+
+static noinline int run_ordered_completions(struct btrfs_workers *workers,
+                                           struct btrfs_work *work)
+{
        if (!workers->ordered)
                return 0;
 
        set_bit(WORK_DONE_BIT, &work->flags);
 
-       spin_lock_irqsave(&workers->lock, flags);
+       spin_lock(&workers->order_lock);
 
        while (1) {
                if (!list_empty(&workers->prio_order_list)) {
@@ -126,45 +160,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
                if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
                        break;
 
-               spin_unlock_irqrestore(&workers->lock, flags);
+               spin_unlock(&workers->order_lock);
 
                work->ordered_func(work);
 
                /* now take the lock again and call the freeing code */
-               spin_lock_irqsave(&workers->lock, flags);
+               spin_lock(&workers->order_lock);
                list_del(&work->order_list);
                work->ordered_free(work);
        }
 
-       spin_unlock_irqrestore(&workers->lock, flags);
+       spin_unlock(&workers->order_lock);
        return 0;
 }
 
+static void put_worker(struct btrfs_worker_thread *worker)
+{
+       if (atomic_dec_and_test(&worker->refs))
+               kfree(worker);
+}
+
+static int try_worker_shutdown(struct btrfs_worker_thread *worker)
+{
+       int freeit = 0;
+
+       spin_lock_irq(&worker->lock);
+       spin_lock(&worker->workers->lock);
+       if (worker->workers->num_workers > 1 &&
+           worker->idle &&
+           !worker->working &&
+           !list_empty(&worker->worker_list) &&
+           list_empty(&worker->prio_pending) &&
+           list_empty(&worker->pending) &&
+           atomic_read(&worker->num_pending) == 0) {
+               freeit = 1;
+               list_del_init(&worker->worker_list);
+               worker->workers->num_workers--;
+       }
+       spin_unlock(&worker->workers->lock);
+       spin_unlock_irq(&worker->lock);
+
+       if (freeit)
+               put_worker(worker);
+       return freeit;
+}
+
+static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
+                                       struct list_head *prio_head,
+                                       struct list_head *head)
+{
+       struct btrfs_work *work = NULL;
+       struct list_head *cur = NULL;
+
+       if (!list_empty(prio_head))
+               cur = prio_head->next;
+
+       smp_mb();
+       if (!list_empty(&worker->prio_pending))
+               goto refill;
+
+       if (!list_empty(head))
+               cur = head->next;
+
+       if (cur)
+               goto out;
+
+refill:
+       spin_lock_irq(&worker->lock);
+       list_splice_tail_init(&worker->prio_pending, prio_head);
+       list_splice_tail_init(&worker->pending, head);
+
+       if (!list_empty(prio_head))
+               cur = prio_head->next;
+       else if (!list_empty(head))
+               cur = head->next;
+       spin_unlock_irq(&worker->lock);
+
+       if (!cur)
+               goto out_fail;
+
+out:
+       work = list_entry(cur, struct btrfs_work, list);
+
+out_fail:
+       return work;
+}
+
 /*
  * main loop for servicing work items
  */
 static int worker_loop(void *arg)
 {
        struct btrfs_worker_thread *worker = arg;
-       struct list_head *cur;
+       struct list_head head;
+       struct list_head prio_head;
        struct btrfs_work *work;
+
+       INIT_LIST_HEAD(&head);
+       INIT_LIST_HEAD(&prio_head);
+
        do {
-               spin_lock_irq(&worker->lock);
-again_locked:
+again:
                while (1) {
-                       if (!list_empty(&worker->prio_pending))
-                               cur = worker->prio_pending.next;
-                       else if (!list_empty(&worker->pending))
-                               cur = worker->pending.next;
-                       else
+
+                       work = get_next_work(worker, &prio_head, &head);
+                       if (!work)
                                break;
 
-                       work = list_entry(cur, struct btrfs_work, list);
                        list_del(&work->list);
                        clear_bit(WORK_QUEUED_BIT, &work->flags);
 
                        work->worker = worker;
-                       spin_unlock_irq(&worker->lock);
 
                        work->func(work);
 
@@ -175,9 +282,13 @@ again_locked:
                         */
                        run_ordered_completions(worker->workers, work);
 
-                       spin_lock_irq(&worker->lock);
-                       check_idle_worker(worker);
+                       check_pending_worker_creates(worker);
+
                }
+
+               spin_lock_irq(&worker->lock);
+               check_idle_worker(worker);
+
                if (freezing(current)) {
                        worker->working = 0;
                        spin_unlock_irq(&worker->lock);
@@ -216,8 +327,10 @@ again_locked:
                                spin_lock_irq(&worker->lock);
                                set_current_state(TASK_INTERRUPTIBLE);
                                if (!list_empty(&worker->pending) ||
-                                   !list_empty(&worker->prio_pending))
-                                       goto again_locked;
+                                   !list_empty(&worker->prio_pending)) {
+                                       spin_unlock_irq(&worker->lock);
+                                       goto again;
+                               }
 
                                /*
                                 * this makes sure we get a wakeup when someone
@@ -226,8 +339,13 @@ again_locked:
                                worker->working = 0;
                                spin_unlock_irq(&worker->lock);
 
-                               if (!kthread_should_stop())
-                                       schedule();
+                               if (!kthread_should_stop()) {
+                                       schedule_timeout(HZ * 120);
+                                       if (!worker->working &&
+                                           try_worker_shutdown(worker)) {
+                                               return 0;
+                                       }
+                               }
                        }
                        __set_current_state(TASK_RUNNING);
                }
@@ -242,16 +360,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
 {
        struct list_head *cur;
        struct btrfs_worker_thread *worker;
+       int can_stop;
 
+       spin_lock_irq(&workers->lock);
        list_splice_init(&workers->idle_list, &workers->worker_list);
        while (!list_empty(&workers->worker_list)) {
                cur = workers->worker_list.next;
                worker = list_entry(cur, struct btrfs_worker_thread,
                                    worker_list);
-               kthread_stop(worker->task);
-               list_del(&worker->worker_list);
-               kfree(worker);
+
+               atomic_inc(&worker->refs);
+               workers->num_workers -= 1;
+               if (!list_empty(&worker->worker_list)) {
+                       list_del_init(&worker->worker_list);
+                       put_worker(worker);
+                       can_stop = 1;
+               } else
+                       can_stop = 0;
+               spin_unlock_irq(&workers->lock);
+               if (can_stop)
+                       kthread_stop(worker->task);
+               spin_lock_irq(&workers->lock);
+               put_worker(worker);
        }
+       spin_unlock_irq(&workers->lock);
        return 0;
 }
 
@@ -266,10 +398,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
        INIT_LIST_HEAD(&workers->order_list);
        INIT_LIST_HEAD(&workers->prio_order_list);
        spin_lock_init(&workers->lock);
+       spin_lock_init(&workers->order_lock);
        workers->max_workers = max;
        workers->idle_thresh = 32;
        workers->name = name;
        workers->ordered = 0;
+       workers->atomic_start_pending = 0;
+       workers->atomic_worker_start = 0;
 }
 
 /*
@@ -293,7 +428,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
                INIT_LIST_HEAD(&worker->prio_pending);
                INIT_LIST_HEAD(&worker->worker_list);
                spin_lock_init(&worker->lock);
+
                atomic_set(&worker->num_pending, 0);
+               atomic_set(&worker->refs, 1);
                worker->workers = workers;
                worker->task = kthread_run(worker_loop, worker,
                                           "btrfs-%s-%d", workers->name,
@@ -303,7 +440,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
                        kfree(worker);
                        goto fail;
                }
-
                spin_lock_irq(&workers->lock);
                list_add_tail(&worker->worker_list, &workers->idle_list);
                worker->idle = 1;
@@ -350,7 +486,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
         */
        next = workers->worker_list.next;
        worker = list_entry(next, struct btrfs_worker_thread, worker_list);
-       atomic_inc(&worker->num_pending);
        worker->sequence++;
 
        if (worker->sequence % workers->idle_thresh == 0)
@@ -367,28 +502,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
 {
        struct btrfs_worker_thread *worker;
        unsigned long flags;
+       struct list_head *fallback;
 
 again:
        spin_lock_irqsave(&workers->lock, flags);
        worker = next_worker(workers);
-       spin_unlock_irqrestore(&workers->lock, flags);
 
        if (!worker) {
-               spin_lock_irqsave(&workers->lock, flags);
                if (workers->num_workers >= workers->max_workers) {
-                       struct list_head *fallback = NULL;
-                       /*
-                        * we have failed to find any workers, just
-                        * return the force one
-                        */
-                       if (!list_empty(&workers->worker_list))
-                               fallback = workers->worker_list.next;
-                       if (!list_empty(&workers->idle_list))
-                               fallback = workers->idle_list.next;
-                       BUG_ON(!fallback);
-                       worker = list_entry(fallback,
-                                 struct btrfs_worker_thread, worker_list);
-                       spin_unlock_irqrestore(&workers->lock, flags);
+                       goto fallback;
+               } else if (workers->atomic_worker_start) {
+                       workers->atomic_start_pending = 1;
+                       goto fallback;
                } else {
                        spin_unlock_irqrestore(&workers->lock, flags);
                        /* we're below the limit, start another worker */
@@ -396,6 +521,28 @@ again:
                        goto again;
                }
        }
+       goto found;
+
+fallback:
+       fallback = NULL;
+       /*
+        * we have failed to find any workers, just
+        * return the first one we can find.
+        */
+       if (!list_empty(&workers->worker_list))
+               fallback = workers->worker_list.next;
+       if (!list_empty(&workers->idle_list))
+               fallback = workers->idle_list.next;
+       BUG_ON(!fallback);
+       worker = list_entry(fallback,
+                 struct btrfs_worker_thread, worker_list);
+found:
+       /*
+        * this makes sure the worker doesn't exit before it is placed
+        * onto a busy/idle list
+        */
+       atomic_inc(&worker->num_pending);
+       spin_unlock_irqrestore(&workers->lock, flags);
        return worker;
 }
 
@@ -427,7 +574,7 @@ int btrfs_requeue_work(struct btrfs_work *work)
                spin_lock(&worker->workers->lock);
                worker->idle = 0;
                list_move_tail(&worker->worker_list,
-                              &worker->workers->worker_list);
+                             &worker->workers->worker_list);
                spin_unlock(&worker->workers->lock);
        }
        if (!worker->working) {
@@ -435,9 +582,9 @@ int btrfs_requeue_work(struct btrfs_work *work)
                worker->working = 1;
        }
 
-       spin_unlock_irqrestore(&worker->lock, flags);
        if (wake)
                wake_up_process(worker->task);
+       spin_unlock_irqrestore(&worker->lock, flags);
 out:
 
        return 0;
@@ -463,14 +610,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 
        worker = find_worker(workers);
        if (workers->ordered) {
-               spin_lock_irqsave(&workers->lock, flags);
+               /*
+                * you're not allowed to do ordered queues from an
+                * interrupt handler
+                */
+               spin_lock(&workers->order_lock);
                if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
                        list_add_tail(&work->order_list,
                                      &workers->prio_order_list);
                } else {
                        list_add_tail(&work->order_list, &workers->order_list);
                }
-               spin_unlock_irqrestore(&workers->lock, flags);
+               spin_unlock(&workers->order_lock);
        } else {
                INIT_LIST_HEAD(&work->order_list);
        }
@@ -481,7 +632,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
                list_add_tail(&work->list, &worker->prio_pending);
        else
                list_add_tail(&work->list, &worker->pending);
-       atomic_inc(&worker->num_pending);
        check_busy_worker(worker);
 
        /*
@@ -492,10 +642,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
                wake = 1;
        worker->working = 1;
 
-       spin_unlock_irqrestore(&worker->lock, flags);
-
        if (wake)
                wake_up_process(worker->task);
+       spin_unlock_irqrestore(&worker->lock, flags);
+
 out:
        return 0;
 }
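
Two locking decisions above deserve a note. Ordered queueing is never done from interrupt context (the new comment says as much), so workers->order_lock can be a plain spin_lock rather than the irq-saving variant, while the per-worker lock keeps spin_lock_irqsave because queueing can happen at end_io (interrupt) time. And wake_up_process() now runs before the per-worker lock is dropped, so the worker cannot re-check its lists and go back to sleep in the gap. Roughly, under those assumptions:

	/* process context only: a plain spinlock is enough */
	spin_lock(&workers->order_lock);
	list_add_tail(&work->order_list, &workers->order_list);
	spin_unlock(&workers->order_lock);

	/* may be reached from IRQ context: must save/restore irq state */
	spin_lock_irqsave(&worker->lock, flags);
	list_add_tail(&work->list, &worker->pending);
	if (wake)
		wake_up_process(worker->task);	/* before the unlock */
	spin_unlock_irqrestore(&worker->lock, flags);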
index 1b511c109db658ef1d772bb90d77b795f780e63d..fc089b95ec14f24c9a971b44f3718c2029f241ef 100644 (file)
@@ -73,6 +73,15 @@ struct btrfs_workers {
        /* force completions in the order they were queued */
        int ordered;
 
+       /* more workers required, but in an interrupt handler */
+       int atomic_start_pending;
+
+       /*
+        * are we allowed to sleep while starting workers or are we required
+        * to start them at a later time?
+        */
+       int atomic_worker_start;
+
        /* list with all the work threads.  The workers on the idle thread
         * may be actively servicing jobs, but they haven't yet hit the
         * idle thresh limit above.
@@ -90,6 +99,9 @@ struct btrfs_workers {
        /* lock for finding the next worker thread to queue on */
        spinlock_t lock;
 
+       /* lock for the ordered lists */
+       spinlock_t order_lock;
+
        /* extra name for this worker, used for current->name */
        char *name;
 };
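
The two new fields implement deferred pool growth for contexts that cannot sleep: kthread_run() may block, so a pool flagged with atomic_worker_start never starts a thread directly from find_worker(); it only sets atomic_start_pending and leans on an existing worker, and a later process-context path performs the actual start. A sketch of the hand-off (helper names hypothetical):

	/* in atomic context: just record that the pool wants to grow */
	if (workers->atomic_worker_start) {
		workers->atomic_start_pending = 1;
		worker = pick_fallback_worker(workers);	/* hypothetical */
	}

	/* later, somewhere sleeping is allowed */
	if (workers->atomic_start_pending) {
		workers->atomic_start_pending = 0;
		btrfs_start_workers(workers, 1);	/* may sleep */
	}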
index ea1ea0af8c0e6cf635c3060a08e1dd648a6fb48f..82ee56bba29966e5dee552d764bb0e60fbc8bca9 100644 (file)
@@ -138,6 +138,7 @@ struct btrfs_inode {
         * of these.
         */
        unsigned ordered_data_close:1;
+       unsigned dummy_inode:1;
 
        struct inode vfs_inode;
 };
index 9d8ba4d54a37c3f96e9585de46b8e99d701410bc..a11a32058b50a4993f072fd1baddc6e9dafb52a8 100644 (file)
@@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                 */
                set_page_extent_mapped(page);
                lock_extent(tree, last_offset, end, GFP_NOFS);
-               spin_lock(&em_tree->lock);
+               read_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, last_offset,
                                           PAGE_CACHE_SIZE);
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
 
                if (!em || last_offset < em->start ||
                    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        em_tree = &BTRFS_I(inode)->extent_tree;
 
        /* we need the actual starting offset of this extent in the file */
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree,
                                   page_offset(bio->bi_io_vec->bv_page),
                                   PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        compressed_len = em->block_len;
        cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
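
These hunks (and the matching ones in disk-io.c below) are part of a conversion of the extent_map tree's ->lock from a spinlock to an rwlock: lookup_extent_mapping() only walks the rbtree, so concurrent readers no longer serialize against each other, while paths that insert or remove mappings take the exclusive write side. The split, assuming ->lock is now an rwlock_t:

	read_lock(&em_tree->lock);		/* shared: many readers at once */
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);

	write_lock(&em_tree->lock);		/* exclusive: rbtree is modified */
	ret = add_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);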
index 3fdcc0512d3ab62f95d42708ca0d6a049340b877..ec96f3a6d536640919dd25a08c7ed22e4423ef15 100644 (file)
@@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        int split;
        int num_doubles = 0;
 
+       l = path->nodes[0];
+       slot = path->slots[0];
+       if (extend && data_size + btrfs_item_size_nr(l, slot) +
+           sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
+               return -EOVERFLOW;
+
        /* first try to make some room by pushing left and right */
        if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
                wret = push_leaf_right(trans, root, path, data_size, 0);
index 837435ce84caa104dcb00ba9830df29a47e95923..80599b4e42bd350f5e6b7a094c3711a3dd32e8a8 100644 (file)
@@ -114,6 +114,10 @@ struct btrfs_ordered_sum;
  */
 #define BTRFS_DEV_ITEMS_OBJECTID 1ULL
 
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
 /*
  * we can actually store much bigger names, but lets not confuse the rest
  * of linux
@@ -670,6 +674,7 @@ struct btrfs_space_info {
        u64 bytes_reserved;     /* total bytes the allocator has reserved for
                                   current allocations */
        u64 bytes_readonly;     /* total bytes that are read only */
+       u64 bytes_super;        /* total bytes reserved for the super blocks */
 
        /* delalloc accounting */
        u64 bytes_delalloc;     /* number of bytes reserved for allocation,
@@ -726,6 +731,15 @@ enum btrfs_caching_type {
        BTRFS_CACHE_FINISHED    = 2,
 };
 
+struct btrfs_caching_control {
+       struct list_head list;
+       struct mutex mutex;
+       wait_queue_head_t wait;
+       struct btrfs_block_group_cache *block_group;
+       u64 progress;
+       atomic_t count;
+};
+
 struct btrfs_block_group_cache {
        struct btrfs_key key;
        struct btrfs_block_group_item item;
@@ -733,6 +747,7 @@ struct btrfs_block_group_cache {
        spinlock_t lock;
        u64 pinned;
        u64 reserved;
+       u64 bytes_super;
        u64 flags;
        u64 sectorsize;
        int extents_thresh;
@@ -742,8 +757,9 @@ struct btrfs_block_group_cache {
        int dirty;
 
        /* cache tracking stuff */
-       wait_queue_head_t caching_q;
        int cached;
+       struct btrfs_caching_control *caching_ctl;
+       u64 last_byte_to_unpin;
 
        struct btrfs_space_info *space_info;
 
@@ -782,13 +798,16 @@ struct btrfs_fs_info {
 
        /* the log root tree is a directory of all the other log roots */
        struct btrfs_root *log_root_tree;
+
+       spinlock_t fs_roots_radix_lock;
        struct radix_tree_root fs_roots_radix;
 
        /* block group cache stuff */
        spinlock_t block_group_cache_lock;
        struct rb_root block_group_cache_tree;
 
-       struct extent_io_tree pinned_extents;
+       struct extent_io_tree freed_extents[2];
+       struct extent_io_tree *pinned_extents;
 
        /* logical->physical extent mapping */
        struct btrfs_mapping_tree mapping_tree;
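
The single pinned_extents tree becomes a two-element freed_extents array with a pointer selecting the active tree. The apparent intent, judging from btrfs_prepare_extent_commit() added further down, is double buffering: extents freed by the running transaction accumulate in one tree while the other, belonging to the committing transaction, is drained and unpinned, after which the roles swap. A conceptual sketch of the swap, assumed to run in the commit path:

	/* commit time: the tree that was filling is now the one to drain */
	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		fs_info->pinned_extents = &fs_info->freed_extents[1];
	else
		fs_info->pinned_extents = &fs_info->freed_extents[0];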
@@ -822,11 +841,7 @@ struct btrfs_fs_info {
        struct mutex transaction_kthread_mutex;
        struct mutex cleaner_mutex;
        struct mutex chunk_mutex;
-       struct mutex drop_mutex;
        struct mutex volume_mutex;
-       struct mutex tree_reloc_mutex;
-       struct rw_semaphore extent_commit_sem;
-
        /*
         * this protects the ordered operations list only while we are
         * processing all of the entries on it.  This way we make
@@ -835,10 +850,16 @@ struct btrfs_fs_info {
         * before jumping into the main commit.
         */
        struct mutex ordered_operations_mutex;
+       struct rw_semaphore extent_commit_sem;
+
+       struct rw_semaphore subvol_sem;
+
+       struct srcu_struct subvol_srcu;
 
        struct list_head trans_list;
        struct list_head hashers;
        struct list_head dead_roots;
+       struct list_head caching_block_groups;
 
        atomic_t nr_async_submits;
        atomic_t async_submit_draining;
@@ -996,10 +1017,12 @@ struct btrfs_root {
        u32 stripesize;
 
        u32 type;
-       u64 highest_inode;
-       u64 last_inode_alloc;
+
+       u64 highest_objectid;
        int ref_cows;
        int track_dirty;
+       int in_radix;
+
        u64 defrag_trans_start;
        struct btrfs_key defrag_progress;
        struct btrfs_key defrag_max;
@@ -1920,8 +1943,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_update_pinned_extents(struct btrfs_root *root,
-                               u64 bytenr, u64 num, int pin);
+int btrfs_pin_extent(struct btrfs_root *root,
+                    u64 bytenr, u64 num, int reserved);
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
@@ -1971,9 +1994,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                      u64 root_objectid, u64 owner, u64 offset);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *root,
-                              struct extent_io_tree *unpin);
+                              struct btrfs_root *root);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
@@ -1984,6 +2008,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
+int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, u64 bytes_used,
                           u64 type, u64 chunk_objectid, u64 chunk_offset,
@@ -2006,7 +2031,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
                                 u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
                              u64 bytes);
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -2100,12 +2124,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
                        struct extent_buffer *parent);
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
-                  struct btrfs_path *path,
-                  u64 root_id, u64 ref_id);
+                       struct btrfs_path *path,
+                       u64 root_id, u64 ref_id);
 int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
                       struct btrfs_root *tree_root,
-                      u64 root_id, u8 type, u64 ref_id,
-                      u64 dirid, u64 sequence,
+                      u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
+                      const char *name, int name_len);
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *tree_root,
+                      u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
                       const char *name, int name_len);
 int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_key *key);
@@ -2120,6 +2147,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
 int btrfs_search_root(struct btrfs_root *root, u64 search_start,
                      u64 *found_objectid);
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
+int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
                        struct extent_buffer *node);
 /* dir-item.c */
@@ -2138,6 +2166,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
                            struct btrfs_path *path, u64 dir,
                            u64 objectid, const char *name, int name_len,
                            int mod);
+struct btrfs_dir_item *
+btrfs_search_dir_index_item(struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dirid,
+                           const char *name, int name_len);
 struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
                              struct btrfs_path *path,
                              const char *name, int name_len);
@@ -2160,6 +2192,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 offset);
 int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, u64 offset);
+int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
 
 /* inode-map.c */
 int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
@@ -2232,6 +2265,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 int btrfs_add_link(struct btrfs_trans_handle *trans,
                   struct inode *parent_inode, struct inode *inode,
                   const char *name, int name_len, int add_backref, u64 index);
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct inode *dir, u64 objectid,
+                       const char *name, int name_len);
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               struct inode *inode, u64 new_size,
@@ -2242,7 +2279,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
 int btrfs_writepages(struct address_space *mapping,
                     struct writeback_control *wbc);
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, struct dentry *dentry,
+                            struct btrfs_root *new_root,
                             u64 new_dirid, u64 alloc_hint);
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                         size_t size, struct bio *bio, unsigned long bio_flags);
@@ -2258,6 +2295,7 @@ int btrfs_write_inode(struct inode *inode, int wait);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
+void btrfs_drop_inode(struct inode *inode);
 int btrfs_init_cachep(void);
 void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
@@ -2275,6 +2313,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
 void btrfs_orphan_cleanup(struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t size);
+int btrfs_invalidate_inodes(struct btrfs_root *root);
+extern struct dentry_operations btrfs_dentry_operations;
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -2290,7 +2330,7 @@ extern struct file_operations btrfs_file_operations;
 int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
                       u64 start, u64 end, u64 locked_end,
-                      u64 inline_limit, u64 *hint_block);
+                      u64 inline_limit, u64 *hint_block, int drop_cache);
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct inode *inode, u64 start, u64 end);
index 1d70236ba00c7bf4abf5181d2efd3b20d5867e2e..f3a6075519ccc1d96e42157f95769a5ad6641a12 100644 (file)
@@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
        return btrfs_match_dir_item_name(root, path, name, name_len);
 }
 
+struct btrfs_dir_item *
+btrfs_search_dir_index_item(struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dirid,
+                           const char *name, int name_len)
+{
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+       u32 nritems;
+       int ret;
+
+       key.objectid = dirid;
+       key.type = BTRFS_DIR_INDEX_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       leaf = path->nodes[0];
+       nritems = btrfs_header_nritems(leaf);
+
+       while (1) {
+               if (path->slots[0] >= nritems) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ERR_PTR(ret);
+                       if (ret > 0)
+                               break;
+                       leaf = path->nodes[0];
+                       nritems = btrfs_header_nritems(leaf);
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
+                       break;
+
+               di = btrfs_match_dir_item_name(root, path, name, name_len);
+               if (di)
+                       return di;
+
+               path->slots[0]++;
+       }
+       return NULL;
+}
+
 struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path, u64 dir,
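
btrfs_search_dir_index_item() scans every BTRFS_DIR_INDEX_KEY item of a directory, using btrfs_next_leaf() to cross leaf boundaries, until btrfs_match_dir_item_name() finds the name. A hedged sketch of a caller (the unlink-by-name context is hypothetical; error handling trimmed):

	struct btrfs_path *path = btrfs_alloc_path();
	struct btrfs_dir_item *di;
	int ret = 0;

	if (!path)
		return -ENOMEM;
	di = btrfs_search_dir_index_item(root, path, dir->i_ino,
					 name, name_len);
	if (IS_ERR(di))
		ret = PTR_ERR(di);		/* search failed outright */
	else if (!di)
		ret = -ENOENT;			/* name not in the index */
	else
		/* path points at the match; delete just this name */
		ret = btrfs_delete_one_dir_name(trans, root, path, di);
	btrfs_free_path(path);
	return ret;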
index 6c4173146bb739060de46b6776b3f471250491ee..644e796fd643e045ca0b0ed057743b7eb5e88438 100644 (file)
@@ -41,6 +41,7 @@
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
+static void free_fs_root(struct btrfs_root *root);
 
 static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
 
@@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
        struct extent_map *em;
        int ret;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em) {
                em->bdev =
                        BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
                goto out;
        }
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
@@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
        em->block_start = 0;
        em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        if (ret == -EEXIST) {
                u64 failed_start = em->start;
@@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
                free_extent_map(em);
                em = NULL;
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 
        if (ret)
                em = ERR_PTR(ret);
@@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->fs_info = fs_info;
        root->objectid = objectid;
        root->last_trans = 0;
-       root->highest_inode = 0;
-       root->last_inode_alloc = 0;
+       root->highest_objectid = 0;
        root->name = NULL;
        root->in_sysfs = 0;
        root->inode_tree.rb_node = NULL;
@@ -952,14 +952,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
                     root, fs_info, objectid);
        ret = btrfs_find_last_root(tree_root, objectid,
                                   &root->root_item, &root->root_key);
+       if (ret > 0)
+               return -ENOENT;
        BUG_ON(ret);
 
        generation = btrfs_root_generation(&root->root_item);
        blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     blocksize, generation);
-       root->commit_root = btrfs_root_node(root);
        BUG_ON(!root->node);
+       root->commit_root = btrfs_root_node(root);
        return 0;
 }
 
@@ -1095,7 +1097,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        struct btrfs_fs_info *fs_info = tree_root->fs_info;
        struct btrfs_path *path;
        struct extent_buffer *l;
-       u64 highest_inode;
        u64 generation;
        u32 blocksize;
        int ret = 0;
@@ -1110,7 +1111,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
                        kfree(root);
                        return ERR_PTR(ret);
                }
-               goto insert;
+               goto out;
        }
 
        __setup_root(tree_root->nodesize, tree_root->leafsize,
@@ -1120,39 +1121,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        path = btrfs_alloc_path();
        BUG_ON(!path);
        ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
-       if (ret != 0) {
-               if (ret > 0)
-                       ret = -ENOENT;
-               goto out;
+       if (ret == 0) {
+               l = path->nodes[0];
+               read_extent_buffer(l, &root->root_item,
+                               btrfs_item_ptr_offset(l, path->slots[0]),
+                               sizeof(root->root_item));
+               memcpy(&root->root_key, location, sizeof(*location));
        }
-       l = path->nodes[0];
-       read_extent_buffer(l, &root->root_item,
-              btrfs_item_ptr_offset(l, path->slots[0]),
-              sizeof(root->root_item));
-       memcpy(&root->root_key, location, sizeof(*location));
-       ret = 0;
-out:
-       btrfs_release_path(root, path);
        btrfs_free_path(path);
        if (ret) {
-               kfree(root);
+               if (ret > 0)
+                       ret = -ENOENT;
                return ERR_PTR(ret);
        }
+
        generation = btrfs_root_generation(&root->root_item);
        blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     blocksize, generation);
        root->commit_root = btrfs_root_node(root);
        BUG_ON(!root->node);
-insert:
-       if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+out:
+       if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
                root->ref_cows = 1;
-               ret = btrfs_find_highest_inode(root, &highest_inode);
-               if (ret == 0) {
-                       root->highest_inode = highest_inode;
-                       root->last_inode_alloc = highest_inode;
-               }
-       }
+
        return root;
 }
 
@@ -1187,39 +1179,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
                return fs_info->dev_root;
        if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
                return fs_info->csum_root;
-
+again:
+       spin_lock(&fs_info->fs_roots_radix_lock);
        root = radix_tree_lookup(&fs_info->fs_roots_radix,
                                 (unsigned long)location->objectid);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
        if (root)
                return root;
 
+       ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+       if (ret == 0)
+               ret = -ENOENT;
+       if (ret < 0)
+               return ERR_PTR(ret);
+
        root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
        if (IS_ERR(root))
                return root;
 
+       WARN_ON(btrfs_root_refs(&root->root_item) == 0);
        set_anon_super(&root->anon_super, NULL);
 
+       ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+       if (ret)
+               goto fail;
+
+       spin_lock(&fs_info->fs_roots_radix_lock);
        ret = radix_tree_insert(&fs_info->fs_roots_radix,
                                (unsigned long)root->root_key.objectid,
                                root);
+       if (ret == 0)
+               root->in_radix = 1;
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+       radix_tree_preload_end();
        if (ret) {
-               free_extent_buffer(root->node);
-               kfree(root);
-               return ERR_PTR(ret);
+               if (ret == -EEXIST) {
+                       free_fs_root(root);
+                       goto again;
+               }
+               goto fail;
        }
-       if (!(fs_info->sb->s_flags & MS_RDONLY)) {
-               ret = btrfs_find_dead_roots(fs_info->tree_root,
-                                           root->root_key.objectid);
-               BUG_ON(ret);
+
+       ret = btrfs_find_dead_roots(fs_info->tree_root,
+                                   root->root_key.objectid);
+       WARN_ON(ret);
+
+       if (!(fs_info->sb->s_flags & MS_RDONLY))
                btrfs_orphan_cleanup(root);
-       }
+
        return root;
+fail:
+       free_fs_root(root);
+       return ERR_PTR(ret);
 }
 
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                                      struct btrfs_key *location,
                                      const char *name, int namelen)
 {
+       return btrfs_read_fs_root_no_name(fs_info, location);
+#if 0
        struct btrfs_root *root;
        int ret;
 
@@ -1236,7 +1255,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                kfree(root);
                return ERR_PTR(ret);
        }
-#if 0
+
        ret = btrfs_sysfs_add_root(root);
        if (ret) {
                free_extent_buffer(root->node);
@@ -1244,9 +1263,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                kfree(root);
                return ERR_PTR(ret);
        }
-#endif
        root->in_sysfs = 1;
        return root;
+#endif
 }
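
The radix-tree insertion in btrfs_read_fs_root_no_name() above follows the standard pattern for trees guarded only by a spinlock: preload per-CPU nodes while sleeping is still allowed, take the lock, insert, end the preload, and treat -EEXIST as losing a race, in which case the local copy is freed and the lookup retried. In outline, with hypothetical names for the non-btrfs parts:

	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
	if (ret)
		goto fail;			/* could not reserve nodes */

	spin_lock(&lock);
	ret = radix_tree_insert(&tree, key, item);
	spin_unlock(&lock);
	radix_tree_preload_end();

	if (ret == -EEXIST) {
		free_item(item);		/* hypothetical: drop our copy */
		goto again;			/* look up the winner instead */
	}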
 
 static int btrfs_congested_fn(void *congested_data, int bdi_bits)
@@ -1325,9 +1344,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
        offset = page_offset(page);
 
        em_tree = &BTRFS_I(inode)->extent_tree;
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
        if (!em) {
                __unplug_io_fn(bdi, page);
                return;
@@ -1360,8 +1379,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 
        err = bdi_register(bdi, NULL, "btrfs-%d",
                                atomic_inc_return(&btrfs_bdi_num));
-       if (err)
+       if (err) {
+               bdi_destroy(bdi);
                return err;
+       }
 
        bdi->ra_pages   = default_backing_dev_info.ra_pages;
        bdi->unplug_io_fn       = btrfs_unplug_io_fn;
@@ -1451,9 +1472,12 @@ static int cleaner_kthread(void *arg)
                        break;
 
                vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
-               mutex_lock(&root->fs_info->cleaner_mutex);
-               btrfs_clean_old_snapshots(root);
-               mutex_unlock(&root->fs_info->cleaner_mutex);
+
+               if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
+                   mutex_trylock(&root->fs_info->cleaner_mutex)) {
+                       btrfs_clean_old_snapshots(root);
+                       mutex_unlock(&root->fs_info->cleaner_mutex);
+               }
 
                if (freezing(current)) {
                        refrigerator();
@@ -1558,15 +1582,36 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                err = -ENOMEM;
                goto fail;
        }
-       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
+
+       ret = init_srcu_struct(&fs_info->subvol_srcu);
+       if (ret) {
+               err = ret;
+               goto fail;
+       }
+
+       ret = setup_bdi(fs_info, &fs_info->bdi);
+       if (ret) {
+               err = ret;
+               goto fail_srcu;
+       }
+
+       fs_info->btree_inode = new_inode(sb);
+       if (!fs_info->btree_inode) {
+               err = -ENOMEM;
+               goto fail_bdi;
+       }
+
+       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->hashers);
        INIT_LIST_HEAD(&fs_info->delalloc_inodes);
        INIT_LIST_HEAD(&fs_info->ordered_operations);
+       INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_lock);
        spin_lock_init(&fs_info->new_trans_lock);
        spin_lock_init(&fs_info->ref_cache_lock);
+       spin_lock_init(&fs_info->fs_roots_radix_lock);
 
        init_completion(&fs_info->kobj_unregister);
        fs_info->tree_root = tree_root;
@@ -1585,11 +1630,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->sb = sb;
        fs_info->max_extent = (u64)-1;
        fs_info->max_inline = 8192 * 1024;
-       if (setup_bdi(fs_info, &fs_info->bdi))
-               goto fail_bdi;
-       fs_info->btree_inode = new_inode(sb);
-       fs_info->btree_inode->i_ino = 1;
-       fs_info->btree_inode->i_nlink = 1;
        fs_info->metadata_ratio = 8;
 
        fs_info->thread_pool_size = min_t(unsigned long,
@@ -1602,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        sb->s_blocksize_bits = blksize_bits(4096);
        sb->s_bdi = &fs_info->bdi;
 
+       fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+       fs_info->btree_inode->i_nlink = 1;
        /*
         * we set the i_size on the btree inode to the max possible int.
         * the real end of the address space is determined by all of
@@ -1620,28 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
        BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
 
+       BTRFS_I(fs_info->btree_inode)->root = tree_root;
+       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+              sizeof(struct btrfs_key));
+       BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
+       insert_inode_hash(fs_info->btree_inode);
+
        spin_lock_init(&fs_info->block_group_cache_lock);
        fs_info->block_group_cache_tree.rb_node = NULL;
 
-       extent_io_tree_init(&fs_info->pinned_extents,
+       extent_io_tree_init(&fs_info->freed_extents[0],
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
+       extent_io_tree_init(&fs_info->freed_extents[1],
+                            fs_info->btree_inode->i_mapping, GFP_NOFS);
+       fs_info->pinned_extents = &fs_info->freed_extents[0];
        fs_info->do_barriers = 1;
 
-       BTRFS_I(fs_info->btree_inode)->root = tree_root;
-       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
-              sizeof(struct btrfs_key));
-       insert_inode_hash(fs_info->btree_inode);
 
        mutex_init(&fs_info->trans_mutex);
        mutex_init(&fs_info->ordered_operations_mutex);
        mutex_init(&fs_info->tree_log_mutex);
-       mutex_init(&fs_info->drop_mutex);
        mutex_init(&fs_info->chunk_mutex);
        mutex_init(&fs_info->transaction_kthread_mutex);
        mutex_init(&fs_info->cleaner_mutex);
        mutex_init(&fs_info->volume_mutex);
-       mutex_init(&fs_info->tree_reloc_mutex);
        init_rwsem(&fs_info->extent_commit_sem);
+       init_rwsem(&fs_info->subvol_sem);
 
        btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
        btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -1700,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                err = -EINVAL;
                goto fail_iput;
        }
-
+printk("thread pool is %d\n", fs_info->thread_pool_size);
        /*
         * we need to start all the end_io workers up front because the
         * queue work function gets called at interrupt time, and so it
@@ -1745,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->endio_workers.idle_thresh = 4;
        fs_info->endio_meta_workers.idle_thresh = 4;
 
-       fs_info->endio_write_workers.idle_thresh = 64;
-       fs_info->endio_meta_write_workers.idle_thresh = 64;
+       fs_info->endio_write_workers.idle_thresh = 2;
+       fs_info->endio_meta_write_workers.idle_thresh = 2;
+
+       fs_info->endio_workers.atomic_worker_start = 1;
+       fs_info->endio_meta_workers.atomic_worker_start = 1;
+       fs_info->endio_write_workers.atomic_worker_start = 1;
+       fs_info->endio_meta_write_workers.atomic_worker_start = 1;
 
        btrfs_start_workers(&fs_info->workers, 1);
        btrfs_start_workers(&fs_info->submit_workers, 1);
        btrfs_start_workers(&fs_info->delalloc_workers, 1);
        btrfs_start_workers(&fs_info->fixup_workers, 1);
-       btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_meta_workers,
-                           fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_meta_write_workers,
-                           fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_write_workers,
-                           fs_info->thread_pool_size);
+       btrfs_start_workers(&fs_info->endio_workers, 1);
+       btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+       btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+       btrfs_start_workers(&fs_info->endio_write_workers, 1);
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1918,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                }
        }
 
+       ret = btrfs_find_orphan_roots(tree_root);
+       BUG_ON(ret);
+
        if (!(sb->s_flags & MS_RDONLY)) {
                ret = btrfs_recover_relocation(tree_root);
                BUG_ON(ret);
@@ -1977,6 +2028,8 @@ fail_iput:
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 fail_bdi:
        bdi_destroy(&fs_info->bdi);
+fail_srcu:
+       cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
        kfree(extent_root);
        kfree(tree_root);
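
fail_srcu slots into open_ctree()'s existing unwind chain: each allocation that can fail jumps to the label that tears down only what was already set up, so cleanup runs in exact reverse order of initialization. Reduced to the two resources this hunk touches:

	ret = init_srcu_struct(&fs_info->subvol_srcu);
	if (ret)
		goto fail;			/* nothing to undo yet */

	ret = setup_bdi(fs_info, &fs_info->bdi);
	if (ret)
		goto fail_srcu;			/* undo only the srcu init */

	/* ... further setup; later failures jump to later labels ... */

fail_bdi:
	bdi_destroy(&fs_info->bdi);
fail_srcu:
	cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
	return ERR_PTR(ret);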
@@ -2236,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
 
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
 {
-       WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+       spin_lock(&fs_info->fs_roots_radix_lock);
        radix_tree_delete(&fs_info->fs_roots_radix,
                          (unsigned long)root->root_key.objectid);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+
+       if (btrfs_root_refs(&root->root_item) == 0)
+               synchronize_srcu(&fs_info->subvol_srcu);
+
+       free_fs_root(root);
+       return 0;
+}
+
+static void free_fs_root(struct btrfs_root *root)
+{
+       WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
        if (root->anon_super.s_dev) {
                down_write(&root->anon_super.s_umount);
                kill_anon_super(&root->anon_super);
        }
-       if (root->node)
-               free_extent_buffer(root->node);
-       if (root->commit_root)
-               free_extent_buffer(root->commit_root);
+       free_extent_buffer(root->node);
+       free_extent_buffer(root->commit_root);
        kfree(root->name);
        kfree(root);
-       return 0;
 }
 
 static int del_fs_roots(struct btrfs_fs_info *fs_info)
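
The SRCU usage here is the classic remove-then-wait idiom. Readers (see the export.c hunks below) bracket their use of a root with srcu_read_lock()/srcu_read_unlock(); the free path first unhooks the root from the radix tree and only then calls synchronize_srcu(), which blocks until every reader that could still hold the stale pointer has finished, making the subsequent free safe. In outline:

	/* reader side */
	idx = srcu_read_lock(&fs_info->subvol_srcu);
	root = radix_tree_lookup(&fs_info->fs_roots_radix, id);
	/* root cannot be freed while we sit in the srcu read section */
	srcu_read_unlock(&fs_info->subvol_srcu, idx);

	/* updater side */
	spin_lock(&fs_info->fs_roots_radix_lock);
	radix_tree_delete(&fs_info->fs_roots_radix, id);
	spin_unlock(&fs_info->fs_roots_radix_lock);
	synchronize_srcu(&fs_info->subvol_srcu);	/* drain readers */
	free_fs_root(root);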
@@ -2258,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
        struct btrfs_root *gang[8];
        int i;
 
+       while (!list_empty(&fs_info->dead_roots)) {
+               gang[0] = list_entry(fs_info->dead_roots.next,
+                                    struct btrfs_root, root_list);
+               list_del(&gang[0]->root_list);
+
+               if (gang[0]->in_radix) {
+                       btrfs_free_fs_root(fs_info, gang[0]);
+               } else {
+                       free_extent_buffer(gang[0]->node);
+                       free_extent_buffer(gang[0]->commit_root);
+                       kfree(gang[0]);
+               }
+       }
+
        while (1) {
                ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
                                             (void **)gang, 0,
@@ -2287,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
                root_objectid = gang[ret - 1]->root_key.objectid + 1;
                for (i = 0; i < ret; i++) {
                        root_objectid = gang[i]->root_key.objectid;
-                       ret = btrfs_find_dead_roots(fs_info->tree_root,
-                                                   root_objectid);
-                       BUG_ON(ret);
                        btrfs_orphan_cleanup(gang[i]);
                }
                root_objectid++;
@@ -2359,7 +2432,6 @@ int close_ctree(struct btrfs_root *root)
        free_extent_buffer(root->fs_info->csum_root->commit_root);
 
        btrfs_free_block_groups(root->fs_info);
-       btrfs_free_pinned_extents(root->fs_info);
 
        del_fs_roots(fs_info);
 
@@ -2378,6 +2450,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
        bdi_destroy(&fs_info->bdi);
+       cleanup_srcu_struct(&fs_info->subvol_srcu);
 
        kfree(fs_info->extent_root);
        kfree(fs_info->tree_root);
index 9596b40caa4ea3dd307405a68153a49a3f143483..ba5c3fd5ab8c89e3057aa612f1bd929c1589b2b3 100644 (file)
@@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
        len  = BTRFS_FID_SIZE_NON_CONNECTABLE;
        type = FILEID_BTRFS_WITHOUT_PARENT;
 
-       fid->objectid = BTRFS_I(inode)->location.objectid;
+       fid->objectid = inode->i_ino;
        fid->root_objectid = BTRFS_I(inode)->root->objectid;
        fid->gen = inode->i_generation;
 
@@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 }
 
 static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
-                                      u64 root_objectid, u32 generation)
+                                      u64 root_objectid, u32 generation,
+                                      int check_generation)
 {
+       struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info;
        struct btrfs_root *root;
+       struct dentry *dentry;
        struct inode *inode;
        struct btrfs_key key;
+       int index;
+       int err = 0;
+
+       if (objectid < BTRFS_FIRST_FREE_OBJECTID)
+               return ERR_PTR(-ESTALE);
 
        key.objectid = root_objectid;
        btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
        key.offset = (u64)-1;
 
-       root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key);
-       if (IS_ERR(root))
-               return ERR_CAST(root);
+       index = srcu_read_lock(&fs_info->subvol_srcu);
+
+       root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(root)) {
+               err = PTR_ERR(root);
+               goto fail;
+       }
+
+       if (btrfs_root_refs(&root->root_item) == 0) {
+               err = -ENOENT;
+               goto fail;
+       }
 
        key.objectid = objectid;
        btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
        key.offset = 0;
 
        inode = btrfs_iget(sb, &key, root);
-       if (IS_ERR(inode))
-               return (void *)inode;
+       if (IS_ERR(inode)) {
+               err = PTR_ERR(inode);
+               goto fail;
+       }
+
+       srcu_read_unlock(&fs_info->subvol_srcu, index);
 
-       if (generation != inode->i_generation) {
+       if (check_generation && generation != inode->i_generation) {
                iput(inode);
                return ERR_PTR(-ESTALE);
        }
 
-       return d_obtain_alias(inode);
+       dentry = d_obtain_alias(inode);
+       if (!IS_ERR(dentry))
+               dentry->d_op = &btrfs_dentry_operations;
+       return dentry;
+fail:
+       srcu_read_unlock(&fs_info->subvol_srcu, index);
+       return ERR_PTR(err);
 }
 
 static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
@@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
        objectid = fid->parent_objectid;
        generation = fid->parent_gen;
 
-       return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+       return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
 }
 
 static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
@@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
        root_objectid = fid->root_objectid;
        generation = fid->gen;
 
-       return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+       return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
 }
 
 static struct dentry *btrfs_get_parent(struct dentry *child)
 {
        struct inode *dir = child->d_inode;
+       struct dentry *dentry;
        struct btrfs_root *root = BTRFS_I(dir)->root;
-       struct btrfs_key key;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
-       int slot;
-       u64 objectid;
+       struct btrfs_root_ref *ref;
+       struct btrfs_key key;
+       struct btrfs_key found_key;
        int ret;
 
        path = btrfs_alloc_path();
 
-       key.objectid = dir->i_ino;
-       btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
-       key.offset = (u64)-1;
+       if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+               key.objectid = root->root_key.objectid;
+               key.type = BTRFS_ROOT_BACKREF_KEY;
+               key.offset = (u64)-1;
+               root = root->fs_info->tree_root;
+       } else {
+               key.objectid = dir->i_ino;
+               key.type = BTRFS_INODE_REF_KEY;
+               key.offset = (u64)-1;
+       }
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-       if (ret < 0) {
-               /* Error */
-               btrfs_free_path(path);
-               return ERR_PTR(ret);
+       if (ret < 0)
+               goto fail;
+
+       BUG_ON(ret == 0);
+       if (path->slots[0] == 0) {
+               ret = -ENOENT;
+               goto fail;
        }
+
+       path->slots[0]--;
        leaf = path->nodes[0];
-       slot = path->slots[0];
-       if (ret) {
-               /* btrfs_search_slot() returns the slot where we'd want to
-                  insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
-                  The _real_ backref, telling us what the parent inode
-                  _actually_ is, will be in the slot _before_ the one
-                  that btrfs_search_slot() returns. */
-               if (!slot) {
-                       /* Unless there is _no_ key in the tree before... */
-                       btrfs_free_path(path);
-                       return ERR_PTR(-EIO);
-               }
-               slot--;
+
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+       if (found_key.objectid != key.objectid || found_key.type != key.type) {
+               ret = -ENOENT;
+               goto fail;
        }
 
-       btrfs_item_key_to_cpu(leaf, &key, slot);
+       if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
+               ref = btrfs_item_ptr(leaf, path->slots[0],
+                                    struct btrfs_root_ref);
+               key.objectid = btrfs_root_ref_dirid(leaf, ref);
+       } else {
+               key.objectid = found_key.offset;
+       }
        btrfs_free_path(path);
 
-       if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
-               return ERR_PTR(-EINVAL);
-
-       objectid = key.offset;
-
-       /* If we are already at the root of a subvol, return the real root */
-       if (objectid == dir->i_ino)
-               return dget(dir->i_sb->s_root);
+       if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
+               return btrfs_get_dentry(root->fs_info->sb, key.objectid,
+                                       found_key.offset, 0, 0);
+       }
 
-       /* Build a new key for the inode item */
-       key.objectid = objectid;
-       btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+       key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
-
-       return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
+       dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
+       if (!IS_ERR(dentry))
+               dentry->d_op = &btrfs_dentry_operations;
+       return dentry;
+fail:
+       btrfs_free_path(path);
+       return ERR_PTR(ret);
 }
 
 const struct export_operations btrfs_export_ops = {
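
The rewritten btrfs_get_parent() uses the usual btrfs "search past the end, step back" idiom: with key.offset = (u64)-1 an exact match is impossible, btrfs_search_slot() lands one slot past the last item with that objectid and type, and decrementing path->slots[0] walks back to the highest real backref. The skeleton:

	key.objectid = id;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = (u64)-1;		/* beyond any real offset */

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto fail;
	BUG_ON(ret == 0);		/* (u64)-1 never matches exactly */
	if (path->slots[0] == 0)	/* nothing sorts before our key */
		goto fail;
	path->slots[0]--;		/* last item <= the search key */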
index 535f85ba104f41fea1c28820533da7e1af1512c3..993f93ff7ba695c97b490f5e2d5d6d3b5939980c 100644 (file)
 #include "locking.h"
 #include "free-space-cache.h"
 
-static int update_reserved_extents(struct btrfs_root *root,
-                                  u64 bytenr, u64 num, int reserve);
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc,
                              int mark_free);
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+                                  u64 num_bytes, int reserve);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
@@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
                                     u64 parent, u64 root_objectid,
                                     u64 flags, struct btrfs_disk_key *key,
                                     int level, struct btrfs_key *ins);
-
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags, int force);
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         struct btrfs_path *path,
+                         u64 bytenr, u64 num_bytes,
+                         int is_data, int reserved,
+                         struct extent_buffer **must_clean);
+static int find_next_key(struct btrfs_path *path, int level,
+                        struct btrfs_key *key);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
        return ret;
 }
 
-/*
- * We always set EXTENT_LOCKED for the super mirror extents so we don't
- * overwrite them, so those bits need to be unset.  Also, if we are unmounting
- * with pinned extents still sitting there because we had a block group caching,
- * we need to clear those now, since we are done.
- */
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+static int add_excluded_extent(struct btrfs_root *root,
+                              u64 start, u64 num_bytes)
 {
-       u64 start, end, last = 0;
-       int ret;
+       u64 end = start + num_bytes - 1;
+       set_extent_bits(&root->fs_info->freed_extents[0],
+                       start, end, EXTENT_UPTODATE, GFP_NOFS);
+       set_extent_bits(&root->fs_info->freed_extents[1],
+                       start, end, EXTENT_UPTODATE, GFP_NOFS);
+       return 0;
+}
 
-       while (1) {
-               ret = find_first_extent_bit(&info->pinned_extents, last,
-                                           &start, &end,
-                                           EXTENT_LOCKED|EXTENT_DIRTY);
-               if (ret)
-                       break;
+static void free_excluded_extents(struct btrfs_root *root,
+                                 struct btrfs_block_group_cache *cache)
+{
+       u64 start, end;
 
-               clear_extent_bits(&info->pinned_extents, start, end,
-                                 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
-               last = end+1;
-       }
+       start = cache->key.objectid;
+       end = start + cache->key.offset - 1;
+
+       clear_extent_bits(&root->fs_info->freed_extents[0],
+                         start, end, EXTENT_UPTODATE, GFP_NOFS);
+       clear_extent_bits(&root->fs_info->freed_extents[1],
+                         start, end, EXTENT_UPTODATE, GFP_NOFS);
 }
 
-static int remove_sb_from_cache(struct btrfs_root *root,
-                               struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_root *root,
+                                struct btrfs_block_group_cache *cache)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        u64 bytenr;
        u64 *logical;
        int stripe_len;
@@ -192,17 +199,42 @@ static int remove_sb_from_cache(struct btrfs_root *root,
                                       cache->key.objectid, bytenr,
                                       0, &logical, &nr, &stripe_len);
                BUG_ON(ret);
+
                while (nr--) {
-                       try_lock_extent(&fs_info->pinned_extents,
-                                       logical[nr],
-                                       logical[nr] + stripe_len - 1, GFP_NOFS);
+                       cache->bytes_super += stripe_len;
+                       ret = add_excluded_extent(root, logical[nr],
+                                                 stripe_len);
+                       BUG_ON(ret);
                }
+
                kfree(logical);
        }
-
        return 0;
 }
 
+static struct btrfs_caching_control *
+get_caching_control(struct btrfs_block_group_cache *cache)
+{
+       struct btrfs_caching_control *ctl;
+
+       spin_lock(&cache->lock);
+       if (cache->cached != BTRFS_CACHE_STARTED) {
+               spin_unlock(&cache->lock);
+               return NULL;
+       }
+
+       ctl = cache->caching_ctl;
+       atomic_inc(&ctl->count);
+       spin_unlock(&cache->lock);
+       return ctl;
+}
+
+static void put_caching_control(struct btrfs_caching_control *ctl)
+{
+       if (atomic_dec_and_test(&ctl->count))
+               kfree(ctl);
+}
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
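
get_caching_control()/put_caching_control() form a small hand-rolled refcount: the count starts at 2 (one for the caching kthread, one for the fs-wide caching_block_groups list), a waiter takes its reference under the block group's spinlock so it cannot race with the ctl being detached, and the final put frees the structure. A sketch of a waiter, with the wait condition assumed rather than taken from this diff:

	struct btrfs_caching_control *ctl;

	ctl = get_caching_control(cache);
	if (!ctl)
		return;		/* caching already finished, or never started */

	wait_event(ctl->wait, block_group_cache_done(cache) ||
		   ctl->progress >= offset);	/* assumed condition */
	put_caching_control(ctl);		/* last put kfree()s ctl */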
@@ -215,9 +247,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
        int ret;
 
        while (start < end) {
-               ret = find_first_extent_bit(&info->pinned_extents, start,
+               ret = find_first_extent_bit(info->pinned_extents, start,
                                            &extent_start, &extent_end,
-                                           EXTENT_DIRTY|EXTENT_LOCKED);
+                                           EXTENT_DIRTY | EXTENT_UPTODATE);
                if (ret)
                        break;
 
@@ -249,22 +281,27 @@ static int caching_kthread(void *data)
 {
        struct btrfs_block_group_cache *block_group = data;
        struct btrfs_fs_info *fs_info = block_group->fs_info;
-       u64 last = 0;
+       struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
+       struct btrfs_root *extent_root = fs_info->extent_root;
        struct btrfs_path *path;
-       int ret = 0;
-       struct btrfs_key key;
        struct extent_buffer *leaf;
-       int slot;
+       struct btrfs_key key;
        u64 total_found = 0;
-
-       BUG_ON(!fs_info);
+       u64 last = 0;
+       u32 nritems;
+       int ret = 0;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       atomic_inc(&block_group->space_info->caching_threads);
+       exclude_super_stripes(extent_root, block_group);
+       spin_lock(&block_group->space_info->lock);
+       block_group->space_info->bytes_super += block_group->bytes_super;
+       spin_unlock(&block_group->space_info->lock);
+
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+
        /*
         * We don't want to deadlock with somebody trying to allocate a new
         * extent for the extent root while also trying to search the extent
@@ -277,74 +314,64 @@ static int caching_kthread(void *data)
 
        key.objectid = last;
        key.offset = 0;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+       key.type = BTRFS_EXTENT_ITEM_KEY;
 again:
+       mutex_lock(&caching_ctl->mutex);
        /* need to make sure the commit_root doesn't disappear */
        down_read(&fs_info->extent_commit_sem);
 
-       ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
 
+       leaf = path->nodes[0];
+       nritems = btrfs_header_nritems(leaf);
+
        while (1) {
                smp_mb();
-               if (block_group->fs_info->closing > 1) {
+               if (fs_info->closing > 1) {
                        last = (u64)-1;
                        break;
                }
 
-               leaf = path->nodes[0];
-               slot = path->slots[0];
-               if (slot >= btrfs_header_nritems(leaf)) {
-                       ret = btrfs_next_leaf(fs_info->extent_root, path);
-                       if (ret < 0)
-                               goto err;
-                       else if (ret)
+               if (path->slots[0] < nritems) {
+                       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               } else {
+                       ret = find_next_key(path, 0, &key);
+                       if (ret)
                                break;
 
-                       if (need_resched() ||
-                           btrfs_transaction_in_commit(fs_info)) {
-                               leaf = path->nodes[0];
-
-                               /* this shouldn't happen, but if the
-                                * leaf is empty just move on.
-                                */
-                               if (btrfs_header_nritems(leaf) == 0)
-                                       break;
-                               /*
-                                * we need to copy the key out so that
-                                * we are sure the next search advances
-                                * us forward in the btree.
-                                */
-                               btrfs_item_key_to_cpu(leaf, &key, 0);
-                               btrfs_release_path(fs_info->extent_root, path);
-                               up_read(&fs_info->extent_commit_sem);
+                       caching_ctl->progress = last;
+                       btrfs_release_path(extent_root, path);
+                       up_read(&fs_info->extent_commit_sem);
+                       mutex_unlock(&caching_ctl->mutex);
+                       if (btrfs_transaction_in_commit(fs_info))
                                schedule_timeout(1);
-                               goto again;
-                       }
+                       else
+                               cond_resched();
+                       goto again;
+               }
 
+               if (key.objectid < block_group->key.objectid) {
+                       path->slots[0]++;
                        continue;
                }
-               btrfs_item_key_to_cpu(leaf, &key, slot);
-               if (key.objectid < block_group->key.objectid)
-                       goto next;
 
                if (key.objectid >= block_group->key.objectid +
                    block_group->key.offset)
                        break;
 
-               if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+               if (key.type == BTRFS_EXTENT_ITEM_KEY) {
                        total_found += add_new_free_space(block_group,
                                                          fs_info, last,
                                                          key.objectid);
                        last = key.objectid + key.offset;
-               }
 
-               if (total_found > (1024 * 1024 * 2)) {
-                       total_found = 0;
-                       wake_up(&block_group->caching_q);
+                       if (total_found > (1024 * 1024 * 2)) {
+                               total_found = 0;
+                               wake_up(&caching_ctl->wait);
+                       }
                }
-next:
                path->slots[0]++;
        }
        ret = 0;
@@ -352,33 +379,65 @@ next:
        total_found += add_new_free_space(block_group, fs_info, last,
                                          block_group->key.objectid +
                                          block_group->key.offset);
+       caching_ctl->progress = (u64)-1;
 
        spin_lock(&block_group->lock);
+       block_group->caching_ctl = NULL;
        block_group->cached = BTRFS_CACHE_FINISHED;
        spin_unlock(&block_group->lock);
 
 err:
        btrfs_free_path(path);
        up_read(&fs_info->extent_commit_sem);
-       atomic_dec(&block_group->space_info->caching_threads);
-       wake_up(&block_group->caching_q);
 
+       free_excluded_extents(extent_root, block_group);
+
+       mutex_unlock(&caching_ctl->mutex);
+       wake_up(&caching_ctl->wait);
+
+       put_caching_control(caching_ctl);
+       atomic_dec(&block_group->space_info->caching_threads);
        return 0;
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache)
 {
+       struct btrfs_fs_info *fs_info = cache->fs_info;
+       struct btrfs_caching_control *caching_ctl;
        struct task_struct *tsk;
        int ret = 0;
 
+       smp_mb();
+       if (cache->cached != BTRFS_CACHE_NO)
+               return 0;
+
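+       /*
+        * set up the caching control before taking the lock; if someone
+        * else started caching this block group in the meantime we catch
+        * it under cache->lock below and free this one again
+        */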
+       caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+       BUG_ON(!caching_ctl);
+
+       INIT_LIST_HEAD(&caching_ctl->list);
+       mutex_init(&caching_ctl->mutex);
+       init_waitqueue_head(&caching_ctl->wait);
+       caching_ctl->block_group = cache;
+       caching_ctl->progress = cache->key.objectid;
+       /* one ref for the caching kthread, one for the caching block group list */
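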
+       atomic_set(&caching_ctl->count, 2);
+
        spin_lock(&cache->lock);
        if (cache->cached != BTRFS_CACHE_NO) {
                spin_unlock(&cache->lock);
-               return ret;
+               kfree(caching_ctl);
+               return 0;
        }
+       cache->caching_ctl = caching_ctl;
        cache->cached = BTRFS_CACHE_STARTED;
        spin_unlock(&cache->lock);
 
+       down_write(&fs_info->extent_commit_sem);
+       list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
+       up_write(&fs_info->extent_commit_sem);
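+       /*
+        * the control is now on fs_info->caching_block_groups, where
+        * btrfs_prepare_extent_commit() finds it at commit time to record
+        * last_byte_to_unpin for this block group
+        */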
+
+       atomic_inc(&cache->space_info->caching_threads);
+
        tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
                          cache->key.objectid);
        if (IS_ERR(tsk)) {
@@ -1657,7 +1716,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
                                                 parent, ref_root, flags,
                                                 ref->objectid, ref->offset,
                                                 &ins, node->ref_mod);
-               update_reserved_extents(root, ins.objectid, ins.offset, 0);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
                ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
                                             node->num_bytes, parent,
@@ -1783,7 +1841,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
                                                extent_op->flags_to_set,
                                                &extent_op->key,
                                                ref->level, &ins);
-               update_reserved_extents(root, ins.objectid, ins.offset, 0);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
                ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
                                             node->num_bytes, parent, ref_root,
@@ -1818,16 +1875,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
                BUG_ON(extent_op);
                head = btrfs_delayed_node_to_head(node);
                if (insert_reserved) {
+                       int mark_free = 0;
+                       struct extent_buffer *must_clean = NULL;
+
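+                       /*
+                        * this extent was only ever reserved, never added
+                        * to the extent tree, so pin it here.  a positive
+                        * return means the bytes can go straight back to
+                        * the allocator below
+                        */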
+                       ret = pin_down_bytes(trans, root, NULL,
+                                            node->bytenr, node->num_bytes,
+                                            head->is_data, 1, &must_clean);
+                       if (ret > 0)
+                               mark_free = 1;
+
+                       if (must_clean) {
+                               clean_tree_block(NULL, root, must_clean);
+                               btrfs_tree_unlock(must_clean);
+                               free_extent_buffer(must_clean);
+                       }
                        if (head->is_data) {
                                ret = btrfs_del_csums(trans, root,
                                                      node->bytenr,
                                                      node->num_bytes);
                                BUG_ON(ret);
                        }
-                       btrfs_update_pinned_extents(root, node->bytenr,
-                                                   node->num_bytes, 1);
-                       update_reserved_extents(root, node->bytenr,
-                                               node->num_bytes, 0);
+                       if (mark_free) {
+                               ret = btrfs_free_reserved_extent(root,
+                                                       node->bytenr,
+                                                       node->num_bytes);
+                               BUG_ON(ret);
+                       }
                }
                mutex_unlock(&head->mutex);
                return 0;
@@ -2706,6 +2779,8 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root)
        /* get the space info for where the metadata will live */
        alloc_target = btrfs_get_alloc_profile(root, 0);
        meta_sinfo = __find_space_info(info, alloc_target);
+       if (!meta_sinfo)
+               goto alloc;
 
 again:
        spin_lock(&meta_sinfo->lock);
@@ -2717,12 +2792,13 @@ again:
        do_div(thresh, 100);
 
        if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-           meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
+           meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+           meta_sinfo->bytes_super > thresh) {
                struct btrfs_trans_handle *trans;
                if (!meta_sinfo->full) {
                        meta_sinfo->force_alloc = 1;
                        spin_unlock(&meta_sinfo->lock);
-
+alloc:
                        trans = btrfs_start_transaction(root, 1);
                        if (!trans)
                                return -ENOMEM;
@@ -2730,6 +2806,10 @@ again:
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                             2 * 1024 * 1024, alloc_target, 0);
                        btrfs_end_transaction(trans, root);
+                       if (!meta_sinfo) {
+                               meta_sinfo = __find_space_info(info,
+                                                              alloc_target);
+                       }
                        goto again;
                }
                spin_unlock(&meta_sinfo->lock);
@@ -2765,13 +2845,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
        bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
        data_sinfo = BTRFS_I(inode)->space_info;
+       if (!data_sinfo)
+               goto alloc;
+
 again:
        /* make sure we have enough space to handle the data first */
        spin_lock(&data_sinfo->lock);
        if (data_sinfo->total_bytes - data_sinfo->bytes_used -
            data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
            data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
-           data_sinfo->bytes_may_use < bytes) {
+           data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
                struct btrfs_trans_handle *trans;
 
                /*
@@ -2783,7 +2866,7 @@ again:
 
                        data_sinfo->force_alloc = 1;
                        spin_unlock(&data_sinfo->lock);
-
+alloc:
                        alloc_target = btrfs_get_alloc_profile(root, 1);
                        trans = btrfs_start_transaction(root, 1);
                        if (!trans)
@@ -2795,6 +2878,11 @@ again:
                        btrfs_end_transaction(trans, root);
                        if (ret)
                                return ret;
+
+                       if (!data_sinfo) {
+                               btrfs_set_inode_space_info(root, inode);
+                               data_sinfo = BTRFS_I(inode)->space_info;
+                       }
                        goto again;
                }
                spin_unlock(&data_sinfo->lock);
@@ -3009,10 +3097,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                num_bytes = min(total, cache->key.offset - byte_in_group);
                if (alloc) {
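+                       /* an allocation turns reserved bytes into used bytes */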
                        old_val += num_bytes;
+                       btrfs_set_block_group_used(&cache->item, old_val);
+                       cache->reserved -= num_bytes;
                        cache->space_info->bytes_used += num_bytes;
+                       cache->space_info->bytes_reserved -= num_bytes;
                        if (cache->ro)
                                cache->space_info->bytes_readonly -= num_bytes;
-                       btrfs_set_block_group_used(&cache->item, old_val);
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
                } else {
@@ -3057,127 +3147,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
        return bytenr;
 }
 
-int btrfs_update_pinned_extents(struct btrfs_root *root,
-                               u64 bytenr, u64 num, int pin)
+/*
+ * this function must be called within a transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+                    u64 bytenr, u64 num_bytes, int reserved)
 {
-       u64 len;
-       struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_group_cache *cache;
 
-       if (pin)
-               set_extent_dirty(&fs_info->pinned_extents,
-                               bytenr, bytenr + num - 1, GFP_NOFS);
-
-       while (num > 0) {
-               cache = btrfs_lookup_block_group(fs_info, bytenr);
-               BUG_ON(!cache);
-               len = min(num, cache->key.offset -
-                         (bytenr - cache->key.objectid));
-               if (pin) {
-                       spin_lock(&cache->space_info->lock);
-                       spin_lock(&cache->lock);
-                       cache->pinned += len;
-                       cache->space_info->bytes_pinned += len;
-                       spin_unlock(&cache->lock);
-                       spin_unlock(&cache->space_info->lock);
-                       fs_info->total_pinned += len;
-               } else {
-                       int unpin = 0;
+       cache = btrfs_lookup_block_group(fs_info, bytenr);
+       BUG_ON(!cache);
 
-                       /*
-                        * in order to not race with the block group caching, we
-                        * only want to unpin the extent if we are cached.  If
-                        * we aren't cached, we want to start async caching this
-                        * block group so we can free the extent the next time
-                        * around.
-                        */
-                       spin_lock(&cache->space_info->lock);
-                       spin_lock(&cache->lock);
-                       unpin = (cache->cached == BTRFS_CACHE_FINISHED);
-                       if (likely(unpin)) {
-                               cache->pinned -= len;
-                               cache->space_info->bytes_pinned -= len;
-                               fs_info->total_pinned -= len;
-                       }
-                       spin_unlock(&cache->lock);
-                       spin_unlock(&cache->space_info->lock);
+       spin_lock(&cache->space_info->lock);
+       spin_lock(&cache->lock);
+       cache->pinned += num_bytes;
+       cache->space_info->bytes_pinned += num_bytes;
+       if (reserved) {
+               cache->reserved -= num_bytes;
+               cache->space_info->bytes_reserved -= num_bytes;
+       }
+       spin_unlock(&cache->lock);
+       spin_unlock(&cache->space_info->lock);
 
-                       if (likely(unpin))
-                               clear_extent_dirty(&fs_info->pinned_extents,
-                                                  bytenr, bytenr + len -1,
-                                                  GFP_NOFS);
-                       else
-                               cache_block_group(cache);
+       btrfs_put_block_group(cache);
 
-                       if (unpin)
-                               btrfs_add_free_space(cache, bytenr, len);
-               }
-               btrfs_put_block_group(cache);
-               bytenr += len;
-               num -= len;
+       set_extent_dirty(fs_info->pinned_extents,
+                        bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+       return 0;
+}
+
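+/*
+ * adjust the reserved byte counters on the block group and its
+ * space_info.  both spinlocks are taken here, so callers must not
+ * hold them.
+ */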
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+                                  u64 num_bytes, int reserve)
+{
+       spin_lock(&cache->space_info->lock);
+       spin_lock(&cache->lock);
+       if (reserve) {
+               cache->reserved += num_bytes;
+               cache->space_info->bytes_reserved += num_bytes;
+       } else {
+               cache->reserved -= num_bytes;
+               cache->space_info->bytes_reserved -= num_bytes;
        }
+       spin_unlock(&cache->lock);
+       spin_unlock(&cache->space_info->lock);
        return 0;
 }
 
-static int update_reserved_extents(struct btrfs_root *root,
-                                  u64 bytenr, u64 num, int reserve)
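+/*
+ * called during transaction commit: record how far each running
+ * caching thread has progressed (last_byte_to_unpin) and flip
+ * pinned_extents to the other freed_extents tree, so pins from the
+ * next transaction do not mix with the ones about to be unpinned
+ */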
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root)
 {
-       u64 len;
-       struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_caching_control *next;
+       struct btrfs_caching_control *caching_ctl;
+       struct btrfs_block_group_cache *cache;
 
-       while (num > 0) {
-               cache = btrfs_lookup_block_group(fs_info, bytenr);
-               BUG_ON(!cache);
-               len = min(num, cache->key.offset -
-                         (bytenr - cache->key.objectid));
+       down_write(&fs_info->extent_commit_sem);
 
-               spin_lock(&cache->space_info->lock);
-               spin_lock(&cache->lock);
-               if (reserve) {
-                       cache->reserved += len;
-                       cache->space_info->bytes_reserved += len;
+       list_for_each_entry_safe(caching_ctl, next,
+                                &fs_info->caching_block_groups, list) {
+               cache = caching_ctl->block_group;
+               if (block_group_cache_done(cache)) {
+                       cache->last_byte_to_unpin = (u64)-1;
+                       list_del_init(&caching_ctl->list);
+                       put_caching_control(caching_ctl);
                } else {
-                       cache->reserved -= len;
-                       cache->space_info->bytes_reserved -= len;
+                       cache->last_byte_to_unpin = caching_ctl->progress;
                }
-               spin_unlock(&cache->lock);
-               spin_unlock(&cache->space_info->lock);
-               btrfs_put_block_group(cache);
-               bytenr += len;
-               num -= len;
        }
+
+       if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+               fs_info->pinned_extents = &fs_info->freed_extents[1];
+       else
+               fs_info->pinned_extents = &fs_info->freed_extents[0];
+
+       up_write(&fs_info->extent_commit_sem);
        return 0;
 }
 
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
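+/*
+ * walk [start, end] one block group at a time.  only bytes below
+ * last_byte_to_unpin go back to the free space cache, so we never
+ * hand out space the caching thread has not scanned yet
+ */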
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
-       u64 last = 0;
-       u64 start;
-       u64 end;
-       struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
-       int ret;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_group_cache *cache = NULL;
+       u64 len;
 
-       while (1) {
-               ret = find_first_extent_bit(pinned_extents, last,
-                                           &start, &end, EXTENT_DIRTY);
-               if (ret)
-                       break;
+       while (start <= end) {
+               if (!cache ||
+                   start >= cache->key.objectid + cache->key.offset) {
+                       if (cache)
+                               btrfs_put_block_group(cache);
+                       cache = btrfs_lookup_block_group(fs_info, start);
+                       BUG_ON(!cache);
+               }
+
+               len = cache->key.objectid + cache->key.offset - start;
+               len = min(len, end + 1 - start);
+
+               if (start < cache->last_byte_to_unpin) {
+                       len = min(len, cache->last_byte_to_unpin - start);
+                       btrfs_add_free_space(cache, start, len);
+               }
+
+               spin_lock(&cache->space_info->lock);
+               spin_lock(&cache->lock);
+               cache->pinned -= len;
+               cache->space_info->bytes_pinned -= len;
+               spin_unlock(&cache->lock);
+               spin_unlock(&cache->space_info->lock);
 
-               set_extent_dirty(copy, start, end, GFP_NOFS);
-               last = end + 1;
+               start += len;
        }
+
+       if (cache)
+               btrfs_put_block_group(cache);
        return 0;
 }
 
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *root,
-                              struct extent_io_tree *unpin)
+                              struct btrfs_root *root)
 {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct extent_io_tree *unpin;
        u64 start;
        u64 end;
        int ret;
 
+       if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+               unpin = &fs_info->freed_extents[1];
+       else
+               unpin = &fs_info->freed_extents[0];
+
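+       /*
+        * btrfs_prepare_extent_commit() already flipped pinned_extents,
+        * so the tree chosen above holds exactly the pins from the
+        * committing transaction
+        */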
        while (1) {
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY);
@@ -3186,10 +3285,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
                ret = btrfs_discard_extent(root, start, end + 1 - start);
 
-               /* unlocks the pinned mutex */
-               btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
-
+               unpin_extent_range(root, start, end);
                cond_resched();
        }
 
@@ -3199,7 +3296,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 static int pin_down_bytes(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct btrfs_path *path,
-                         u64 bytenr, u64 num_bytes, int is_data,
+                         u64 bytenr, u64 num_bytes,
+                         int is_data, int reserved,
                          struct extent_buffer **must_clean)
 {
        int err = 0;
@@ -3231,15 +3329,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
        }
        free_extent_buffer(buf);
 pinit:
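+       /* delayed ref processing calls us with a NULL path */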
-       btrfs_set_path_blocking(path);
+       if (path)
+               btrfs_set_path_blocking(path);
        /* unlocks the pinned mutex */
-       btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+       btrfs_pin_extent(root, bytenr, num_bytes, reserved);
 
        BUG_ON(err < 0);
        return 0;
 }
 
-
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
@@ -3413,7 +3511,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                }
 
                ret = pin_down_bytes(trans, root, path, bytenr,
-                                    num_bytes, is_data, &must_clean);
+                                    num_bytes, is_data, 0, &must_clean);
                if (ret > 0)
                        mark_free = 1;
                BUG_ON(ret < 0);
@@ -3544,8 +3642,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
        if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
                WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
                /* unlocks the pinned mutex */
-               btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
-               update_reserved_extents(root, bytenr, num_bytes, 0);
+               btrfs_pin_extent(root, bytenr, num_bytes, 1);
                ret = 0;
        } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
                ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
@@ -3585,19 +3682,33 @@ static noinline int
 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
                                u64 num_bytes)
 {
+       struct btrfs_caching_control *caching_ctl;
        DEFINE_WAIT(wait);
 
-       prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
-
-       if (block_group_cache_done(cache)) {
-               finish_wait(&cache->caching_q, &wait);
+       caching_ctl = get_caching_control(cache);
+       if (!caching_ctl)
                return 0;
-       }
-       schedule();
-       finish_wait(&cache->caching_q, &wait);
 
-       wait_event(cache->caching_q, block_group_cache_done(cache) ||
+       wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
                   (cache->free_space >= num_bytes));
+
+       put_caching_control(caching_ctl);
+       return 0;
+}
+
+static noinline int
+wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+       struct btrfs_caching_control *caching_ctl;
+       DEFINE_WAIT(wait);
+
+       caching_ctl = get_caching_control(cache);
+       if (!caching_ctl)
+               return 0;
+
+       wait_event(caching_ctl->wait, block_group_cache_done(cache));
+
+       put_caching_control(caching_ctl);
        return 0;
 }
 
@@ -3635,6 +3746,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        int last_ptr_loop = 0;
        int loop = 0;
        bool found_uncached_bg = false;
+       bool failed_cluster_refill = false;
 
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3732,7 +3844,16 @@ have_block_group:
                if (unlikely(block_group->ro))
                        goto loop;
 
-               if (last_ptr) {
+               /*
+                * Ok, we want to try the cluster allocator, so look there
+                * unless we are on LOOP_NO_EMPTY_SIZE.  By that point we
+                * will have tried the cluster allocator plenty of times
+                * without finding anything, which means the free space is
+                * likely too fragmented for clustering to help, so skip it
+                * and let the allocator find whatever block it can.
+                */
+               if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
                        /*
                         * the refill lock keeps out other
                         * people trying to start a new cluster
@@ -3807,9 +3928,11 @@ refill_cluster:
                                        spin_unlock(&last_ptr->refill_lock);
                                        goto checks;
                                }
-                       } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+                       } else if (!cached && loop > LOOP_CACHING_NOWAIT
+                                  && !failed_cluster_refill) {
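+                               /*
+                                * the refill failed because caching has not
+                                * gone far enough; wait for progress once,
+                                * then retry this block group
+                                */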
                                spin_unlock(&last_ptr->refill_lock);
 
+                               failed_cluster_refill = true;
                                wait_block_group_cache_progress(block_group,
                                       num_bytes + empty_cluster + empty_size);
                                goto have_block_group;
@@ -3821,13 +3944,9 @@ refill_cluster:
                         * cluster.  Free the cluster we've been trying
                         * to use, and go to the next block group
                         */
-                       if (loop < LOOP_NO_EMPTY_SIZE) {
-                               btrfs_return_cluster_to_free_space(NULL,
-                                                                  last_ptr);
-                               spin_unlock(&last_ptr->refill_lock);
-                               goto loop;
-                       }
+                       btrfs_return_cluster_to_free_space(NULL, last_ptr);
                        spin_unlock(&last_ptr->refill_lock);
+                       goto loop;
                }
 
                offset = btrfs_find_space_for_alloc(block_group, search_start,
@@ -3881,9 +4000,12 @@ checks:
                                             search_start - offset);
                BUG_ON(offset > search_start);
 
+               update_reserved_extents(block_group, num_bytes, 1);
+
                /* we are all good, lets return */
                break;
 loop:
+               failed_cluster_refill = false;
                btrfs_put_block_group(block_group);
        }
        up_read(&space_info->groups_sem);
@@ -3973,12 +4095,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
        up_read(&info->groups_sem);
 }
 
-static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
-                                 struct btrfs_root *root,
-                                 u64 num_bytes, u64 min_alloc_size,
-                                 u64 empty_size, u64 hint_byte,
-                                 u64 search_end, struct btrfs_key *ins,
-                                 u64 data)
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_root *root,
+                        u64 num_bytes, u64 min_alloc_size,
+                        u64 empty_size, u64 hint_byte,
+                        u64 search_end, struct btrfs_key *ins,
+                        u64 data)
 {
        int ret;
        u64 search_start = 0;
@@ -4044,25 +4166,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
        ret = btrfs_discard_extent(root, start, len);
 
        btrfs_add_free_space(cache, start, len);
+       update_reserved_extents(cache, len, 0);
        btrfs_put_block_group(cache);
-       update_reserved_extents(root, start, len, 0);
-
-       return ret;
-}
-
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
-                                 struct btrfs_root *root,
-                                 u64 num_bytes, u64 min_alloc_size,
-                                 u64 empty_size, u64 hint_byte,
-                                 u64 search_end, struct btrfs_key *ins,
-                                 u64 data)
-{
-       int ret;
-       ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
-                                    empty_size, hint_byte, search_end, ins,
-                                    data);
-       if (!ret)
-               update_reserved_extents(root, ins->objectid, ins->offset, 1);
 
        return ret;
 }
@@ -4223,15 +4328,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 {
        int ret;
        struct btrfs_block_group_cache *block_group;
+       struct btrfs_caching_control *caching_ctl;
+       u64 start = ins->objectid;
+       u64 num_bytes = ins->offset;
 
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
        cache_block_group(block_group);
-       wait_event(block_group->caching_q,
-                  block_group_cache_done(block_group));
+       caching_ctl = get_caching_control(block_group);
 
-       ret = btrfs_remove_free_space(block_group, ins->objectid,
-                                     ins->offset);
-       BUG_ON(ret);
+       if (!caching_ctl) {
+               BUG_ON(!block_group_cache_done(block_group));
+               ret = btrfs_remove_free_space(block_group, start, num_bytes);
+               BUG_ON(ret);
+       } else {
+               mutex_lock(&caching_ctl->mutex);
+
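+               /*
+                * caching is still running: space already scanned must be
+                * removed from the free space cache, space beyond
+                * caching_ctl->progress is excluded so the caching thread
+                * skips it, and a range straddling progress is split
+                */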
+               if (start >= caching_ctl->progress) {
+                       ret = add_excluded_extent(root, start, num_bytes);
+                       BUG_ON(ret);
+               } else if (start + num_bytes <= caching_ctl->progress) {
+                       ret = btrfs_remove_free_space(block_group,
+                                                     start, num_bytes);
+                       BUG_ON(ret);
+               } else {
+                       num_bytes = caching_ctl->progress - start;
+                       ret = btrfs_remove_free_space(block_group,
+                                                     start, num_bytes);
+                       BUG_ON(ret);
+
+                       start = caching_ctl->progress;
+                       num_bytes = ins->objectid + ins->offset -
+                                   caching_ctl->progress;
+                       ret = add_excluded_extent(root, start, num_bytes);
+                       BUG_ON(ret);
+               }
+
+               mutex_unlock(&caching_ctl->mutex);
+               put_caching_control(caching_ctl);
+       }
+
+       update_reserved_extents(block_group, ins->offset, 1);
        btrfs_put_block_group(block_group);
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
                                         0, owner, offset, ins, 1);
@@ -4255,9 +4391,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
        int ret;
        u64 flags = 0;
 
-       ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
-                                    empty_size, hint_byte, search_end,
-                                    ins, 0);
+       ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
+                                  empty_size, hint_byte, search_end,
+                                  ins, 0);
        if (ret)
                return ret;
 
@@ -4268,7 +4404,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
        } else
                BUG_ON(parent > 0);
 
-       update_reserved_extents(root, ins->objectid, ins->offset, 1);
        if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
                struct btrfs_delayed_extent_op *extent_op;
                extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
@@ -4347,452 +4482,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
        return buf;
 }
 
-#if 0
-int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
-                       struct btrfs_root *root, struct extent_buffer *leaf)
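+/*
+ * state shared by the drop_snapshot walk: per-level refcounts and
+ * flags, the DROP_REFERENCE/UPDATE_BACKREF stage with its progress
+ * key, and the adaptive readahead window (reada_slot/reada_count)
+ */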
+struct walk_control {
+       u64 refs[BTRFS_MAX_LEVEL];
+       u64 flags[BTRFS_MAX_LEVEL];
+       struct btrfs_key update_progress;
+       int stage;
+       int level;
+       int shared_level;
+       int update_ref;
+       int keep_locks;
+       int reada_slot;
+       int reada_count;
+};
+
+#define DROP_REFERENCE 1
+#define UPDATE_BACKREF 2
+
+static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
+                                    struct btrfs_root *root,
+                                    struct walk_control *wc,
+                                    struct btrfs_path *path)
 {
-       u64 disk_bytenr;
-       u64 num_bytes;
-       struct btrfs_key key;
-       struct btrfs_file_extent_item *fi;
+       u64 bytenr;
+       u64 generation;
+       u64 refs;
+       u64 last = 0;
        u32 nritems;
-       int i;
+       u32 blocksize;
+       struct btrfs_key key;
+       struct extent_buffer *eb;
        int ret;
+       int slot;
+       int nread = 0;
 
-       BUG_ON(!btrfs_is_leaf(leaf));
-       nritems = btrfs_header_nritems(leaf);
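+       /*
+        * scale the readahead window: shrink it when we restart behind
+        * the last readahead slot, grow it (up to a full node) once
+        * everything read ahead has been consumed
+        */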
+       if (path->slots[wc->level] < wc->reada_slot) {
+               wc->reada_count = wc->reada_count * 2 / 3;
+               wc->reada_count = max(wc->reada_count, 2);
+       } else {
+               wc->reada_count = wc->reada_count * 3 / 2;
+               wc->reada_count = min_t(int, wc->reada_count,
+                                       BTRFS_NODEPTRS_PER_BLOCK(root));
+       }
 
-       for (i = 0; i < nritems; i++) {
-               cond_resched();
-               btrfs_item_key_to_cpu(leaf, &key, i);
+       eb = path->nodes[wc->level];
+       nritems = btrfs_header_nritems(eb);
+       blocksize = btrfs_level_size(root, wc->level - 1);
 
-               /* only extents have references, skip everything else */
-               if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-                       continue;
-
-               fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
-
-               /* inline extents live in the btree, they don't have refs */
-               if (btrfs_file_extent_type(leaf, fi) ==
-                   BTRFS_FILE_EXTENT_INLINE)
-                       continue;
-
-               disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
-
-               /* holes don't have refs */
-               if (disk_bytenr == 0)
-                       continue;
-
-               num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
-               ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
-                                       leaf->start, 0, key.objectid, 0);
-               BUG_ON(ret);
-       }
-       return 0;
-}
-
-static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
-                                       struct btrfs_root *root,
-                                       struct btrfs_leaf_ref *ref)
-{
-       int i;
-       int ret;
-       struct btrfs_extent_info *info;
-       struct refsort *sorted;
-
-       if (ref->nritems == 0)
-               return 0;
-
-       sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
-       for (i = 0; i < ref->nritems; i++) {
-               sorted[i].bytenr = ref->extents[i].bytenr;
-               sorted[i].slot = i;
-       }
-       sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
+       for (slot = path->slots[wc->level]; slot < nritems; slot++) {
+               if (nread >= wc->reada_count)
+                       break;
 
-       /*
-        * the items in the ref were sorted when the ref was inserted
-        * into the ref cache, so this is already in order
-        */
-       for (i = 0; i < ref->nritems; i++) {
-               info = ref->extents + sorted[i].slot;
-               ret = btrfs_free_extent(trans, root, info->bytenr,
-                                         info->num_bytes, ref->bytenr,
-                                         ref->owner, ref->generation,
-                                         info->objectid, 0);
-
-               atomic_inc(&root->fs_info->throttle_gen);
-               wake_up(&root->fs_info->transaction_throttle);
                cond_resched();
+               bytenr = btrfs_node_blockptr(eb, slot);
+               generation = btrfs_node_ptr_generation(eb, slot);
 
-               BUG_ON(ret);
-               info++;
-       }
-
-       kfree(sorted);
-       return 0;
-}
-
-
-static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
-                                    struct btrfs_root *root, u64 start,
-                                    u64 len, u32 *refs)
-{
-       int ret;
-
-       ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
-       BUG_ON(ret);
-
-#if 0 /* some debugging code in case we see problems here */
-       /* if the refs count is one, it won't get increased again.  But
-        * if the ref count is > 1, someone may be decreasing it at
-        * the same time we are.
-        */
-       if (*refs != 1) {
-               struct extent_buffer *eb = NULL;
-               eb = btrfs_find_create_tree_block(root, start, len);
-               if (eb)
-                       btrfs_tree_lock(eb);
-
-               mutex_lock(&root->fs_info->alloc_mutex);
-               ret = lookup_extent_ref(NULL, root, start, len, refs);
-               BUG_ON(ret);
-               mutex_unlock(&root->fs_info->alloc_mutex);
-
-               if (eb) {
-                       btrfs_tree_unlock(eb);
-                       free_extent_buffer(eb);
-               }
-               if (*refs == 1) {
-                       printk(KERN_ERR "btrfs block %llu went down to one "
-                              "during drop_snap\n", (unsigned long long)start);
-               }
-
-       }
-#endif
-
-       cond_resched();
-       return ret;
-}
-
+               if (slot == path->slots[wc->level])
+                       goto reada;
 
-/*
- * this is used while deleting old snapshots, and it drops the refs
- * on a whole subtree starting from a level 1 node.
- *
- * The idea is to sort all the leaf pointers, and then drop the
- * ref on all the leaves in order.  Most of the time the leaves
- * will have ref cache entries, so no leaf IOs will be required to
- * find the extents they have references on.
- *
- * For each leaf, any references it has are also dropped in order
- *
- * This ends up dropping the references in something close to optimal
- * order for reading and modifying the extent allocation tree.
- */
-static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
-                                       struct btrfs_root *root,
-                                       struct btrfs_path *path)
-{
-       u64 bytenr;
-       u64 root_owner;
-       u64 root_gen;
-       struct extent_buffer *eb = path->nodes[1];
-       struct extent_buffer *leaf;
-       struct btrfs_leaf_ref *ref;
-       struct refsort *sorted = NULL;
-       int nritems = btrfs_header_nritems(eb);
-       int ret;
-       int i;
-       int refi = 0;
-       int slot = path->slots[1];
-       u32 blocksize = btrfs_level_size(root, 0);
-       u32 refs;
-
-       if (nritems == 0)
-               goto out;
-
-       root_owner = btrfs_header_owner(eb);
-       root_gen = btrfs_header_generation(eb);
-       sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
-
-       /*
-        * step one, sort all the leaf pointers so we don't scribble
-        * randomly into the extent allocation tree
-        */
-       for (i = slot; i < nritems; i++) {
-               sorted[refi].bytenr = btrfs_node_blockptr(eb, i);
-               sorted[refi].slot = i;
-               refi++;
-       }
-
-       /*
-        * nritems won't be zero, but if we're picking up drop_snapshot
-        * after a crash, slot might be > 0, so double check things
-        * just in case.
-        */
-       if (refi == 0)
-               goto out;
-
-       sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
-
-       /*
-        * the first loop frees everything the leaves point to
-        */
-       for (i = 0; i < refi; i++) {
-               u64 ptr_gen;
-
-               bytenr = sorted[i].bytenr;
-
-               /*
-                * check the reference count on this leaf.  If it is > 1
-                * we just decrement it below and don't update any
-                * of the refs the leaf points to.
-                */
-               ret = drop_snap_lookup_refcount(trans, root, bytenr,
-                                               blocksize, &refs);
-               BUG_ON(ret);
-               if (refs != 1)
+               if (wc->stage == UPDATE_BACKREF &&
+                   generation <= root->root_key.offset)
                        continue;
 
-               ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot);
-
-               /*
-                * the leaf only had one reference, which means the
-                * only thing pointing to this leaf is the snapshot
-                * we're deleting.  It isn't possible for the reference
-                * count to increase again later
-                *
-                * The reference cache is checked for the leaf,
-                * and if found we'll be able to drop any refs held by
-                * the leaf without needing to read it in.
-                */
-               ref = btrfs_lookup_leaf_ref(root, bytenr);
-               if (ref && ref->generation != ptr_gen) {
-                       btrfs_free_leaf_ref(root, ref);
-                       ref = NULL;
-               }
-               if (ref) {
-                       ret = cache_drop_leaf_ref(trans, root, ref);
-                       BUG_ON(ret);
-                       btrfs_remove_leaf_ref(root, ref);
-                       btrfs_free_leaf_ref(root, ref);
-               } else {
-                       /*
-                        * the leaf wasn't in the reference cache, so
-                        * we have to read it.
-                        */
-                       leaf = read_tree_block(root, bytenr, blocksize,
-                                              ptr_gen);
-                       ret = btrfs_drop_leaf_ref(trans, root, leaf);
+               if (wc->stage == DROP_REFERENCE) {
+                       ret = btrfs_lookup_extent_info(trans, root,
+                                               bytenr, blocksize,
+                                               &refs, NULL);
                        BUG_ON(ret);
-                       free_extent_buffer(leaf);
-               }
-               atomic_inc(&root->fs_info->throttle_gen);
-               wake_up(&root->fs_info->transaction_throttle);
-               cond_resched();
-       }
-
-       /*
-        * run through the loop again to free the refs on the leaves.
-        * This is faster than doing it in the loop above because
-        * the leaves are likely to be clustered together.  We end up
-        * working in nice chunks on the extent allocation tree.
-        */
-       for (i = 0; i < refi; i++) {
-               bytenr = sorted[i].bytenr;
-               ret = btrfs_free_extent(trans, root, bytenr,
-                                       blocksize, eb->start,
-                                       root_owner, root_gen, 0, 1);
-               BUG_ON(ret);
+                       BUG_ON(refs == 0);
+                       if (refs == 1)
+                               goto reada;
 
-               atomic_inc(&root->fs_info->throttle_gen);
-               wake_up(&root->fs_info->transaction_throttle);
-               cond_resched();
-       }
-out:
-       kfree(sorted);
-
-       /*
-        * update the path to show we've processed the entire level 1
-        * node.  This will get saved into the root's drop_snapshot_progress
-        * field so these drops are not repeated again if this transaction
-        * commits.
-        */
-       path->slots[1] = nritems;
-       return 0;
-}
-
-/*
- * helper function for drop_snapshot, this walks down the tree dropping ref
- * counts as it goes.
- */
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
-                                  struct btrfs_root *root,
-                                  struct btrfs_path *path, int *level)
-{
-       u64 root_owner;
-       u64 root_gen;
-       u64 bytenr;
-       u64 ptr_gen;
-       struct extent_buffer *next;
-       struct extent_buffer *cur;
-       struct extent_buffer *parent;
-       u32 blocksize;
-       int ret;
-       u32 refs;
-
-       WARN_ON(*level < 0);
-       WARN_ON(*level >= BTRFS_MAX_LEVEL);
-       ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
-                               path->nodes[*level]->len, &refs);
-       BUG_ON(ret);
-       if (refs > 1)
-               goto out;
-
-       /*
-        * walk down to the last node level and free all the leaves
-        */
-       while (*level >= 0) {
-               WARN_ON(*level < 0);
-               WARN_ON(*level >= BTRFS_MAX_LEVEL);
-               cur = path->nodes[*level];
-
-               if (btrfs_header_level(cur) != *level)
-                       WARN_ON(1);
-
-               if (path->slots[*level] >=
-                   btrfs_header_nritems(cur))
-                       break;
-
-               /* the new code goes down to level 1 and does all the
-                * leaves pointed to that node in bulk.  So, this check
-                * for level 0 will always be false.
-                *
-                * But, the disk format allows the drop_snapshot_progress
-                * field in the root to leave things in a state where
-                * a leaf will need cleaning up here.  If someone crashes
-                * with the old code and then boots with the new code,
-                * we might find a leaf here.
-                */
-               if (*level == 0) {
-                       ret = btrfs_drop_leaf_ref(trans, root, cur);
-                       BUG_ON(ret);
-                       break;
+                       if (!wc->update_ref ||
+                           generation <= root->root_key.offset)
+                               continue;
+                       btrfs_node_key_to_cpu(eb, &key, slot);
+                       ret = btrfs_comp_cpu_keys(&key,
+                                                 &wc->update_progress);
+                       if (ret < 0)
+                               continue;
                }
-
-               /*
-                * once we get to level one, process the whole node
-                * at once, including everything below it.
-                */
-               if (*level == 1) {
-                       ret = drop_level_one_refs(trans, root, path);
-                       BUG_ON(ret);
+reada:
+               ret = readahead_tree_block(root, bytenr, blocksize,
+                                          generation);
+               if (ret)
                        break;
-               }
-
-               bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
-               ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
-               blocksize = btrfs_level_size(root, *level - 1);
-
-               ret = drop_snap_lookup_refcount(trans, root, bytenr,
-                                               blocksize, &refs);
-               BUG_ON(ret);
-
-               /*
-                * if there is more than one reference, we don't need
-                * to read that node to drop any references it has.  We
-                * just drop the ref we hold on that node and move on to the
-                * next slot in this level.
-                */
-               if (refs != 1) {
-                       parent = path->nodes[*level];
-                       root_owner = btrfs_header_owner(parent);
-                       root_gen = btrfs_header_generation(parent);
-                       path->slots[*level]++;
-
-                       ret = btrfs_free_extent(trans, root, bytenr,
-                                               blocksize, parent->start,
-                                               root_owner, root_gen,
-                                               *level - 1, 1);
-                       BUG_ON(ret);
-
-                       atomic_inc(&root->fs_info->throttle_gen);
-                       wake_up(&root->fs_info->transaction_throttle);
-                       cond_resched();
-
-                       continue;
-               }
-
-               /*
-                * we need to keep freeing things in the next level down.
-                * read the block and loop around to process it
-                */
-               next = read_tree_block(root, bytenr, blocksize, ptr_gen);
-               WARN_ON(*level <= 0);
-               if (path->nodes[*level-1])
-                       free_extent_buffer(path->nodes[*level-1]);
-               path->nodes[*level-1] = next;
-               *level = btrfs_header_level(next);
-               path->slots[*level] = 0;
-               cond_resched();
+               last = bytenr + blocksize;
+               nread++;
        }
-out:
-       WARN_ON(*level < 0);
-       WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
-       if (path->nodes[*level] == root->node) {
-               parent = path->nodes[*level];
-               bytenr = path->nodes[*level]->start;
-       } else {
-               parent = path->nodes[*level + 1];
-               bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
-       }
-
-       blocksize = btrfs_level_size(root, *level);
-       root_owner = btrfs_header_owner(parent);
-       root_gen = btrfs_header_generation(parent);
-
-       /*
-        * cleanup and free the reference on the last node
-        * we processed
-        */
-       ret = btrfs_free_extent(trans, root, bytenr, blocksize,
-                                 parent->start, root_owner, root_gen,
-                                 *level, 1);
-       free_extent_buffer(path->nodes[*level]);
-       path->nodes[*level] = NULL;
-
-       *level += 1;
-       BUG_ON(ret);
-
-       cond_resched();
-       return 0;
+       wc->reada_slot = slot;
 }
-#endif
-
-struct walk_control {
-       u64 refs[BTRFS_MAX_LEVEL];
-       u64 flags[BTRFS_MAX_LEVEL];
-       struct btrfs_key update_progress;
-       int stage;
-       int level;
-       int shared_level;
-       int update_ref;
-       int keep_locks;
-};
-
-#define DROP_REFERENCE 1
-#define UPDATE_BACKREF 2
 
 /*
  * helper to process a tree block while walking down the tree.
  *
- * when wc->stage == DROP_REFERENCE, this function checks
- * reference count of the block. if the block is shared and
- * we need update back refs for the subtree rooted at the
- * block, this function changes wc->stage to UPDATE_BACKREF
- *
  * when wc->stage == UPDATE_BACKREF, this function updates
  * back refs for pointers in the block.
  *
@@ -4805,7 +4587,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 {
        int level = wc->level;
        struct extent_buffer *eb = path->nodes[level];
-       struct btrfs_key key;
        u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
        int ret;
 
@@ -4828,21 +4609,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                BUG_ON(wc->refs[level] == 0);
        }
 
-       if (wc->stage == DROP_REFERENCE &&
-           wc->update_ref && wc->refs[level] > 1) {
-               BUG_ON(eb == root->node);
-               BUG_ON(path->slots[level] > 0);
-               if (level == 0)
-                       btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
-               else
-                       btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
-               if (btrfs_header_owner(eb) == root->root_key.objectid &&
-                   btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
-                       wc->stage = UPDATE_BACKREF;
-                       wc->shared_level = level;
-               }
-       }
-
        if (wc->stage == DROP_REFERENCE) {
                if (wc->refs[level] > 1)
                        return 1;
@@ -4878,6 +4644,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+/*
+ * helper to process a tree block pointer.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks the
+ * reference count of the block pointed to. if the block is
+ * shared and we need to update back refs for the subtree
+ * rooted at the block, this function changes wc->stage to
+ * UPDATE_BACKREF. if the block is shared and there is no
+ * need to update back refs, this function drops our
+ * reference to the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
+ */
+static noinline int do_walk_down(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root,
+                                struct btrfs_path *path,
+                                struct walk_control *wc)
+{
+       u64 bytenr;
+       u64 generation;
+       u64 parent;
+       u32 blocksize;
+       struct btrfs_key key;
+       struct extent_buffer *next;
+       int level = wc->level;
+       int reada = 0;
+       int ret = 0;
+
+       generation = btrfs_node_ptr_generation(path->nodes[level],
+                                              path->slots[level]);
+       /*
+        * if the lower level block was created before the snapshot
+        * was created, we know there is no need to update back refs
+        * for the subtree
+        */
+       if (wc->stage == UPDATE_BACKREF &&
+           generation <= root->root_key.offset)
+               return 1;
+
+       bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
+       blocksize = btrfs_level_size(root, level - 1);
+
+       next = btrfs_find_tree_block(root, bytenr, blocksize);
+       if (!next) {
+               next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+               reada = 1;
+       }
+       btrfs_tree_lock(next);
+       btrfs_set_lock_blocking(next);
+
+       if (wc->stage == DROP_REFERENCE) {
+               ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+                                              &wc->refs[level - 1],
+                                              &wc->flags[level - 1]);
+               BUG_ON(ret);
+               BUG_ON(wc->refs[level - 1] == 0);
+
+               if (wc->refs[level - 1] > 1) {
+                       if (!wc->update_ref ||
+                           generation <= root->root_key.offset)
+                               goto skip;
+
+                       btrfs_node_key_to_cpu(path->nodes[level], &key,
+                                             path->slots[level]);
+                       ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
+                       if (ret < 0)
+                               goto skip;
+
+                       wc->stage = UPDATE_BACKREF;
+                       wc->shared_level = level - 1;
+               }
+       }
+
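+       /*
+        * the cached extent buffer may be stale; drop it and reread the
+        * block (with readahead at level 1) so we walk the copy that
+        * matches @generation
+        */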
+       if (!btrfs_buffer_uptodate(next, generation)) {
+               btrfs_tree_unlock(next);
+               free_extent_buffer(next);
+               next = NULL;
+       }
+
+       if (!next) {
+               if (reada && level == 1)
+                       reada_walk_down(trans, root, wc, path);
+               next = read_tree_block(root, bytenr, blocksize, generation);
+               btrfs_tree_lock(next);
+               btrfs_set_lock_blocking(next);
+       }
+
+       level--;
+       BUG_ON(level != btrfs_header_level(next));
+       path->nodes[level] = next;
+       path->slots[level] = 0;
+       path->locks[level] = 1;
+       wc->level = level;
+       if (wc->level == 1)
+               wc->reada_slot = 0;
+       return 0;
+skip:
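+       /*
+        * the child block is shared and needs no backref updates; drop
+        * our single reference on it and tell the caller to move to the
+        * next slot
+        */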
+       wc->refs[level - 1] = 0;
+       wc->flags[level - 1] = 0;
+
+       if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+               parent = path->nodes[level]->start;
+       } else {
+               BUG_ON(root->root_key.objectid !=
+                      btrfs_header_owner(path->nodes[level]));
+               parent = 0;
+       }
+
+       ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+                               root->root_key.objectid, level - 1, 0);
+       BUG_ON(ret);
+
+       btrfs_tree_unlock(next);
+       free_extent_buffer(next);
+       return 1;
+}
+
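
The generation test at the top of do_walk_down() leans on a btrfs invariant: a snapshot root records, in root_key.offset, the transaction generation at which the snapshot was taken, so any block pointer whose generation is at or below that value predates the snapshot and is still shared with the source tree. A minimal sketch of the predicate, with a hypothetical helper name that is not part of the btrfs API:

        /*
         * Sketch only: ptr_gen comes from btrfs_node_ptr_generation(),
         * snap_gen from root->root_key.offset of the snapshot root.
         */
        static inline int subtree_predates_snapshot(u64 ptr_gen, u64 snap_gen)
        {
                return ptr_gen <= snap_gen;  /* no backref updates needed below */
        }
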
 /*
  * helper to process tree block while walking up the tree.
  *
@@ -4905,7 +4788,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                if (level < wc->shared_level)
                        goto out;
 
-               BUG_ON(wc->refs[level] <= 1);
                ret = find_next_key(path, level + 1, &wc->update_progress);
                if (ret > 0)
                        wc->update_ref = 0;
@@ -4936,8 +4818,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                                path->locks[level] = 0;
                                return 1;
                        }
-               } else {
-                       BUG_ON(level != 0);
                }
        }
 
@@ -4990,17 +4870,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
                                   struct btrfs_path *path,
                                   struct walk_control *wc)
 {
-       struct extent_buffer *next;
-       struct extent_buffer *cur;
-       u64 bytenr;
-       u64 ptr_gen;
-       u32 blocksize;
        int level = wc->level;
        int ret;
 
        while (level >= 0) {
-               cur = path->nodes[level];
-               BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
+               if (path->slots[level] >=
+                   btrfs_header_nritems(path->nodes[level]))
+                       break;
 
                ret = walk_down_proc(trans, root, path, wc);
                if (ret > 0)
@@ -5009,20 +4885,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
                if (level == 0)
                        break;
 
-               bytenr = btrfs_node_blockptr(cur, path->slots[level]);
-               blocksize = btrfs_level_size(root, level - 1);
-               ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
-
-               next = read_tree_block(root, bytenr, blocksize, ptr_gen);
-               btrfs_tree_lock(next);
-               btrfs_set_lock_blocking(next);
-
-               level--;
-               BUG_ON(level != btrfs_header_level(next));
-               path->nodes[level] = next;
-               path->slots[level] = 0;
-               path->locks[level] = 1;
-               wc->level = level;
+               ret = do_walk_down(trans, root, path, wc);
+               if (ret > 0) {
+                       path->slots[level]++;
+                       continue;
+               }
+               level = wc->level;
        }
        return 0;
 }
@@ -5112,9 +4980,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
                        err = ret;
                        goto out;
                }
-               btrfs_node_key_to_cpu(path->nodes[level], &key,
-                                     path->slots[level]);
-               WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+               WARN_ON(ret > 0);
 
                /*
                 * unlock our path, this is safe because only this
@@ -5149,6 +5015,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
        wc->stage = DROP_REFERENCE;
        wc->update_ref = update_ref;
        wc->keep_locks = 0;
+       wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
        while (1) {
                ret = walk_down_tree(trans, root, path, wc);
@@ -5201,9 +5068,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
        ret = btrfs_del_root(trans, tree_root, &root->root_key);
        BUG_ON(ret);
 
-       free_extent_buffer(root->node);
-       free_extent_buffer(root->commit_root);
-       kfree(root);
+       if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+               ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
+                                          NULL, NULL);
+               BUG_ON(ret < 0);
+               if (ret > 0) {
+                       ret = btrfs_del_orphan_item(trans, tree_root,
+                                                   root->root_key.objectid);
+                       BUG_ON(ret);
+               }
+       }
+
+       if (root->in_radix) {
+               btrfs_free_fs_root(tree_root->fs_info, root);
+       } else {
+               free_extent_buffer(root->node);
+               free_extent_buffer(root->commit_root);
+               kfree(root);
+       }
 out:
        btrfs_end_transaction(trans, tree_root);
        kfree(wc);
@@ -5255,6 +5137,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        wc->stage = DROP_REFERENCE;
        wc->update_ref = 0;
        wc->keep_locks = 1;
+       wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
        while (1) {
                wret = walk_down_tree(trans, root, path, wc);
@@ -5397,9 +5280,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
        lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
        while (1) {
                int ret;
-               spin_lock(&em_tree->lock);
+               write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
-               spin_unlock(&em_tree->lock);
+               write_unlock(&em_tree->lock);
                if (ret != -EEXIST) {
                        free_extent_map(em);
                        break;
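
Note that em_tree->lock changes here from a spinlock to an rwlock: add_extent_mapping() mutates the tree, so it takes the write side. Under the same conversion, a pure lookup could take the read side instead; a hedged sketch using the names visible in this hunk:

        struct extent_map *em;

        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        read_unlock(&em_tree->lock);
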
@@ -6842,287 +6725,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
        return 0;
 }
 
-#if 0
-static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                u64 objectid, u64 size)
-{
-       struct btrfs_path *path;
-       struct btrfs_inode_item *item;
-       struct extent_buffer *leaf;
-       int ret;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       path->leave_spinning = 1;
-       ret = btrfs_insert_empty_inode(trans, root, path, objectid);
-       if (ret)
-               goto out;
-
-       leaf = path->nodes[0];
-       item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
-       memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
-       btrfs_set_inode_generation(leaf, item, 1);
-       btrfs_set_inode_size(leaf, item, size);
-       btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
-       btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
-       btrfs_mark_buffer_dirty(leaf);
-       btrfs_release_path(root, path);
-out:
-       btrfs_free_path(path);
-       return ret;
-}
-
-static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
-                                       struct btrfs_block_group_cache *group)
+/*
+ * checks to see if it's even possible to relocate this block group.
+ *
+ * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
+ * OK to go ahead and try.
+ */
+int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 {
-       struct inode *inode = NULL;
-       struct btrfs_trans_handle *trans;
-       struct btrfs_root *root;
-       struct btrfs_key root_key;
-       u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
-       int err = 0;
+       struct btrfs_block_group_cache *block_group;
+       struct btrfs_space_info *space_info;
+       struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+       struct btrfs_device *device;
+       int full = 0;
+       int ret = 0;
 
-       root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
-       root_key.type = BTRFS_ROOT_ITEM_KEY;
-       root_key.offset = (u64)-1;
-       root = btrfs_read_fs_root_no_name(fs_info, &root_key);
-       if (IS_ERR(root))
-               return ERR_CAST(root);
+       block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
 
-       trans = btrfs_start_transaction(root, 1);
-       BUG_ON(!trans);
+       /* odd, couldn't find the block group, leave it alone */
+       if (!block_group)
+               return -1;
 
-       err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
-       if (err)
+       /* no bytes used, we're good */
+       if (!btrfs_block_group_used(&block_group->item))
                goto out;
 
-       err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
-       BUG_ON(err);
-
-       err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
-                                      group->key.offset, 0, group->key.offset,
-                                      0, 0, 0);
-       BUG_ON(err);
-
-       inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
-       if (inode->i_state & I_NEW) {
-               BTRFS_I(inode)->root = root;
-               BTRFS_I(inode)->location.objectid = objectid;
-               BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-               BTRFS_I(inode)->location.offset = 0;
-               btrfs_read_locked_inode(inode);
-               unlock_new_inode(inode);
-               BUG_ON(is_bad_inode(inode));
-       } else {
-               BUG_ON(1);
-       }
-       BTRFS_I(inode)->index_cnt = group->key.objectid;
-
-       err = btrfs_orphan_add(trans, inode);
-out:
-       btrfs_end_transaction(trans, root);
-       if (err) {
-               if (inode)
-                       iput(inode);
-               inode = ERR_PTR(err);
-       }
-       return inode;
-}
-
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
-{
-
-       struct btrfs_ordered_sum *sums;
-       struct btrfs_sector_sum *sector_sum;
-       struct btrfs_ordered_extent *ordered;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct list_head list;
-       size_t offset;
-       int ret;
-       u64 disk_bytenr;
-
-       INIT_LIST_HEAD(&list);
-
-       ordered = btrfs_lookup_ordered_extent(inode, file_pos);
-       BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
-
-       disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
-       ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
-                                      disk_bytenr + len - 1, &list);
-
-       while (!list_empty(&list)) {
-               sums = list_entry(list.next, struct btrfs_ordered_sum, list);
-               list_del_init(&sums->list);
-
-               sector_sum = sums->sums;
-               sums->bytenr = ordered->start;
+       space_info = block_group->space_info;
+       spin_lock(&space_info->lock);
 
-               offset = 0;
-               while (offset < sums->len) {
-                       sector_sum->bytenr += ordered->start - disk_bytenr;
-                       sector_sum++;
-                       offset += root->sectorsize;
-               }
+       full = space_info->full;
 
-               btrfs_add_ordered_sum(inode, ordered, sums);
+       /*
+        * if this is the last block group we have in this space, we can't
+        * relocate it unless we're able to allocate a new chunk below.
+        *
+        * Otherwise, we need to make sure we have room in the space to handle
+        * all of the extents from this block group.  If we can, we're good.
+        */
+       if ((space_info->total_bytes != block_group->key.offset) &&
+          (space_info->bytes_used + space_info->bytes_reserved +
+           space_info->bytes_pinned + space_info->bytes_readonly +
+           btrfs_block_group_used(&block_group->item) <
+           space_info->total_bytes)) {
+               spin_unlock(&space_info->lock);
+               goto out;
        }
-       btrfs_put_ordered_extent(ordered);
-       return 0;
-}
-
-int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
-{
-       struct btrfs_trans_handle *trans;
-       struct btrfs_path *path;
-       struct btrfs_fs_info *info = root->fs_info;
-       struct extent_buffer *leaf;
-       struct inode *reloc_inode;
-       struct btrfs_block_group_cache *block_group;
-       struct btrfs_key key;
-       u64 skipped;
-       u64 cur_byte;
-       u64 total_found;
-       u32 nritems;
-       int ret;
-       int progress;
-       int pass = 0;
-
-       root = root->fs_info->extent_root;
-
-       block_group = btrfs_lookup_block_group(info, group_start);
-       BUG_ON(!block_group);
-
-       printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
-              (unsigned long long)block_group->key.objectid,
-              (unsigned long long)block_group->flags);
-
-       path = btrfs_alloc_path();
-       BUG_ON(!path);
-
-       reloc_inode = create_reloc_inode(info, block_group);
-       BUG_ON(IS_ERR(reloc_inode));
-
-       __alloc_chunk_for_shrink(root, block_group, 1);
-       set_block_group_readonly(block_group);
-
-       btrfs_start_delalloc_inodes(info->tree_root);
-       btrfs_wait_ordered_extents(info->tree_root, 0);
-again:
-       skipped = 0;
-       total_found = 0;
-       progress = 0;
-       key.objectid = block_group->key.objectid;
-       key.offset = 0;
-       key.type = 0;
-       cur_byte = key.objectid;
-
-       trans = btrfs_start_transaction(info->tree_root, 1);
-       btrfs_commit_transaction(trans, info->tree_root);
+       spin_unlock(&space_info->lock);
 
-       mutex_lock(&root->fs_info->cleaner_mutex);
-       btrfs_clean_old_snapshots(info->tree_root);
-       btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
-       mutex_unlock(&root->fs_info->cleaner_mutex);
+       /*
+        * OK, we don't have enough space, but maybe we have free space on our
+        * devices to allocate new chunks for relocation, so loop through our
+        * alloc devices and guess if we have enough space.  However, if we
+        * were marked as full, then we know there aren't enough chunks, and we
+        * can just return.
+        */
+       ret = -1;
+       if (full)
+               goto out;
 
-       trans = btrfs_start_transaction(info->tree_root, 1);
-       btrfs_commit_transaction(trans, info->tree_root);
+       mutex_lock(&root->fs_info->chunk_mutex);
+       list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+               u64 min_free = btrfs_block_group_used(&block_group->item);
+               u64 dev_offset, max_avail;
 
-       while (1) {
-               ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-               if (ret < 0)
-                       goto out;
-next:
-               leaf = path->nodes[0];
-               nritems = btrfs_header_nritems(leaf);
-               if (path->slots[0] >= nritems) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret < 0)
-                               goto out;
-                       if (ret == 1) {
-                               ret = 0;
+               /*
+                * check to make sure we can actually find a chunk with enough
+                * space to fit our block group in.
+                */
+               if (device->total_bytes > device->bytes_used + min_free) {
+                       ret = find_free_dev_extent(NULL, device, min_free,
+                                                  &dev_offset, &max_avail);
+                       if (!ret)
                                break;
-                       }
-                       leaf = path->nodes[0];
-                       nritems = btrfs_header_nritems(leaf);
-               }
-
-               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
-               if (key.objectid >= block_group->key.objectid +
-                   block_group->key.offset)
-                       break;
-
-               if (progress && need_resched()) {
-                       btrfs_release_path(root, path);
-                       cond_resched();
-                       progress = 0;
-                       continue;
+                       ret = -1;
                }
-               progress = 1;
-
-               if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
-                   key.objectid + key.offset <= cur_byte) {
-                       path->slots[0]++;
-                       goto next;
-               }
-
-               total_found++;
-               cur_byte = key.objectid + key.offset;
-               btrfs_release_path(root, path);
-
-               __alloc_chunk_for_shrink(root, block_group, 0);
-               ret = relocate_one_extent(root, path, &key, block_group,
-                                         reloc_inode, pass);
-               BUG_ON(ret < 0);
-               if (ret > 0)
-                       skipped++;
-
-               key.objectid = cur_byte;
-               key.type = 0;
-               key.offset = 0;
        }
-
-       btrfs_release_path(root, path);
-
-       if (pass == 0) {
-               btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
-               invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
-       }
-
-       if (total_found > 0) {
-               printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
-                      (unsigned long long)total_found, pass);
-               pass++;
-               if (total_found == skipped && pass > 2) {
-                       iput(reloc_inode);
-                       reloc_inode = create_reloc_inode(info, block_group);
-                       pass = 0;
-               }
-               goto again;
-       }
-
-       /* delete reloc_inode */
-       iput(reloc_inode);
-
-       /* unpin extents in this range */
-       trans = btrfs_start_transaction(info->tree_root, 1);
-       btrfs_commit_transaction(trans, info->tree_root);
-
-       spin_lock(&block_group->lock);
-       WARN_ON(block_group->pinned > 0);
-       WARN_ON(block_group->reserved > 0);
-       WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
-       spin_unlock(&block_group->lock);
-       btrfs_put_block_group(block_group);
-       ret = 0;
+       mutex_unlock(&root->fs_info->chunk_mutex);
 out:
-       btrfs_free_path(path);
+       btrfs_put_block_group(block_group);
        return ret;
 }
-#endif
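
btrfs_can_relocate() reports 0 when relocation looks feasible and -1 when it does not. A hypothetical caller (illustrative; not necessarily the exact in-tree user) would gate a relocation attempt on it like this, where chunk_offset is assumed to be the block group's start:

        ret = btrfs_can_relocate(root->fs_info->extent_root, chunk_offset);
        if (ret)
                return -ENOSPC; /* not worth trying; leave the block group alone */
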
 
 static int find_first_block_group(struct btrfs_root *root,
                struct btrfs_path *path, struct btrfs_key *key)
@@ -7165,8 +6847,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
        struct btrfs_block_group_cache *block_group;
        struct btrfs_space_info *space_info;
+       struct btrfs_caching_control *caching_ctl;
        struct rb_node *n;
 
+       down_write(&info->extent_commit_sem);
+       while (!list_empty(&info->caching_block_groups)) {
+               caching_ctl = list_entry(info->caching_block_groups.next,
+                                        struct btrfs_caching_control, list);
+               list_del(&caching_ctl->list);
+               put_caching_control(caching_ctl);
+       }
+       up_write(&info->extent_commit_sem);
+
        spin_lock(&info->block_group_cache_lock);
        while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
                block_group = rb_entry(n, struct btrfs_block_group_cache,
@@ -7180,8 +6872,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                up_write(&block_group->space_info->groups_sem);
 
                if (block_group->cached == BTRFS_CACHE_STARTED)
-                       wait_event(block_group->caching_q,
-                                  block_group_cache_done(block_group));
+                       wait_block_group_cache_done(block_group);
 
                btrfs_remove_free_space_cache(block_group);
 
@@ -7251,7 +6942,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                spin_lock_init(&cache->lock);
                spin_lock_init(&cache->tree_lock);
                cache->fs_info = info;
-               init_waitqueue_head(&cache->caching_q);
                INIT_LIST_HEAD(&cache->list);
                INIT_LIST_HEAD(&cache->cluster_list);
 
@@ -7273,8 +6963,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache->flags = btrfs_block_group_flags(&cache->item);
                cache->sectorsize = root->sectorsize;
 
-               remove_sb_from_cache(root, cache);
-
                /*
                 * check for two cases, either we are full, and therefore
                 * don't need to bother with the caching work since we won't
@@ -7283,13 +6971,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * time, particularly in the full case.
                 */
                if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+                       exclude_super_stripes(root, cache);
+                       cache->last_byte_to_unpin = (u64)-1;
                        cache->cached = BTRFS_CACHE_FINISHED;
+                       free_excluded_extents(root, cache);
                } else if (btrfs_block_group_used(&cache->item) == 0) {
+                       exclude_super_stripes(root, cache);
+                       cache->last_byte_to_unpin = (u64)-1;
                        cache->cached = BTRFS_CACHE_FINISHED;
                        add_new_free_space(cache, root->fs_info,
                                           found_key.objectid,
                                           found_key.objectid +
                                           found_key.offset);
+                       free_excluded_extents(root, cache);
                }
 
                ret = update_space_info(info, cache->flags, found_key.offset,
@@ -7297,6 +6991,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                                        &space_info);
                BUG_ON(ret);
                cache->space_info = space_info;
+               spin_lock(&cache->space_info->lock);
+               cache->space_info->bytes_super += cache->bytes_super;
+               spin_unlock(&cache->space_info->lock);
+
                down_write(&space_info->groups_sem);
                list_add_tail(&cache->list, &space_info->block_groups);
                up_write(&space_info->groups_sem);
@@ -7346,7 +7044,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        atomic_set(&cache->count, 1);
        spin_lock_init(&cache->lock);
        spin_lock_init(&cache->tree_lock);
-       init_waitqueue_head(&cache->caching_q);
        INIT_LIST_HEAD(&cache->list);
        INIT_LIST_HEAD(&cache->cluster_list);
 
@@ -7355,15 +7052,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->flags = type;
        btrfs_set_block_group_flags(&cache->item, type);
 
+       cache->last_byte_to_unpin = (u64)-1;
        cache->cached = BTRFS_CACHE_FINISHED;
-       remove_sb_from_cache(root, cache);
+       exclude_super_stripes(root, cache);
 
        add_new_free_space(cache, root->fs_info, chunk_offset,
                           chunk_offset + size);
 
+       free_excluded_extents(root, cache);
+
        ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
                                &cache->space_info);
        BUG_ON(ret);
+
+       spin_lock(&cache->space_info->lock);
+       cache->space_info->bytes_super += cache->bytes_super;
+       spin_unlock(&cache->space_info->lock);
+
        down_write(&cache->space_info->groups_sem);
        list_add_tail(&cache->list, &cache->space_info->block_groups);
        up_write(&cache->space_info->groups_sem);
@@ -7429,8 +7134,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        up_write(&block_group->space_info->groups_sem);
 
        if (block_group->cached == BTRFS_CACHE_STARTED)
-               wait_event(block_group->caching_q,
-                          block_group_cache_done(block_group));
+               wait_block_group_cache_done(block_group);
 
        btrfs_remove_free_space_cache(block_group);
 
index 68260180f5871975b8f673df1234bf4091d91bb4..0cb88f8146ea85efa5bc944c9d95995a3d0ad772 100644 (file)
@@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree,
        }
        if (bits & EXTENT_DIRTY)
                tree->dirty_bytes += end - start + 1;
-       set_state_cb(tree, state, bits);
-       state->state |= bits;
        state->start = start;
        state->end = end;
+       set_state_cb(tree, state, bits);
+       state->state |= bits;
        node = tree_insert(&tree->state, end, &state->rb_node);
        if (node) {
                struct extent_state *found;
@@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree,
  * bits were already set, or zero if none of the bits were already set.
  */
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, int wake, int delete, gfp_t mask)
+                    int bits, int wake, int delete,
+                    struct extent_state **cached_state,
+                    gfp_t mask)
 {
        struct extent_state *state;
+       struct extent_state *cached;
        struct extent_state *prealloc = NULL;
+       struct rb_node *next_node;
        struct rb_node *node;
        u64 last_end;
        int err;
@@ -488,6 +492,17 @@ again:
        }
 
        spin_lock(&tree->lock);
+       if (cached_state) {
+               cached = *cached_state;
+               *cached_state = NULL;
+               cached_state = NULL;
+               if (cached && cached->tree && cached->start == start) {
+                       atomic_dec(&cached->refs);
+                       state = cached;
+                       goto hit_next;
+               }
+               free_extent_state(cached);
+       }
        /*
         * this search will find the extents that end after
         * our range starts
@@ -496,6 +511,7 @@ again:
        if (!node)
                goto out;
        state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
        if (state->start > end)
                goto out;
        WARN_ON(state->end < start);
@@ -531,8 +547,6 @@ again:
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
-               } else {
-                       start = state->start;
                }
                goto search_again;
        }
@@ -550,16 +564,28 @@ again:
 
                if (wake)
                        wake_up(&state->wq);
+
                set |= clear_state_bit(tree, prealloc, bits,
                                       wake, delete);
                prealloc = NULL;
                goto out;
        }
 
+       if (state->end < end && prealloc && !need_resched())
+               next_node = rb_next(&state->rb_node);
+       else
+               next_node = NULL;
+
        set |= clear_state_bit(tree, state, bits, wake, delete);
        if (last_end == (u64)-1)
                goto out;
        start = last_end + 1;
+       if (start <= end && next_node) {
+               state = rb_entry(next_node, struct extent_state,
+                                rb_node);
+               if (state->start == start)
+                       goto hit_next;
+       }
        goto search_again;
 
 out:
@@ -653,28 +679,40 @@ static void set_state_bits(struct extent_io_tree *tree,
        state->state |= bits;
 }
 
+static void cache_state(struct extent_state *state,
+                       struct extent_state **cached_ptr)
+{
+       if (cached_ptr && !(*cached_ptr)) {
+               if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
+                       *cached_ptr = state;
+                       atomic_inc(&state->refs);
+               }
+       }
+}
+
 /*
- * set some bits on a range in the tree.  This may require allocations
- * or sleeping, so the gfp mask is used to indicate what is allowed.
+ * set some bits on a range in the tree.  This may require allocations or
+ * sleeping, so the gfp mask is used to indicate what is allowed.
  *
- * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
- * range already has the desired bits set.  The start of the existing
- * range is returned in failed_start in this case.
+ * If any of the exclusive bits are set, this will fail with -EEXIST if some
+ * part of the range already has the desired bits set.  The start of the
+ * existing range is returned in failed_start in this case.
  *
- * [start, end] is inclusive
- * This takes the tree lock.
+ * [start, end] is inclusive.  This takes the tree lock.
  */
+
 static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                         int bits, int exclusive, u64 *failed_start,
+                         int bits, int exclusive_bits, u64 *failed_start,
+                         struct extent_state **cached_state,
                          gfp_t mask)
 {
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
        int err = 0;
-       int set;
        u64 last_start;
        u64 last_end;
+
 again:
        if (!prealloc && (mask & __GFP_WAIT)) {
                prealloc = alloc_extent_state(mask);
@@ -683,6 +721,13 @@ again:
        }
 
        spin_lock(&tree->lock);
+       if (cached_state && *cached_state) {
+               state = *cached_state;
+               if (state->start == start && state->tree) {
+                       node = &state->rb_node;
+                       goto hit_next;
+               }
+       }
        /*
         * this search will find all the extents that end after
         * our range starts.
@@ -694,8 +739,8 @@ again:
                BUG_ON(err == -EEXIST);
                goto out;
        }
-
        state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
        last_start = state->start;
        last_end = state->end;
 
@@ -706,17 +751,29 @@ again:
         * Just lock what we found and keep going
         */
        if (state->start == start && state->end <= end) {
-               set = state->state & bits;
-               if (set && exclusive) {
+               struct rb_node *next_node;
+               if (state->state & exclusive_bits) {
                        *failed_start = state->start;
                        err = -EEXIST;
                        goto out;
                }
+
                set_state_bits(tree, state, bits);
+               cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
+
                start = last_end + 1;
+               if (start < end && prealloc && !need_resched()) {
+                       next_node = rb_next(node);
+                       if (next_node) {
+                               state = rb_entry(next_node, struct extent_state,
+                                                rb_node);
+                               if (state->start == start)
+                                       goto hit_next;
+                       }
+               }
                goto search_again;
        }
 
@@ -737,8 +794,7 @@ again:
         * desired bit on it.
         */
        if (state->start < start) {
-               set = state->state & bits;
-               if (exclusive && set) {
+               if (state->state & exclusive_bits) {
                        *failed_start = start;
                        err = -EEXIST;
                        goto out;
@@ -750,12 +806,11 @@ again:
                        goto out;
                if (state->end <= end) {
                        set_state_bits(tree, state, bits);
+                       cache_state(state, cached_state);
                        merge_state(tree, state);
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
-               } else {
-                       start = state->start;
                }
                goto search_again;
        }
@@ -774,6 +829,7 @@ again:
                        this_end = last_start - 1;
                err = insert_state(tree, prealloc, start, this_end,
                                   bits);
+               cache_state(prealloc, cached_state);
                prealloc = NULL;
                BUG_ON(err == -EEXIST);
                if (err)
@@ -788,8 +844,7 @@ again:
         * on the first half
         */
        if (state->start <= end && state->end > end) {
-               set = state->state & bits;
-               if (exclusive && set) {
+               if (state->state & exclusive_bits) {
                        *failed_start = start;
                        err = -EEXIST;
                        goto out;
@@ -798,6 +853,7 @@ again:
                BUG_ON(err == -EEXIST);
 
                set_state_bits(tree, prealloc, bits);
+               cache_state(prealloc, cached_state);
                merge_state(tree, prealloc);
                prealloc = NULL;
                goto out;
@@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
-                             mask);
-}
-
-int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
-                      gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+                             NULL, mask);
 }
 
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                    int bits, gfp_t mask)
 {
        return set_extent_bit(tree, start, end, bits, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                      int bits, gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+       return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
 }
 
 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
        return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_DIRTY,
-                             0, NULL, mask);
+                             EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
+                             0, NULL, NULL, mask);
 }
 
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
                       gfp_t mask)
 {
        return clear_extent_bit(tree, start, end,
-                               EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
-}
-
-int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
-                        gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+                               EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
+                               NULL, mask);
 }
 
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                       gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
+                               NULL, mask);
 }
 
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                        gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
                                 u64 end, gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
-}
-
-static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
-                        gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
-                             0, NULL, mask);
-}
-
-static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
-                                 u64 end, gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+                               NULL, mask);
 }
 
 int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
  * either insert or lock state struct between start and end; use mask to tell
  * us if waiting is desired.
  */
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                    int bits, struct extent_state **cached_state, gfp_t mask)
 {
        int err;
        u64 failed_start;
        while (1) {
-               err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
-                                    &failed_start, mask);
+               err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+                                    EXTENT_LOCKED, &failed_start,
+                                    cached_state, mask);
                if (err == -EEXIST && (mask & __GFP_WAIT)) {
                        wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
                        start = failed_start;
@@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
        return err;
 }
 
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+       return lock_extent_bits(tree, start, end, 0, NULL, mask);
+}
+
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
                    gfp_t mask)
 {
        int err;
        u64 failed_start;
 
-       err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
-                            &failed_start, mask);
+       err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
+                            &failed_start, NULL, mask);
        if (err == -EEXIST) {
                if (failed_start > start)
                        clear_extent_bit(tree, start, failed_start - 1,
-                                        EXTENT_LOCKED, 1, 0, mask);
+                                        EXTENT_LOCKED, 1, 0, NULL, mask);
                return 0;
        }
        return 1;
 }
 
+int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
+                        struct extent_state **cached, gfp_t mask)
+{
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+                               mask);
+}
+
 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
                  gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
+                               mask);
 }
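
Taken together, lock_extent_bits() and unlock_extent_cached() form the cached-state fast path introduced in this series: the lock call hands back a referenced extent_state, and the unlock call passes it to clear_extent_bit() so the rbtree search can be skipped. A sketch of the intended pairing (the range and GFP flags are illustrative):

        struct extent_state *cached = NULL;

        lock_extent_bits(tree, start, end, 0, &cached, GFP_NOFS);
        /* ... work on [start, end] while it is locked ... */
        unlock_extent_cached(tree, start, end, &cached, GFP_NOFS);
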
 
 /*
@@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       set_extent_dirty(tree, start, end, GFP_NOFS);
        return 0;
 }
 
@@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       set_extent_writeback(tree, start, end, GFP_NOFS);
        return 0;
 }
 
@@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode,
        u64 delalloc_start;
        u64 delalloc_end;
        u64 found;
+       struct extent_state *cached_state = NULL;
        int ret;
        int loops = 0;
 
@@ -1269,6 +1317,7 @@ again:
                /* some of the pages are gone, lets avoid looping by
                 * shortening the size of the delalloc range we're searching
                 */
+               free_extent_state(cached_state);
                if (!loops) {
                        unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
                        max_bytes = PAGE_CACHE_SIZE - offset;
@@ -1282,18 +1331,21 @@ again:
        BUG_ON(ret);
 
        /* step three, lock the state bits for the whole range */
-       lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+       lock_extent_bits(tree, delalloc_start, delalloc_end,
+                        0, &cached_state, GFP_NOFS);
 
        /* then test to make sure it is all still delalloc */
        ret = test_range_bit(tree, delalloc_start, delalloc_end,
-                            EXTENT_DELALLOC, 1);
+                            EXTENT_DELALLOC, 1, cached_state);
        if (!ret) {
-               unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+               unlock_extent_cached(tree, delalloc_start, delalloc_end,
+                                    &cached_state, GFP_NOFS);
                __unlock_for_delalloc(inode, locked_page,
                              delalloc_start, delalloc_end);
                cond_resched();
                goto again;
        }
+       free_extent_state(cached_state);
        *start = delalloc_start;
        *end = delalloc_end;
 out_failed:
@@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                int clear_unlock,
                                int clear_delalloc, int clear_dirty,
                                int set_writeback,
-                               int end_writeback)
+                               int end_writeback,
+                               int set_private2)
 {
        int ret;
        struct page *pages[16];
@@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode,
        if (clear_delalloc)
                clear_bits |= EXTENT_DELALLOC;
 
-       clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
-       if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
+       clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
+       if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
+             set_private2))
                return 0;
 
        while (nr_pages > 0) {
@@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                     min_t(unsigned long,
                                     nr_pages, ARRAY_SIZE(pages)), pages);
                for (i = 0; i < ret; i++) {
+
+                       if (set_private2)
+                               SetPagePrivate2(pages[i]);
+
                        if (pages[i] == locked_page) {
                                page_cache_release(pages[i]);
                                continue;
@@ -1476,14 +1534,17 @@ out:
  * range is found set.
  */
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                  int bits, int filled)
+                  int bits, int filled, struct extent_state *cached)
 {
        struct extent_state *state = NULL;
        struct rb_node *node;
        int bitset = 0;
 
        spin_lock(&tree->lock);
-       node = tree_search(tree, start);
+       if (cached && cached->tree && cached->start == start)
+               node = &cached->rb_node;
+       else
+               node = tree_search(tree, start);
        while (node && start <= end) {
                state = rb_entry(node, struct extent_state, rb_node);
 
@@ -1503,6 +1564,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
                        bitset = 0;
                        break;
                }
+
+               if (state->end == (u64)-1)
+                       break;
+
                start = state->end + 1;
                if (start > end)
                        break;
@@ -1526,7 +1591,7 @@ static int check_page_uptodate(struct extent_io_tree *tree,
 {
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
+       if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
                SetPageUptodate(page);
        return 0;
 }
@@ -1540,7 +1605,7 @@ static int check_page_locked(struct extent_io_tree *tree,
 {
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
+       if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
                unlock_page(page);
        return 0;
 }
@@ -1552,10 +1617,7 @@ static int check_page_locked(struct extent_io_tree *tree,
 static int check_page_writeback(struct extent_io_tree *tree,
                             struct page *page)
 {
-       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
-               end_page_writeback(page);
+       end_page_writeback(page);
        return 0;
 }
 
@@ -1613,13 +1675,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
                }
 
                if (!uptodate) {
-                       clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+                       clear_extent_uptodate(tree, start, end, GFP_NOFS);
                        ClearPageUptodate(page);
                        SetPageError(page);
                }
 
-               clear_extent_writeback(tree, start, end, GFP_ATOMIC);
-
                if (whole_page)
                        end_page_writeback(page);
                else
@@ -1983,7 +2043,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        continue;
                }
                /* the get_extent function already copied into the page */
-               if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+               if (test_range_bit(tree, cur, cur_end,
+                                  EXTENT_UPTODATE, 1, NULL)) {
                        check_page_uptodate(tree, page);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
@@ -2078,6 +2139,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 iosize;
        u64 unlock_start;
        sector_t sector;
+       struct extent_state *cached_state = NULL;
        struct extent_map *em;
        struct block_device *bdev;
        int ret;
@@ -2124,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        delalloc_end = 0;
        page_started = 0;
        if (!epd->extent_locked) {
+               u64 delalloc_to_write = 0;
                /*
                 * make sure the wbc mapping index is at least updated
                 * to this page.
@@ -2143,8 +2206,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        tree->ops->fill_delalloc(inode, page, delalloc_start,
                                                 delalloc_end, &page_started,
                                                 &nr_written);
+                       /*
+                        * delalloc_end is an inclusive offset (start + len - 1),
+                        * so we don't subtract one from PAGE_CACHE_SIZE when
+                        * rounding the range up to whole pages
+                        */
+                       delalloc_to_write += (delalloc_end - delalloc_start +
+                                             PAGE_CACHE_SIZE) >>
+                                             PAGE_CACHE_SHIFT;
                        delalloc_start = delalloc_end + 1;
                }
+               if (wbc->nr_to_write < delalloc_to_write) {
+                       int thresh = 8192;
+
+                       if (delalloc_to_write < thresh * 2)
+                               thresh = delalloc_to_write;
+                       wbc->nr_to_write = min_t(u64, delalloc_to_write,
+                                                thresh);
+               }
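
The shift above rounds the delalloc range up to whole pages without the usual "- 1" because delalloc_end is an inclusive offset. Worked examples, assuming 4K pages (PAGE_CACHE_SHIFT == 12):

        start = 0, end = 0    : (0    - 0 + 4096) >> 12 = 1 page
        start = 0, end = 8191 : (8191 - 0 + 4096) >> 12 = 2 pages
        start = 0, end = 8192 : (8192 - 0 + 4096) >> 12 = 3 pages
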
 
                /* did the fill delalloc function already unlock and start
                 * the IO?
@@ -2160,15 +2239,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        goto done_unlocked;
                }
        }
-       lock_extent(tree, start, page_end, GFP_NOFS);
-
-       unlock_start = start;
-
        if (tree->ops && tree->ops->writepage_start_hook) {
                ret = tree->ops->writepage_start_hook(page, start,
                                                      page_end);
                if (ret == -EAGAIN) {
-                       unlock_extent(tree, start, page_end, GFP_NOFS);
                        redirty_page_for_writepage(wbc, page);
                        update_nr_written(page, wbc, nr_written);
                        unlock_page(page);
@@ -2184,12 +2258,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        update_nr_written(page, wbc, nr_written + 1);
 
        end = page_end;
-       if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
-               printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
-
        if (last_byte <= start) {
-               clear_extent_dirty(tree, start, page_end, GFP_NOFS);
-               unlock_extent(tree, start, page_end, GFP_NOFS);
                if (tree->ops && tree->ops->writepage_end_io_hook)
                        tree->ops->writepage_end_io_hook(page, start,
                                                         page_end, NULL, 1);
@@ -2197,13 +2266,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                goto done;
        }
 
-       set_extent_uptodate(tree, start, page_end, GFP_NOFS);
        blocksize = inode->i_sb->s_blocksize;
 
        while (cur <= end) {
                if (cur >= last_byte) {
-                       clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
-                       unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
                        if (tree->ops && tree->ops->writepage_end_io_hook)
                                tree->ops->writepage_end_io_hook(page, cur,
                                                         page_end, NULL, 1);
@@ -2235,12 +2301,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                 */
                if (compressed || block_start == EXTENT_MAP_HOLE ||
                    block_start == EXTENT_MAP_INLINE) {
-                       clear_extent_dirty(tree, cur,
-                                          cur + iosize - 1, GFP_NOFS);
-
-                       unlock_extent(tree, unlock_start, cur + iosize - 1,
-                                     GFP_NOFS);
-
                        /*
                         * end_io notification does not happen here for
                         * compressed extents
@@ -2265,13 +2325,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                }
                /* leave this out until we have a page_mkwrite call */
                if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-                                  EXTENT_DIRTY, 0)) {
+                                  EXTENT_DIRTY, 0, NULL)) {
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
                }
 
-               clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
                if (tree->ops && tree->ops->writepage_io_hook) {
                        ret = tree->ops->writepage_io_hook(page, cur,
                                                cur + iosize - 1);
@@ -2309,12 +2368,12 @@ done:
                set_page_writeback(page);
                end_page_writeback(page);
        }
-       if (unlock_start <= page_end)
-               unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
        unlock_page(page);
 
 done_unlocked:
 
+       /* drop our reference on any cached states */
+       free_extent_state(cached_state);
        return 0;
 }
 
@@ -2339,9 +2398,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                             writepage_t writepage, void *data,
                             void (*flush_fn)(void *))
 {
-       struct backing_dev_info *bdi = mapping->backing_dev_info;
        int ret = 0;
        int done = 0;
+       int nr_to_write_done = 0;
        struct pagevec pvec;
        int nr_pages;
        pgoff_t index;
@@ -2361,7 +2420,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                scanned = 1;
        }
 retry:
-       while (!done && (index <= end) &&
+       while (!done && !nr_to_write_done && (index <= end) &&
               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                              PAGECACHE_TAG_DIRTY, min(end - index,
                                  (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2412,12 +2471,15 @@ retry:
                                unlock_page(page);
                                ret = 0;
                        }
-                       if (ret || wbc->nr_to_write <= 0)
-                               done = 1;
-                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
-                               wbc->encountered_congestion = 1;
+                       if (ret)
                                done = 1;
-                       }
+
+                       /*
+                        * the filesystem may choose to bump up nr_to_write.
+                        * We have to make sure to honor the new nr_to_write
+                        * at any time.
+                        */
+                       nr_to_write_done = wbc->nr_to_write <= 0;
                }
                pagevec_release(&pvec);
                cond_resched();
@@ -2604,10 +2666,10 @@ int extent_invalidatepage(struct extent_io_tree *tree,
                return 0;
 
        lock_extent(tree, start, end, GFP_NOFS);
-       wait_on_extent_writeback(tree, start, end);
+       wait_on_page_writeback(page);
        clear_extent_bit(tree, start, end,
                         EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
-                        1, 1, GFP_NOFS);
+                        1, 1, NULL, GFP_NOFS);
        return 0;
 }
 
@@ -2687,7 +2749,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
                    !isnew && !PageUptodate(page) &&
                    (block_off_end > to || block_off_start < from) &&
                    !test_range_bit(tree, block_start, cur_end,
-                                   EXTENT_UPTODATE, 1)) {
+                                   EXTENT_UPTODATE, 1, NULL)) {
                        u64 sector;
                        u64 extent_offset = block_start - em->start;
                        size_t iosize;
@@ -2701,7 +2763,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
                         */
                        set_extent_bit(tree, block_start,
                                       block_start + iosize - 1,
-                                      EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+                                      EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
                        ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset, em->bdev,
                                         NULL, 1,
@@ -2742,13 +2804,18 @@ int try_release_extent_state(struct extent_map_tree *map,
        int ret = 1;
 
        if (test_range_bit(tree, start, end,
-                          EXTENT_IOBITS | EXTENT_ORDERED, 0))
+                          EXTENT_IOBITS, 0, NULL))
                ret = 0;
        else {
                if ((mask & GFP_NOFS) == GFP_NOFS)
                        mask = GFP_NOFS;
-               clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
-                                1, 1, mask);
+               /*
+                * at this point we can safely clear everything except the
+                * locked bit and the nodatasum bit.
+                */
+               clear_extent_bit(tree, start, end,
+                                ~(EXTENT_LOCKED | EXTENT_NODATASUM),
+                                0, 0, NULL, mask);
        }
        return ret;
 }
@@ -2771,29 +2838,28 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                u64 len;
                while (start <= end) {
                        len = end - start + 1;
-                       spin_lock(&map->lock);
+                       write_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
                        if (!em || IS_ERR(em)) {
-                               spin_unlock(&map->lock);
+                               write_unlock(&map->lock);
                                break;
                        }
                        if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
                            em->start != start) {
-                               spin_unlock(&map->lock);
+                               write_unlock(&map->lock);
                                free_extent_map(em);
                                break;
                        }
                        if (!test_range_bit(tree, em->start,
                                            extent_map_end(em) - 1,
-                                           EXTENT_LOCKED | EXTENT_WRITEBACK |
-                                           EXTENT_ORDERED,
-                                           0)) {
+                                           EXTENT_LOCKED | EXTENT_WRITEBACK,
+                                           0, NULL)) {
                                remove_extent_mapping(map, em);
                                /* once for the rb tree */
                                free_extent_map(em);
                        }
                        start = extent_map_end(em);
-                       spin_unlock(&map->lock);
+                       write_unlock(&map->lock);
 
                        /* once for us */
                        free_extent_map(em);
@@ -3203,7 +3269,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
        int uptodate;
        unsigned long index;
 
-       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
+       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
        if (ret)
                return 1;
        while (start <= end) {
@@ -3233,7 +3299,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
                return 1;
 
        ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1);
+                          EXTENT_UPTODATE, 1, NULL);
        if (ret)
                return ret;
 
@@ -3269,7 +3335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                return 0;
 
        if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1)) {
+                          EXTENT_UPTODATE, 1, NULL)) {
                return 0;
        }
 
index 5bc20abf3f3d340b22248802e4c8d7d72f4dc4bb..14ed16fd862df22a93b7286c4c4811a8c12fa6ba 100644 (file)
 #define EXTENT_DEFRAG (1 << 6)
 #define EXTENT_DEFRAG_DONE (1 << 7)
 #define EXTENT_BUFFER_FILLED (1 << 8)
-#define EXTENT_ORDERED (1 << 9)
-#define EXTENT_ORDERED_METADATA (1 << 10)
-#define EXTENT_BOUNDARY (1 << 11)
-#define EXTENT_NODATASUM (1 << 12)
+#define EXTENT_BOUNDARY (1 << 9)
+#define EXTENT_NODATASUM (1 << 10)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 
 /* flags for bio submission */
@@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map,
                             struct extent_io_tree *tree, struct page *page,
                             gfp_t mask);
 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                    int bits, struct extent_state **cached, gfp_t mask);
 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
                    gfp_t mask);
@@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree,
                     u64 max_bytes, unsigned long bits);
 
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                  int bits, int filled);
+                  int bits, int filled, struct extent_state *cached_state);
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                      int bits, gfp_t mask);
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, int wake, int delete, gfp_t mask);
+                    int bits, int wake, int delete, struct extent_state **cached,
+                    gfp_t mask);
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                    int bits, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
@@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                int clear_unlock,
                                int clear_delalloc, int clear_dirty,
                                int set_writeback,
-                               int end_writeback);
+                               int end_writeback,
+                               int set_private2);
 #endif
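
The prototype changes above thread an optional cached extent_state through test_range_bit() and clear_extent_bit(), so a caller that operates repeatedly on the same range can skip the rb-tree search. A minimal userspace sketch of the caching out-parameter idiom; the types and lookup below are illustrative stand-ins, not the btrfs structures:

        #include <stdio.h>
        #include <stddef.h>

        struct state { unsigned long start, end, bits; };

        static struct state tree[] = { { 0, 4095, 0x1 }, { 4096, 8191, 0x2 } };

        static struct state *lookup(unsigned long start, struct state **cached)
        {
                if (*cached && (*cached)->start <= start && start <= (*cached)->end)
                        return *cached;                 /* cache hit: no search */
                for (size_t i = 0; i < sizeof(tree) / sizeof(tree[0]); i++) {
                        if (tree[i].start <= start && start <= tree[i].end) {
                                *cached = &tree[i];     /* remember for next time */
                                return &tree[i];
                        }
                }
                return NULL;
        }

        int main(void)
        {
                struct state *cached = NULL;
                struct state *s = lookup(100, &cached);    /* searches the tree */
                struct state *t = lookup(200, &cached);    /* reuses cached node */
                printf("same node: %d\n", s == t);
                return 0;
        }
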
index 30c9365861e69602f96e4debc96bacf8889028e1..2c726b7b9faa17af725b8a5b8319148758b7dd91 100644 (file)
@@ -36,7 +36,7 @@ void extent_map_exit(void)
 void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
 {
        tree->map.rb_node = NULL;
-       spin_lock_init(&tree->lock);
+       rwlock_init(&tree->lock);
 }
 
 /**
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
        return 0;
 }
 
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
+{
+       int ret = 0;
+       struct extent_map *merge = NULL;
+       struct rb_node *rb;
+       struct extent_map *em;
+
+       write_lock(&tree->lock);
+       em = lookup_extent_mapping(tree, start, len);
+
+       WARN_ON(!em || em->start != start);
+
+       if (!em)
+               goto out;
+
+       clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+       if (em->start != 0) {
+               rb = rb_prev(&em->rb_node);
+               if (rb)
+                       merge = rb_entry(rb, struct extent_map, rb_node);
+               if (rb && mergable_maps(merge, em)) {
+                       em->start = merge->start;
+                       em->len += merge->len;
+                       em->block_len += merge->block_len;
+                       em->block_start = merge->block_start;
+                       merge->in_tree = 0;
+                       rb_erase(&merge->rb_node, &tree->map);
+                       free_extent_map(merge);
+               }
+       }
+
+       rb = rb_next(&em->rb_node);
+       if (rb)
+               merge = rb_entry(rb, struct extent_map, rb_node);
+       if (rb && mergable_maps(em, merge)) {
+               em->len += merge->len;
+               em->block_len += merge->block_len;
+               rb_erase(&merge->rb_node, &tree->map);
+               merge->in_tree = 0;
+               free_extent_map(merge);
+       }
+
+       free_extent_map(em);
+out:
+       write_unlock(&tree->lock);
+       return ret;
+}
+
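
unpin_extent_cache() clears EXTENT_FLAG_PINNED and then folds the extent into its rb-tree neighbors on both sides when the ranges line up. A minimal sketch of the two-sided merge over plain structs, assuming "mergable" simply means byte-contiguous:

        #include <stdio.h>

        struct map { unsigned long start, len; };

        static int mergable(struct map *prev, struct map *next)
        {
                return prev->start + prev->len == next->start;
        }

        int main(void)
        {
                struct map prev = { 0, 4096 }, em = { 4096, 4096 }, next = { 8192, 4096 };

                if (mergable(&prev, &em)) {     /* merge backwards */
                        em.start = prev.start;
                        em.len += prev.len;
                }
                if (mergable(&em, &next))       /* merge forwards */
                        em.len += next.len;

                printf("merged: start=%lu len=%lu\n", em.start, em.len);
                return 0;
        }
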
 /**
  * add_extent_mapping - add new extent map to the extent tree
  * @tree:      tree to insert new map in
@@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
                ret = -EEXIST;
                goto out;
        }
-       assert_spin_locked(&tree->lock);
        rb = tree_insert(&tree->map, em->start, &em->rb_node);
        if (rb) {
                ret = -EEXIST;
@@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
        struct rb_node *next = NULL;
        u64 end = range_end(start, len);
 
-       assert_spin_locked(&tree->lock);
        rb_node = __tree_search(&tree->map, start, &prev, &next);
        if (!rb_node && prev) {
                em = rb_entry(prev, struct extent_map, rb_node);
@@ -318,6 +366,54 @@ out:
        return em;
 }
 
+/**
+ * search_extent_mapping - find a nearby extent map
+ * @tree:      tree to lookup in
+ * @start:     byte offset to start the search
+ * @len:       length of the lookup range
+ *
+ * Find and return the first extent_map struct in @tree that intersects the
+ * [start, start + len) range.
+ *
+ * If an exact match can't be found, any nearby extent may be returned instead.
+ */
+struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
+                                        u64 start, u64 len)
+{
+       struct extent_map *em;
+       struct rb_node *rb_node;
+       struct rb_node *prev = NULL;
+       struct rb_node *next = NULL;
+
+       rb_node = __tree_search(&tree->map, start, &prev, &next);
+       if (!rb_node && prev) {
+               em = rb_entry(prev, struct extent_map, rb_node);
+               goto found;
+       }
+       if (!rb_node && next) {
+               em = rb_entry(next, struct extent_map, rb_node);
+               goto found;
+       }
+       if (!rb_node) {
+               em = NULL;
+               goto out;
+       }
+       if (IS_ERR(rb_node)) {
+               em = ERR_CAST(rb_node);
+               goto out;
+       }
+       em = rb_entry(rb_node, struct extent_map, rb_node);
+       goto found;
+
+found:
+       atomic_inc(&em->refs);
+out:
+       return em;
+}
+
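
search_extent_mapping() differs from lookup_extent_mapping() in that a miss returns a nearby extent rather than NULL, which cow_file_range() later in this patch uses to seed its allocation hint. A purely illustrative sketch of the same exact-or-neighbor search over a sorted array:

        #include <stdio.h>

        static unsigned long starts[] = { 0, 4096, 16384 };
        #define N (sizeof(starts) / sizeof(starts[0]))

        static int search(unsigned long start)
        {
                int lo = 0, hi = N - 1, prev = -1, next = -1;

                while (lo <= hi) {
                        int mid = (lo + hi) / 2;
                        if (starts[mid] == start)
                                return mid;             /* exact hit */
                        if (starts[mid] < start) { prev = mid; lo = mid + 1; }
                        else                     { next = mid; hi = mid - 1; }
                }
                return prev >= 0 ? prev : next;         /* nearby fallback */
        }

        int main(void)
        {
                printf("found index %d for offset 9000\n", search(9000));
                return 0;
        }
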
 /**
  * remove_extent_mapping - removes an extent_map from the extent tree
  * @tree:      extent tree to remove from
@@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
        int ret = 0;
 
        WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
-       assert_spin_locked(&tree->lock);
        rb_erase(&em->rb_node, &tree->map);
        em->in_tree = 0;
        return ret;
index fb6eeef06bb0a88abcf9297c2c6d9156a35af552..ab6d74b6e6477dcfb1bf65df494c5749cc8d80d9 100644 (file)
@@ -31,7 +31,7 @@ struct extent_map {
 
 struct extent_map_tree {
        struct rb_root map;
-       spinlock_t lock;
+       rwlock_t lock;
 };
 
 static inline u64 extent_map_end(struct extent_map *em)
@@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask);
 void free_extent_map(struct extent_map *em);
 int __init extent_map_init(void);
 void extent_map_exit(void);
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
+struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
+                                        u64 start, u64 len);
 #endif
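
Turning extent_map_tree's spinlock into an rwlock lets lookups run concurrently while inserts and removals stay exclusive, which is why the callers throughout this patch split into read_lock/read_unlock and write_lock/write_unlock pairs. A userspace analogue using POSIX rwlocks; the kernel primitive is rwlock_t, not pthread_rwlock_t:

        #include <pthread.h>
        #include <stdio.h>

        static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;
        static int tree_size;

        static int lookup(void)         /* many readers may run concurrently */
        {
                pthread_rwlock_rdlock(&tree_lock);
                int n = tree_size;
                pthread_rwlock_unlock(&tree_lock);
                return n;
        }

        static void insert(void)        /* writers are exclusive */
        {
                pthread_rwlock_wrlock(&tree_lock);
                tree_size++;
                pthread_rwlock_unlock(&tree_lock);
        }

        int main(void)
        {
                insert();
                printf("size=%d\n", lookup());
                return 0;
        }
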
index 4b833972273a75218eb775cf8caf64dc1be80ed7..571ad3c13b47be3de640f4ce84253f3ce593b26d 100644 (file)
@@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
        int err = 0;
        int i;
        struct inode *inode = fdentry(file)->d_inode;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-       u64 hint_byte;
        u64 num_bytes;
        u64 start_pos;
        u64 end_of_last_block;
@@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
        end_of_last_block = start_pos + num_bytes - 1;
-
-       lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-       trans = btrfs_join_transaction(root, 1);
-       if (!trans) {
-               err = -ENOMEM;
-               goto out_unlock;
-       }
-       btrfs_set_trans_block_group(trans, inode);
-       hint_byte = 0;
-
-       set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-
-       /* check for reserved extents on each page, we don't want
-        * to reset the delalloc bit on things that already have
-        * extents reserved.
-        */
        btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
        for (i = 0; i < num_pages; i++) {
                struct page *p = pages[i];
@@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                 * at this time.
                 */
        }
-       err = btrfs_end_transaction(trans, root);
-out_unlock:
-       unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
        return err;
 }
 
@@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                if (!split2)
                        split2 = alloc_extent_map(GFP_NOFS);
 
-               spin_lock(&em_tree->lock);
+               write_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, len);
                if (!em) {
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        break;
                }
                flags = em->flags;
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
-                       spin_unlock(&em_tree->lock);
                        if (em->start <= start &&
                            (!testend || em->start + em->len >= start + len)) {
                                free_extent_map(em);
+                               write_unlock(&em_tree->lock);
                                break;
                        }
                        if (start < em->start) {
@@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                                start = em->start + em->len;
                        }
                        free_extent_map(em);
+                       write_unlock(&em_tree->lock);
                        continue;
                }
                compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        free_extent_map(split);
                        split = NULL;
                }
-               spin_unlock(&em_tree->lock);
+               write_unlock(&em_tree->lock);
 
                /* once for us */
                free_extent_map(em);
@@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
                       u64 start, u64 end, u64 locked_end,
-                      u64 inline_limit, u64 *hint_byte)
+                      u64 inline_limit, u64 *hint_byte, int drop_cache)
 {
        u64 extent_end = 0;
        u64 search_start = start;
@@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int ret;
 
        inline_limit = 0;
-       btrfs_drop_extent_cache(inode, start, end - 1, 0);
+       if (drop_cache)
+               btrfs_drop_extent_cache(inode, start, end - 1, 0);
 
        path = btrfs_alloc_path();
        if (!path)
index 5edcee3a617f44e4608cefd709a6e8d7c38e14a5..5c2caad76212d2b5bfcc4329e255978525d6f5ed 100644 (file)
@@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
 
 static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
 {
-       u64 max_bytes, possible_bytes;
+       u64 max_bytes;
+       u64 bitmap_bytes;
+       u64 extent_bytes;
 
        /*
         * The goal is to keep the total amount of memory used per 1gb of space
@@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
        max_bytes = MAX_CACHE_BYTES_PER_GIG *
                (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
 
-       possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
-               (sizeof(struct btrfs_free_space) *
-                block_group->extents_thresh);
+       /*
+        * we want to account for one more bitmap than we currently have, so
+        * we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
+        * we add more bitmaps.
+        */
+       bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;
 
-       if (possible_bytes > max_bytes) {
-               int extent_bytes = max_bytes -
-                       (block_group->total_bitmaps * PAGE_CACHE_SIZE);
+       if (bitmap_bytes >= max_bytes) {
+               block_group->extents_thresh = 0;
+               return;
+       }
 
-               if (extent_bytes <= 0) {
-                       block_group->extents_thresh = 0;
-                       return;
-               }
+       /*
+        * we want the extent entry bytes to be whatever is left of the cache
+        * budget after the bitmaps, capped at 1/2 the max bytes we can have.
+        */
+       extent_bytes = max_bytes - bitmap_bytes;
+       extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
 
-               block_group->extents_thresh = extent_bytes /
-                       (sizeof(struct btrfs_free_space));
-       }
+       block_group->extents_thresh =
+               div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
 }
 
 static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
@@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
        BUG_ON(block_group->total_bitmaps >= max_bitmaps);
 
        info->offset = offset_to_bitmap(block_group, offset);
+       info->bytes = 0;
        link_free_space(block_group, info);
        block_group->total_bitmaps++;
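
Worked numbers for the new threshold math, as a small userspace program. The constants are assumptions for illustration only (32k of cache per GiB, 4k pages, 48-byte free-space entries); just the arithmetic mirrors recalculate_thresholds() above:

        #include <stdio.h>
        #include <stdint.h>

        #define MAX_CACHE_BYTES_PER_GIG (32ULL * 1024)  /* assumed value */
        #define PAGE_CACHE_SIZE 4096ULL                 /* assumed value */
        #define FREE_SPACE_ENTRY_SIZE 48ULL             /* assumed sizeof(struct btrfs_free_space) */

        int main(void)
        {
                uint64_t group_bytes = 1024ULL * 1024 * 1024;   /* 1 GiB block group */
                uint64_t total_bitmaps = 2;

                uint64_t max_bytes = MAX_CACHE_BYTES_PER_GIG *
                                     (group_bytes / (1024ULL * 1024 * 1024));
                /* account for one bitmap beyond what we have now */
                uint64_t bitmap_bytes = (total_bitmaps + 1) * PAGE_CACHE_SIZE;

                if (bitmap_bytes >= max_bytes) {
                        printf("extents_thresh = 0\n");
                        return 0;
                }
                uint64_t extent_bytes = max_bytes - bitmap_bytes;
                if (extent_bytes > max_bytes / 2)
                        extent_bytes = max_bytes / 2;   /* cap at half the budget */

                printf("extents_thresh = %llu\n",
                       (unsigned long long)(extent_bytes / FREE_SPACE_ENTRY_SIZE));
                return 0;
        }
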
 
index 6b627c6118081ccea87d1acaf92000f995dda946..72ce3c173d6a3d111826e40ce8421b8f7f57f9b1 100644 (file)
@@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
                ptr = (unsigned long)(ref + 1);
                ret = 0;
        } else if (ret < 0) {
+               if (ret == -EOVERFLOW)
+                       ret = -EMLINK;
                goto out;
        } else {
                ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
 
        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      sizeof(struct btrfs_inode_item));
-       if (ret == 0 && objectid > root->highest_inode)
-               root->highest_inode = objectid;
        return ret;
 }
 
index 9abbced1123dc67139e2de6af415c0b41cfcfda5..c56eb5909172956da6354eef29ea36f7f698a8fc 100644 (file)
@@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
                slot = path->slots[0] - 1;
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);
-               *objectid = found_key.objectid;
+               *objectid = max_t(u64, found_key.objectid,
+                                 BTRFS_FIRST_FREE_OBJECTID - 1);
        } else {
-               *objectid = BTRFS_FIRST_FREE_OBJECTID;
+               *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
        }
        ret = 0;
 error:
@@ -53,91 +54,27 @@ error:
        return ret;
 }
 
-/*
- * walks the btree of allocated inodes and find a hole.
- */
 int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 dirid, u64 *objectid)
 {
-       struct btrfs_path *path;
-       struct btrfs_key key;
        int ret;
-       int slot = 0;
-       u64 last_ino = 0;
-       int start_found;
-       struct extent_buffer *l;
-       struct btrfs_key search_key;
-       u64 search_start = dirid;
-
        mutex_lock(&root->objectid_mutex);
-       if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID &&
-           root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) {
-               *objectid = ++root->last_inode_alloc;
-               mutex_unlock(&root->objectid_mutex);
-               return 0;
-       }
-       path = btrfs_alloc_path();
-       BUG_ON(!path);
-       search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
-       search_key.objectid = search_start;
-       search_key.type = 0;
-       search_key.offset = 0;
-
-       start_found = 0;
-       ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
-       if (ret < 0)
-               goto error;
 
-       while (1) {
-               l = path->nodes[0];
-               slot = path->slots[0];
-               if (slot >= btrfs_header_nritems(l)) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret == 0)
-                               continue;
-                       if (ret < 0)
-                               goto error;
-                       if (!start_found) {
-                               *objectid = search_start;
-                               start_found = 1;
-                               goto found;
-                       }
-                       *objectid = last_ino > search_start ?
-                               last_ino : search_start;
-                       goto found;
-               }
-               btrfs_item_key_to_cpu(l, &key, slot);
-               if (key.objectid >= search_start) {
-                       if (start_found) {
-                               if (last_ino < search_start)
-                                       last_ino = search_start;
-                               if (key.objectid > last_ino) {
-                                       *objectid = last_ino;
-                                       goto found;
-                               }
-                       } else if (key.objectid > search_start) {
-                               *objectid = search_start;
-                               goto found;
-                       }
-               }
-               if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
-                       break;
+       if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
+               ret = btrfs_find_highest_inode(root, &root->highest_objectid);
+               if (ret)
+                       goto out;
+       }
 
-               start_found = 1;
-               last_ino = key.objectid + 1;
-               path->slots[0]++;
+       if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
+               ret = -ENOSPC;
+               goto out;
        }
-       BUG_ON(1);
-found:
-       btrfs_release_path(root, path);
-       btrfs_free_path(path);
-       BUG_ON(*objectid < search_start);
-       mutex_unlock(&root->objectid_mutex);
-       return 0;
-error:
-       btrfs_release_path(root, path);
-       btrfs_free_path(path);
+
+       *objectid = ++root->highest_objectid;
+       ret = 0;
+out:
        mutex_unlock(&root->objectid_mutex);
        return ret;
 }
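
The rewritten allocator replaces the btree walk with a counter: seed highest_objectid once from the highest existing inode, then hand out ++highest_objectid under the mutex until the id space runs out. A compact userspace sketch with pthreads; the constants and the stand-in btree lookup are assumptions for illustration:

        #include <pthread.h>
        #include <stdio.h>
        #include <errno.h>

        #define FIRST_FREE_OBJECTID 256ULL
        #define LAST_FREE_OBJECTID  (~0ULL - 256)

        static pthread_mutex_t objectid_mutex = PTHREAD_MUTEX_INITIALIZER;
        static unsigned long long highest_objectid;    /* 0 until seeded */

        static unsigned long long find_highest_inode(void)
        {
                /* stand-in for the btree lookup; pretend 1000 is the last inode */
                return 1000;
        }

        static int find_free_objectid(unsigned long long *objectid)
        {
                int ret = 0;

                pthread_mutex_lock(&objectid_mutex);
                if (highest_objectid < FIRST_FREE_OBJECTID)
                        highest_objectid = find_highest_inode();   /* seed once */
                if (highest_objectid >= LAST_FREE_OBJECTID) {
                        ret = -ENOSPC;
                        goto out;
                }
                *objectid = ++highest_objectid;
        out:
                pthread_mutex_unlock(&objectid_mutex);
                return ret;
        }

        int main(void)
        {
                unsigned long long id;
                if (!find_free_objectid(&id))
                        printf("allocated inode %llu\n", id);
                return 0;
        }
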
index 9096fd0ca3ca447a7195f489f03d862340173494..e9b76bcd1c129e0d1f4664f54fefd33caba72e08 100644 (file)
@@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
        }
 
        ret = btrfs_drop_extents(trans, root, inode, start,
-                                aligned_end, aligned_end, start, &hint_byte);
+                                aligned_end, aligned_end, start,
+                                &hint_byte, 1);
        BUG_ON(ret);
 
        if (isize > actual_end)
@@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
                                   inline_len, compressed_size,
                                   compressed_pages);
        BUG_ON(ret);
-       btrfs_drop_extent_cache(inode, start, aligned_end, 0);
+       btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
        return 0;
 }
 
@@ -425,7 +426,7 @@ again:
                        extent_clear_unlock_delalloc(inode,
                                                     &BTRFS_I(inode)->io_tree,
                                                     start, end, NULL, 1, 0,
-                                                    0, 1, 1, 1);
+                                                    0, 1, 1, 1, 0);
                        ret = 0;
                        goto free_pages_out;
                }
@@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode,
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 
                while (1) {
-                       spin_lock(&em_tree->lock);
+                       write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
@@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
                                             async_extent->start,
                                             async_extent->start +
                                             async_extent->ram_size - 1,
-                                            NULL, 1, 1, 0, 1, 1, 0);
+                                            NULL, 1, 1, 0, 1, 1, 0, 0);
 
                ret = btrfs_submit_compressed_write(inode,
                                    async_extent->start,
@@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode,
                        extent_clear_unlock_delalloc(inode,
                                                     &BTRFS_I(inode)->io_tree,
                                                     start, end, NULL, 1, 1,
-                                                    1, 1, 1, 1);
+                                                    1, 1, 1, 1, 0);
                        *nr_written = *nr_written +
                             (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
                        *page_started = 1;
@@ -725,6 +726,15 @@ static noinline int cow_file_range(struct inode *inode,
        BUG_ON(disk_num_bytes >
               btrfs_super_total_bytes(&root->fs_info->super_copy));
 
+
+       read_lock(&BTRFS_I(inode)->extent_tree.lock);
+       em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
+                                  start, num_bytes);
+       if (em) {
+               alloc_hint = em->block_start;
+               free_extent_map(em);
+       }
+       read_unlock(&BTRFS_I(inode)->extent_tree.lock);
        btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 
        while (disk_num_bytes > 0) {
@@ -737,7 +747,6 @@ static noinline int cow_file_range(struct inode *inode,
                em = alloc_extent_map(GFP_NOFS);
                em->start = start;
                em->orig_start = em->start;
-
                ram_size = ins.offset;
                em->len = ins.offset;
 
@@ -747,9 +756,9 @@ static noinline int cow_file_range(struct inode *inode,
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
                while (1) {
-                       spin_lock(&em_tree->lock);
+                       write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
@@ -776,11 +785,14 @@ static noinline int cow_file_range(struct inode *inode,
                /* we're not doing compressed IO, don't unlock the first
                 * page (which the caller expects to stay locked), don't
                 * clear any dirty bits and don't set any writeback bits
+                *
+                * Do set the Private2 bit so we know this page was properly
+                * set up for writepage
                 */
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                             start, start + ram_size - 1,
                                             locked_page, unlock, 1,
-                                            1, 0, 0, 0);
+                                            1, 0, 0, 0, 1);
                disk_num_bytes -= cur_alloc_size;
                num_bytes -= cur_alloc_size;
                alloc_hint = ins.objectid + ins.offset;
@@ -853,7 +865,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
        int limit = 10 * 1024 * 1024;
 
        clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
-                        EXTENT_DELALLOC, 1, 0, GFP_NOFS);
+                        EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
        while (start < end) {
                async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
                async_cow->inode = inode;
@@ -1080,9 +1092,9 @@ out_check:
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
                        while (1) {
-                               spin_lock(&em_tree->lock);
+                               write_lock(&em_tree->lock);
                                ret = add_extent_mapping(em_tree, em);
-                               spin_unlock(&em_tree->lock);
+                               write_unlock(&em_tree->lock);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
                                        break;
@@ -1101,7 +1113,7 @@ out_check:
 
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                        cur_offset, cur_offset + num_bytes - 1,
-                                       locked_page, 1, 1, 1, 0, 0, 0);
+                                       locked_page, 1, 1, 1, 0, 0, 0, 1);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@ -1374,10 +1386,8 @@ again:
        lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 
        /* already ordered? We're done */
-       if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                            EXTENT_ORDERED, 0)) {
+       if (PagePrivate2(page))
                goto out;
-       }
 
        ordered = btrfs_lookup_ordered_extent(inode, page_start);
        if (ordered) {
@@ -1413,11 +1423,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
        struct inode *inode = page->mapping->host;
        struct btrfs_writepage_fixup *fixup;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret;
 
-       ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
-                            EXTENT_ORDERED, 0);
-       if (ret)
+       /* this page is properly in the ordered list */
+       if (TestClearPagePrivate2(page))
                return 0;
 
        if (PageChecked(page))
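
With the EXTENT_ORDERED range bit gone, the ordered-IO bookkeeping rides on the per-page Private2 flag, and TestClearPagePrivate2() makes exactly one caller the owner of the completion work. A userspace sketch of that atomic test-and-clear handoff using C11 atomics:

        #include <stdatomic.h>
        #include <stdio.h>

        static atomic_int page_private2;

        static void set_private2(void)
        {
                atomic_store(&page_private2, 1);
        }

        static int test_clear_private2(void)
        {
                /* atomically read-and-clear: only one caller sees the old 1 */
                return atomic_exchange(&page_private2, 0);
        }

        int main(void)
        {
                set_private2();                 /* page set up for writepage */
                if (test_clear_private2())
                        printf("this caller does the ordered-IO accounting\n");
                if (!test_clear_private2())
                        printf("a second caller skips it\n");
                return 0;
        }
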
@@ -1455,9 +1463,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
 
        path->leave_spinning = 1;
+
+       /*
+        * we may be replacing one extent in the tree with another.
+        * The new extent is pinned in the extent map, and we don't want
+        * to drop it from the cache until it is completely in the btree.
+        *
+        * So, tell btrfs_drop_extents to leave this extent in the cache.
+        * The caller is expected to unpin it and allow it to be merged
+        * with the others.
+        */
        ret = btrfs_drop_extents(trans, root, inode, file_pos,
                                 file_pos + num_bytes, locked_end,
-                                file_pos, &hint);
+                                file_pos, &hint, 0);
        BUG_ON(ret);
 
        ins.objectid = inode->i_ino;
@@ -1485,7 +1503,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
 
        inode_add_bytes(inode, num_bytes);
-       btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
 
        ins.objectid = disk_bytenr;
        ins.offset = disk_num_bytes;
@@ -1596,6 +1613,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                                ordered_extent->len,
                                                compressed, 0, 0,
                                                BTRFS_FILE_EXTENT_REG);
+               unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+                                  ordered_extent->file_offset,
+                                  ordered_extent->len);
                BUG_ON(ret);
        }
        unlock_extent(io_tree, ordered_extent->file_offset,
@@ -1623,6 +1643,7 @@ nocow:
 static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
                                struct extent_state *state, int uptodate)
 {
+       ClearPagePrivate2(page);
        return btrfs_finish_ordered_io(page->mapping->host, start, end);
 }
 
@@ -1669,13 +1690,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
                failrec->last_mirror = 0;
                failrec->bio_flags = 0;
 
-               spin_lock(&em_tree->lock);
+               read_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, failrec->len);
                if (em->start > start || em->start + em->len < start) {
                        free_extent_map(em);
                        em = NULL;
                }
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
 
                if (!em || IS_ERR(em)) {
                        kfree(failrec);
@@ -1794,7 +1815,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                return 0;
 
        if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
-           test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) {
+           test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
                clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
                                  GFP_NOFS);
                return 0;
@@ -2352,6 +2373,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
        return ret;
 }
 
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct inode *dir, u64 objectid,
+                       const char *name, int name_len)
+{
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+       u64 index;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+                                  name, name_len, -1);
+       BUG_ON(!di || IS_ERR(di));
+
+       leaf = path->nodes[0];
+       btrfs_dir_item_key_to_cpu(leaf, di, &key);
+       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+       BUG_ON(ret);
+       btrfs_release_path(root, path);
+
+       ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+                                objectid, root->root_key.objectid,
+                                dir->i_ino, &index, name, name_len);
+       if (ret < 0) {
+               BUG_ON(ret != -ENOENT);
+               di = btrfs_search_dir_index_item(root, path, dir->i_ino,
+                                                name, name_len);
+               BUG_ON(!di || IS_ERR(di));
+
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               btrfs_release_path(root, path);
+               index = key.offset;
+       }
+
+       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
+                                        index, name, name_len, -1);
+       BUG_ON(!di || IS_ERR(di));
+
+       leaf = path->nodes[0];
+       btrfs_dir_item_key_to_cpu(leaf, di, &key);
+       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+       BUG_ON(ret);
+       btrfs_release_path(root, path);
+
+       btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+       dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+       ret = btrfs_update_inode(trans, root, dir);
+       BUG_ON(ret);
+       dir->i_sb->s_dirt = 1;
+
+       btrfs_free_path(path);
+       return 0;
+}
+
 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode = dentry->d_inode;
@@ -2361,29 +2445,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        struct btrfs_trans_handle *trans;
        unsigned long nr = 0;
 
-       /*
-        * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir
-        * the root of a subvolume or snapshot
-        */
        if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
-           inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+           inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
                return -ENOTEMPTY;
-       }
 
        trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, dir);
 
+       if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+               err = btrfs_unlink_subvol(trans, root, dir,
+                                         BTRFS_I(inode)->location.objectid,
+                                         dentry->d_name.name,
+                                         dentry->d_name.len);
+               goto out;
+       }
+
        err = btrfs_orphan_add(trans, inode);
        if (err)
-               goto fail_trans;
+               goto out;
 
        /* now the directory is empty */
        err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
                                 dentry->d_name.name, dentry->d_name.len);
        if (!err)
                btrfs_i_size_write(inode, 0);
-
-fail_trans:
+out:
        nr = trans->blocks_used;
        ret = btrfs_end_transaction_throttle(trans, root);
        btrfs_btree_balance_dirty(root, nr);
@@ -2935,7 +3021,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
                                                 cur_offset,
                                                 cur_offset + hole_size,
                                                 block_end,
-                                                cur_offset, &hint_byte);
+                                                cur_offset, &hint_byte, 1);
                        if (err)
                                break;
                        err = btrfs_insert_file_extent(trans, root,
@@ -3003,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode)
        }
        btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
+       if (inode->i_nlink > 0) {
+               BUG_ON(btrfs_root_refs(&root->root_item) != 0);
+               goto no_delete;
+       }
+
        btrfs_i_size_write(inode, 0);
        trans = btrfs_join_transaction(root, 1);
 
@@ -3070,29 +3161,67 @@ out_err:
  * is kind of like crossing a mount point.
  */
 static int fixup_tree_root_location(struct btrfs_root *root,
-                            struct btrfs_key *location,
-                            struct btrfs_root **sub_root,
-                            struct dentry *dentry)
+                                   struct inode *dir,
+                                   struct dentry *dentry,
+                                   struct btrfs_key *location,
+                                   struct btrfs_root **sub_root)
 {
-       struct btrfs_root_item *ri;
+       struct btrfs_path *path;
+       struct btrfs_root *new_root;
+       struct btrfs_root_ref *ref;
+       struct extent_buffer *leaf;
+       int ret;
+       int err = 0;
 
-       if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
-               return 0;
-       if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
-               return 0;
+       path = btrfs_alloc_path();
+       if (!path) {
+               err = -ENOMEM;
+               goto out;
+       }
 
-       *sub_root = btrfs_read_fs_root(root->fs_info, location,
-                                       dentry->d_name.name,
-                                       dentry->d_name.len);
-       if (IS_ERR(*sub_root))
-               return PTR_ERR(*sub_root);
+       err = -ENOENT;
+       ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
+                                 BTRFS_I(dir)->root->root_key.objectid,
+                                 location->objectid);
+       if (ret) {
+               if (ret < 0)
+                       err = ret;
+               goto out;
+       }
 
-       ri = &(*sub_root)->root_item;
-       location->objectid = btrfs_root_dirid(ri);
-       btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
-       location->offset = 0;
+       leaf = path->nodes[0];
+       ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
+       if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino ||
+           btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
+               goto out;
 
-       return 0;
+       ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
+                                  (unsigned long)(ref + 1),
+                                  dentry->d_name.len);
+       if (ret)
+               goto out;
+
+       btrfs_release_path(root->fs_info->tree_root, path);
+
+       new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
+       if (IS_ERR(new_root)) {
+               err = PTR_ERR(new_root);
+               goto out;
+       }
+
+       if (btrfs_root_refs(&new_root->root_item) == 0) {
+               err = -ENOENT;
+               goto out;
+       }
+
+       *sub_root = new_root;
+       location->objectid = btrfs_root_dirid(&new_root->root_item);
+       location->type = BTRFS_INODE_ITEM_KEY;
+       location->offset = 0;
+       err = 0;
+out:
+       btrfs_free_path(path);
+       return err;
 }
 
 static void inode_tree_add(struct inode *inode)
@@ -3101,11 +3230,13 @@ static void inode_tree_add(struct inode *inode)
        struct btrfs_inode *entry;
        struct rb_node **p;
        struct rb_node *parent;
-
 again:
        p = &root->inode_tree.rb_node;
        parent = NULL;
 
+       if (hlist_unhashed(&inode->i_hash))
+               return;
+
        spin_lock(&root->inode_lock);
        while (*p) {
                parent = *p;
@@ -3132,13 +3263,87 @@ again:
 static void inode_tree_del(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       int empty = 0;
 
        spin_lock(&root->inode_lock);
        if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
                rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
                RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
+               empty = RB_EMPTY_ROOT(&root->inode_tree);
        }
        spin_unlock(&root->inode_lock);
+
+       if (empty && btrfs_root_refs(&root->root_item) == 0) {
+               synchronize_srcu(&root->fs_info->subvol_srcu);
+               spin_lock(&root->inode_lock);
+               empty = RB_EMPTY_ROOT(&root->inode_tree);
+               spin_unlock(&root->inode_lock);
+               if (empty)
+                       btrfs_add_dead_root(root);
+       }
+}
+
+int btrfs_invalidate_inodes(struct btrfs_root *root)
+{
+       struct rb_node *node;
+       struct rb_node *prev;
+       struct btrfs_inode *entry;
+       struct inode *inode;
+       u64 objectid = 0;
+
+       WARN_ON(btrfs_root_refs(&root->root_item) != 0);
+
+       spin_lock(&root->inode_lock);
+again:
+       node = root->inode_tree.rb_node;
+       prev = NULL;
+       while (node) {
+               prev = node;
+               entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+               if (objectid < entry->vfs_inode.i_ino)
+                       node = node->rb_left;
+               else if (objectid > entry->vfs_inode.i_ino)
+                       node = node->rb_right;
+               else
+                       break;
+       }
+       if (!node) {
+               while (prev) {
+                       entry = rb_entry(prev, struct btrfs_inode, rb_node);
+                       if (objectid <= entry->vfs_inode.i_ino) {
+                               node = prev;
+                               break;
+                       }
+                       prev = rb_next(prev);
+               }
+       }
+       while (node) {
+               entry = rb_entry(node, struct btrfs_inode, rb_node);
+               objectid = entry->vfs_inode.i_ino + 1;
+               inode = igrab(&entry->vfs_inode);
+               if (inode) {
+                       spin_unlock(&root->inode_lock);
+                       if (atomic_read(&inode->i_count) > 1)
+                               d_prune_aliases(inode);
+                       /*
+                        * btrfs_drop_inode will remove it from
+                        * the inode cache when its usage count
+                        * hits zero.
+                        */
+                       iput(inode);
+                       cond_resched();
+                       spin_lock(&root->inode_lock);
+                       goto again;
+               }
+
+               if (cond_resched_lock(&root->inode_lock))
+                       goto again;
+
+               node = rb_next(node);
+       }
+       spin_unlock(&root->inode_lock);
+       return 0;
 }
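
btrfs_invalidate_inodes() cannot hold inode_lock across iput(), so it records the last objectid visited, drops the lock, and restarts the rb-tree walk from objectid + 1. A sketch of the same resumable-walk pattern over a sorted array:

        #include <stdio.h>

        static unsigned long inodes[] = { 10, 11, 42, 99 };
        #define N (sizeof(inodes) / sizeof(inodes[0]))

        int main(void)
        {
                unsigned long objectid = 0;

        again:
                for (unsigned long i = 0; i < N; i++) {
                        if (inodes[i] < objectid)
                                continue;               /* visited before the restart */
                        objectid = inodes[i] + 1;       /* resume point for next pass */
                        printf("dropping inode %lu\n", inodes[i]);
                        /* a real walk would unlock, iput(), relock, then... */
                        goto again;                     /* ...restart from the resume point */
                }
                return 0;
        }
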
 
 static noinline void init_btrfs_i(struct inode *inode)
@@ -3225,15 +3430,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
        return inode;
 }
 
+static struct inode *new_simple_dir(struct super_block *s,
+                                   struct btrfs_key *key,
+                                   struct btrfs_root *root)
+{
+       struct inode *inode = new_inode(s);
+
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+
+       init_btrfs_i(inode);
+
+       BTRFS_I(inode)->root = root;
+       memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
+       BTRFS_I(inode)->dummy_inode = 1;
+
+       inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
+       inode->i_op = &simple_dir_inode_operations;
+       inode->i_fop = &simple_dir_operations;
+       inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
+       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+       return inode;
+}
+
 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode;
-       struct btrfs_inode *bi = BTRFS_I(dir);
-       struct btrfs_root *root = bi->root;
+       struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_root *sub_root = root;
        struct btrfs_key location;
+       int index;
        int ret;
 
+       dentry->d_op = &btrfs_dentry_operations;
+
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
 
@@ -3242,29 +3473,50 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
        if (ret < 0)
                return ERR_PTR(ret);
 
-       inode = NULL;
-       if (location.objectid) {
-               ret = fixup_tree_root_location(root, &location, &sub_root,
-                                               dentry);
-               if (ret < 0)
-                       return ERR_PTR(ret);
-               if (ret > 0)
-                       return ERR_PTR(-ENOENT);
+       if (location.objectid == 0)
+               return NULL;
+
+       if (location.type == BTRFS_INODE_ITEM_KEY) {
+               inode = btrfs_iget(dir->i_sb, &location, root);
+               return inode;
+       }
+
+       BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
+
+       index = srcu_read_lock(&root->fs_info->subvol_srcu);
+       ret = fixup_tree_root_location(root, dir, dentry,
+                                      &location, &sub_root);
+       if (ret < 0) {
+               if (ret != -ENOENT)
+                       inode = ERR_PTR(ret);
+               else
+                       inode = new_simple_dir(dir->i_sb, &location, sub_root);
+       } else {
                inode = btrfs_iget(dir->i_sb, &location, sub_root);
-               if (IS_ERR(inode))
-                       return ERR_CAST(inode);
        }
+       srcu_read_unlock(&root->fs_info->subvol_srcu, index);
+
        return inode;
 }
 
+static int btrfs_dentry_delete(struct dentry *dentry)
+{
+       struct btrfs_root *root;
+
+       if (!dentry->d_inode)
+               return 0;
+
+       root = BTRFS_I(dentry->d_inode)->root;
+       if (btrfs_root_refs(&root->root_item) == 0)
+               return 1;
+       return 0;
+}
+
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                   struct nameidata *nd)
 {
        struct inode *inode;
 
-       if (dentry->d_name.len > BTRFS_NAME_LEN)
-               return ERR_PTR(-ENAMETOOLONG);
-
        inode = btrfs_lookup_dentry(dir, dentry);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
@@ -3603,9 +3855,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (ret != 0)
                goto fail;
 
-       if (objectid > root->highest_inode)
-               root->highest_inode = objectid;
-
        inode->i_uid = current_fsuid();
 
        if (dir && (dir->i_mode & S_ISGID)) {
@@ -3673,26 +3922,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
                   struct inode *parent_inode, struct inode *inode,
                   const char *name, int name_len, int add_backref, u64 index)
 {
-       int ret;
+       int ret = 0;
        struct btrfs_key key;
        struct btrfs_root *root = BTRFS_I(parent_inode)->root;
 
-       key.objectid = inode->i_ino;
-       btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
-       key.offset = 0;
+       if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
+       } else {
+               key.objectid = inode->i_ino;
+               btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+               key.offset = 0;
+       }
+
+       if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
+                                        key.objectid, root->root_key.objectid,
+                                        parent_inode->i_ino,
+                                        index, name, name_len);
+       } else if (add_backref) {
+               ret = btrfs_insert_inode_ref(trans, root,
+                                            name, name_len, inode->i_ino,
+                                            parent_inode->i_ino, index);
+       }
 
-       ret = btrfs_insert_dir_item(trans, root, name, name_len,
-                                   parent_inode->i_ino,
-                                   &key, btrfs_inode_type(inode),
-                                   index);
        if (ret == 0) {
-               if (add_backref) {
-                       ret = btrfs_insert_inode_ref(trans, root,
-                                                    name, name_len,
-                                                    inode->i_ino,
-                                                    parent_inode->i_ino,
-                                                    index);
-               }
+               ret = btrfs_insert_dir_item(trans, root, name, name_len,
+                                           parent_inode->i_ino, &key,
+                                           btrfs_inode_type(inode), index);
+               BUG_ON(ret);
+
                btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                   name_len * 2);
                parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
@@ -3875,18 +4133,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 
        err = btrfs_add_nondir(trans, dentry, inode, 1, index);
 
-       if (err)
-               drop_inode = 1;
-
-       btrfs_update_inode_block_group(trans, dir);
-       err = btrfs_update_inode(trans, root, inode);
-
-       if (err)
+       if (err) {
                drop_inode = 1;
+       } else {
+               btrfs_update_inode_block_group(trans, dir);
+               err = btrfs_update_inode(trans, root, inode);
+               BUG_ON(err);
+               btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
+       }
 
        nr = trans->blocks_used;
-
-       btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
        btrfs_end_transaction_throttle(trans, root);
 fail:
        if (drop_inode) {
@@ -4064,11 +4320,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
        int compressed;
 
 again:
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em)
                em->bdev = root->fs_info->fs_devices->latest_bdev;
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        if (em) {
                if (em->start > start || em->start + em->len <= start)
@@ -4215,6 +4471,11 @@ again:
                                map = kmap(page);
                                read_extent_buffer(leaf, map + pg_offset, ptr,
                                                   copy_size);
+                               if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
+                                       memset(map + pg_offset + copy_size, 0,
+                                              PAGE_CACHE_SIZE - pg_offset -
+                                              copy_size);
+                               }
                                kunmap(page);
                        }
                        flush_dcache_page(page);
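
The added memset() fixes a stale-data leak: when an inline extent fills only part of the page, the bytes past copy_size must be zeroed rather than exposing whatever the page held before. A minimal userspace illustration of the tail-zeroing:

        #include <stdio.h>
        #include <string.h>

        #define PAGE_SIZE 4096

        int main(void)
        {
                static char page[PAGE_SIZE];
                size_t pg_offset = 0, copy_size = 100;

                memset(page, 0xaa, sizeof(page));           /* stale contents */
                memset(page + pg_offset, 'x', copy_size);   /* "copied" extent data */

                if (pg_offset + copy_size < PAGE_SIZE)
                        memset(page + pg_offset + copy_size, 0,
                               PAGE_SIZE - pg_offset - copy_size);

                printf("byte after the extent: %d\n", page[pg_offset + copy_size]);
                return 0;
        }
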
@@ -4259,7 +4520,7 @@ insert:
        }
 
        err = 0;
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        /* it is possible that someone inserted the extent into the tree
         * while we had the lock dropped.  It is also possible that
@@ -4299,7 +4560,7 @@ insert:
                        err = 0;
                }
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 out:
        if (path)
                btrfs_free_path(path);
@@ -4398,13 +4659,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
 
+
+       /*
+        * we have the page locked, so new writeback can't start,
+        * and the dirty bit won't be cleared while we are here.
+        *
+        * Wait for IO on this page so that we can safely clear
+        * the PagePrivate2 bit and do ordered accounting
+        */
        wait_on_page_writeback(page);
+
        tree = &BTRFS_I(page->mapping->host)->io_tree;
        if (offset) {
                btrfs_releasepage(page, GFP_NOFS);
                return;
        }
-
        lock_extent(tree, page_start, page_end, GFP_NOFS);
        ordered = btrfs_lookup_ordered_extent(page->mapping->host,
                                           page_offset(page));
@@ -4415,16 +4684,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                 */
                clear_extent_bit(tree, page_start, page_end,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED, 1, 0, GFP_NOFS);
-               btrfs_finish_ordered_io(page->mapping->host,
-                                       page_start, page_end);
+                                EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+               /*
+                * whoever cleared the private bit is responsible
+                * for the finish_ordered_io
+                */
+               if (TestClearPagePrivate2(page)) {
+                       btrfs_finish_ordered_io(page->mapping->host,
+                                               page_start, page_end);
+               }
                btrfs_put_ordered_extent(ordered);
                lock_extent(tree, page_start, page_end, GFP_NOFS);
        }
        clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-                EXTENT_ORDERED,
-                1, 1, GFP_NOFS);
+                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+                1, 1, NULL, GFP_NOFS);
        __btrfs_releasepage(page, GFP_NOFS);
 
        ClearPageChecked(page);
@@ -4521,11 +4795,14 @@ again:
        }
        ClearPageChecked(page);
        set_page_dirty(page);
+       SetPageUptodate(page);
 
        BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
        unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
 out_unlock:
+       if (!ret)
+               return VM_FAULT_LOCKED;
        unlock_page(page);
 out:
        return ret;
@@ -4594,11 +4871,11 @@ out:
  * create a new subvolume directory/inode (helper for the ioctl).
  */
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, struct dentry *dentry,
+                            struct btrfs_root *new_root,
                             u64 new_dirid, u64 alloc_hint)
 {
        struct inode *inode;
-       int error;
+       int err;
        u64 index = 0;
 
        inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
@@ -4611,11 +4888,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
        inode->i_nlink = 1;
        btrfs_i_size_write(inode, 0);
 
-       error = btrfs_update_inode(trans, new_root, inode);
-       if (error)
-               return error;
+       err = btrfs_update_inode(trans, new_root, inode);
+       BUG_ON(err);
 
-       d_instantiate(dentry, inode);
+       iput(inode);
        return 0;
 }
 
@@ -4693,6 +4969,16 @@ void btrfs_destroy_inode(struct inode *inode)
        kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
 
+void btrfs_drop_inode(struct inode *inode)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+
+       if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
+               generic_delete_inode(inode);
+       else
+               generic_drop_inode(inode);
+}
+
 static void init_once(void *foo)
 {
        struct btrfs_inode *ei = (struct btrfs_inode *) foo;
@@ -4761,31 +5047,32 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(old_dir)->root;
+       struct btrfs_root *dest = BTRFS_I(new_dir)->root;
        struct inode *new_inode = new_dentry->d_inode;
        struct inode *old_inode = old_dentry->d_inode;
        struct timespec ctime = CURRENT_TIME;
        u64 index = 0;
+       u64 root_objectid;
        int ret;
 
-       /* we're not allowed to rename between subvolumes */
-       if (BTRFS_I(old_inode)->root->root_key.objectid !=
-           BTRFS_I(new_dir)->root->root_key.objectid)
+       if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+               return -EPERM;
+
+       /* we only allow renaming a subvolume link between subvolumes */
+       if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
                return -EXDEV;
 
-       if (S_ISDIR(old_inode->i_mode) && new_inode &&
-           new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
+       if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
+           (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID))
                return -ENOTEMPTY;
-       }
 
-       /* to rename a snapshot or subvolume, we need to juggle the
-        * backrefs.  This isn't coded yet
-        */
-       if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
-               return -EXDEV;
+       if (S_ISDIR(old_inode->i_mode) && new_inode &&
+           new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
+               return -ENOTEMPTY;
 
        ret = btrfs_check_metadata_free_space(root);
        if (ret)
-               goto out_unlock;
+               return ret;
 
        /*
         * we're using rename to replace one file with another.
@@ -4796,8 +5083,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
            old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
                filemap_flush(old_inode->i_mapping);
 
+       /* close the race window against the snapshot create/destroy ioctls */
+       if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+               down_read(&root->fs_info->subvol_sem);
+
        trans = btrfs_start_transaction(root, 1);
+       btrfs_set_trans_block_group(trans, new_dir);
+
+       if (dest != root)
+               btrfs_record_root_in_trans(trans, dest);
 
+       ret = btrfs_set_inode_index(new_dir, &index);
+       if (ret)
+               goto out_fail;
+
+       if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               /* force full log commit if subvolume involved. */
+               root->fs_info->last_trans_log_full_commit = trans->transid;
+       } else {
+               ret = btrfs_insert_inode_ref(trans, dest,
+                                            new_dentry->d_name.name,
+                                            new_dentry->d_name.len,
+                                            old_inode->i_ino,
+                                            new_dir->i_ino, index);
+               if (ret)
+                       goto out_fail;
+               /*
+                * this is an ugly little race, but the rename is required
+                * to make sure that if we crash, the inode is either at the
+                * old name or the new one.  pinning the log transaction lets
+                * us make sure we don't allow a log commit to come in after
+                * we unlink the name but before we add the new name back in.
+                */
+               btrfs_pin_log_trans(root);
+       }
        /*
         * make sure the inode gets flushed if it is replacing
         * something.
@@ -4807,18 +5126,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                btrfs_add_ordered_operation(trans, root, old_inode);
        }
 
-       /*
-        * this is an ugly little race, but the rename is required to make
-        * sure that if we crash, the inode is either at the old name
-        * or the new one.  pinning the log transaction lets us make sure
-        * we don't allow a log commit to come in after we unlink the
-        * name but before we add the new name back in.
-        */
-       btrfs_pin_log_trans(root);
-
-       btrfs_set_trans_block_group(trans, new_dir);
-
-       btrfs_inc_nlink(old_dentry->d_inode);
        old_dir->i_ctime = old_dir->i_mtime = ctime;
        new_dir->i_ctime = new_dir->i_mtime = ctime;
        old_inode->i_ctime = ctime;
@@ -4826,47 +5133,58 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (old_dentry->d_parent != new_dentry->d_parent)
                btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
 
-       ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
-                                old_dentry->d_name.name,
-                                old_dentry->d_name.len);
-       if (ret)
-               goto out_fail;
+       if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
+               ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
+                                       old_dentry->d_name.name,
+                                       old_dentry->d_name.len);
+       } else {
+               btrfs_inc_nlink(old_dentry->d_inode);
+               ret = btrfs_unlink_inode(trans, root, old_dir,
+                                        old_dentry->d_inode,
+                                        old_dentry->d_name.name,
+                                        old_dentry->d_name.len);
+       }
+       BUG_ON(ret);
 
        if (new_inode) {
                new_inode->i_ctime = CURRENT_TIME;
-               ret = btrfs_unlink_inode(trans, root, new_dir,
-                                        new_dentry->d_inode,
-                                        new_dentry->d_name.name,
-                                        new_dentry->d_name.len);
-               if (ret)
-                       goto out_fail;
+               if (unlikely(new_inode->i_ino ==
+                            BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+                       root_objectid = BTRFS_I(new_inode)->location.objectid;
+                       ret = btrfs_unlink_subvol(trans, dest, new_dir,
+                                               root_objectid,
+                                               new_dentry->d_name.name,
+                                               new_dentry->d_name.len);
+                       BUG_ON(new_inode->i_nlink == 0);
+               } else {
+                       ret = btrfs_unlink_inode(trans, dest, new_dir,
+                                                new_dentry->d_inode,
+                                                new_dentry->d_name.name,
+                                                new_dentry->d_name.len);
+               }
+               BUG_ON(ret);
                if (new_inode->i_nlink == 0) {
                        ret = btrfs_orphan_add(trans, new_dentry->d_inode);
-                       if (ret)
-                               goto out_fail;
+                       BUG_ON(ret);
                }
-
        }
-       ret = btrfs_set_inode_index(new_dir, &index);
-       if (ret)
-               goto out_fail;
 
-       ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode,
-                            old_inode, new_dentry->d_name.name,
-                            new_dentry->d_name.len, 1, index);
-       if (ret)
-               goto out_fail;
+       ret = btrfs_add_link(trans, new_dir, old_inode,
+                            new_dentry->d_name.name,
+                            new_dentry->d_name.len, 0, index);
+       BUG_ON(ret);
 
-       btrfs_log_new_name(trans, old_inode, old_dir,
-                                      new_dentry->d_parent);
+       if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+               btrfs_log_new_name(trans, old_inode, old_dir,
+                                  new_dentry->d_parent);
+               btrfs_end_log_trans(root);
+       }
 out_fail:
-
-       /* this btrfs_end_log_trans just allows the current
-        * log-sub transaction to complete
-        */
-       btrfs_end_log_trans(root);
        btrfs_end_transaction_throttle(trans, root);
-out_unlock:
+
+       if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+               up_read(&root->fs_info->subvol_sem);
        return ret;
 }
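The rename rework splits two cases: ordinary inodes keep the log-pinning scheme, while subvolume roots (i_ino == BTRFS_FIRST_FREE_OBJECTID) force a full log commit instead, presumably because a cross-root directory change cannot be represented in the per-root tree log. The pin/unpin pairing for the ordinary case, distilled from the hunks above:

        /* hold the log transaction open across the name swap */
        btrfs_pin_log_trans(root);
        /* ... unlink the old name, add the new one back ... */
        btrfs_log_new_name(trans, old_inode, old_dir, new_dentry->d_parent);
        btrfs_end_log_trans(root);      /* releases the pin */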
 
@@ -5058,6 +5376,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
                                                  0, 0, 0,
                                                  BTRFS_FILE_EXTENT_PREALLOC);
                BUG_ON(ret);
+               btrfs_drop_extent_cache(inode, cur_offset,
+                                       cur_offset + ins.offset - 1, 0);
                num_bytes -= ins.offset;
                cur_offset += ins.offset;
                alloc_hint = ins.objectid + ins.offset;
@@ -5223,6 +5543,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
        .permission     = btrfs_permission,
 };
+
 static struct file_operations btrfs_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
@@ -5269,6 +5590,7 @@ static const struct address_space_operations btrfs_aops = {
        .invalidatepage = btrfs_invalidatepage,
        .releasepage    = btrfs_releasepage,
        .set_page_dirty = btrfs_set_page_dirty,
+       .error_remove_page = generic_error_remove_page,
 };
 
 static const struct address_space_operations btrfs_symlink_aops = {
@@ -5309,3 +5631,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
 };
+
+struct dentry_operations btrfs_dentry_operations = {
+       .d_delete       = btrfs_dentry_delete,
+};
index bd88f25889f7c5daf94bf0aec8645041bfb2ce60..a8577a7f26ab248984357a0b8b6505013cff3dba 100644 (file)
@@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root,
        struct btrfs_root_item root_item;
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
-       struct btrfs_root *new_root = root;
-       struct inode *dir;
+       struct btrfs_root *new_root;
+       struct inode *dir = dentry->d_parent->d_inode;
        int ret;
        int err;
        u64 objectid;
@@ -241,7 +241,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 
        ret = btrfs_check_metadata_free_space(root);
        if (ret)
-               goto fail_commit;
+               return ret;
 
        trans = btrfs_start_transaction(root, 1);
        BUG_ON(!trans);
@@ -304,11 +304,17 @@ static noinline int create_subvol(struct btrfs_root *root,
        if (ret)
                goto fail;
 
+       key.offset = (u64)-1;
+       new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
+       BUG_ON(IS_ERR(new_root));
+
+       btrfs_record_root_in_trans(trans, new_root);
+
+       ret = btrfs_create_subvol_root(trans, new_root, new_dirid,
+                                      BTRFS_I(dir)->block_group);
        /*
         * insert the directory item
         */
-       key.offset = (u64)-1;
-       dir = dentry->d_parent->d_inode;
        ret = btrfs_set_inode_index(dir, &index);
        BUG_ON(ret);
 
@@ -322,44 +328,18 @@ static noinline int create_subvol(struct btrfs_root *root,
        ret = btrfs_update_inode(trans, root, dir);
        BUG_ON(ret);
 
-       /* add the backref first */
        ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
-                                objectid, BTRFS_ROOT_BACKREF_KEY,
-                                root->root_key.objectid,
+                                objectid, root->root_key.objectid,
                                 dir->i_ino, index, name, namelen);
 
        BUG_ON(ret);
 
-       /* now add the forward ref */
-       ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
-                                root->root_key.objectid, BTRFS_ROOT_REF_KEY,
-                                objectid,
-                                dir->i_ino, index, name, namelen);
-
-       BUG_ON(ret);
-
-       ret = btrfs_commit_transaction(trans, root);
-       if (ret)
-               goto fail_commit;
-
-       new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
-       BUG_ON(!new_root);
-
-       trans = btrfs_start_transaction(new_root, 1);
-       BUG_ON(!trans);
-
-       ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid,
-                                      BTRFS_I(dir)->block_group);
-       if (ret)
-               goto fail;
-
+       d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
        nr = trans->blocks_used;
-       err = btrfs_commit_transaction(trans, new_root);
+       err = btrfs_commit_transaction(trans, root);
        if (err && !ret)
                ret = err;
-fail_commit:
-       btrfs_btree_balance_dirty(root, nr);
        return ret;
 }
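create_subvol() now does everything inside a single transaction: instead of committing first and then reopening the new root, it looks the root up directly with btrfs_read_fs_root_no_name() and records it in the running transaction. The lookup key, assuming key.objectid and key.type were filled in earlier in the function (that part of the diff is elided above):

        key.objectid = objectid;        /* id of the just-inserted root item */
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;           /* match the newest generation */
        new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);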
 
@@ -420,14 +400,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
  * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
  * inside this filesystem so it's quite a bit simpler.
  */
-static noinline int btrfs_mksubvol(struct path *parent, char *name,
-                                  int mode, int namelen,
+static noinline int btrfs_mksubvol(struct path *parent,
+                                  char *name, int namelen,
                                   struct btrfs_root *snap_src)
 {
+       struct inode *dir = parent->dentry->d_inode;
        struct dentry *dentry;
        int error;
 
-       mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
 
        dentry = lookup_one_len(name, parent->dentry, namelen);
        error = PTR_ERR(dentry);
@@ -438,99 +419,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name,
        if (dentry->d_inode)
                goto out_dput;
 
-       if (!IS_POSIXACL(parent->dentry->d_inode))
-               mode &= ~current_umask();
-
        error = mnt_want_write(parent->mnt);
        if (error)
                goto out_dput;
 
-       error = btrfs_may_create(parent->dentry->d_inode, dentry);
+       error = btrfs_may_create(dir, dentry);
        if (error)
                goto out_drop_write;
 
-       /*
-        * Actually perform the low-level subvolume creation after all
-        * this VFS fuzz.
-        *
-        * Eventually we want to pass in an inode under which we create this
-        * subvolume, but for now all are under the filesystem root.
-        *
-        * Also we should pass on the mode eventually to allow creating new
-        * subvolume with specific mode bits.
-        */
+       down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
+
+       if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
+               goto out_up_read;
+
        if (snap_src) {
-               struct dentry *dir = dentry->d_parent;
-               struct dentry *test = dir->d_parent;
-               struct btrfs_path *path = btrfs_alloc_path();
-               int ret;
-               u64 test_oid;
-               u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid;
-
-               test_oid = snap_src->root_key.objectid;
-
-               ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
-                                         path, parent_oid, test_oid);
-               if (ret == 0)
-                       goto create;
-               btrfs_release_path(snap_src->fs_info->tree_root, path);
-
-               /* we need to make sure we aren't creating a directory loop
-                * by taking a snapshot of something that has our current
-                * subvol in its directory tree.  So, this loops through
-                * the dentries and checks the forward refs for each subvolume
-                * to see if is references the subvolume where we are
-                * placing this new snapshot.
-                */
-               while (1) {
-                       if (!test ||
-                           dir == snap_src->fs_info->sb->s_root ||
-                           test == snap_src->fs_info->sb->s_root ||
-                           test->d_inode->i_sb != snap_src->fs_info->sb) {
-                               break;
-                       }
-                       if (S_ISLNK(test->d_inode->i_mode)) {
-                               printk(KERN_INFO "Btrfs symlink in snapshot "
-                                      "path, failed\n");
-                               error = -EMLINK;
-                               btrfs_free_path(path);
-                               goto out_drop_write;
-                       }
-                       test_oid =
-                               BTRFS_I(test->d_inode)->root->root_key.objectid;
-                       ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
-                                 path, test_oid, parent_oid);
-                       if (ret == 0) {
-                               printk(KERN_INFO "Btrfs snapshot creation "
-                                      "failed, looping\n");
-                               error = -EMLINK;
-                               btrfs_free_path(path);
-                               goto out_drop_write;
-                       }
-                       btrfs_release_path(snap_src->fs_info->tree_root, path);
-                       test = test->d_parent;
-               }
-create:
-               btrfs_free_path(path);
-               error = create_snapshot(snap_src, dentry, name, namelen);
+               error = create_snapshot(snap_src, dentry,
+                                       name, namelen);
        } else {
-               error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root,
-                                     dentry, name, namelen);
+               error = create_subvol(BTRFS_I(dir)->root, dentry,
+                                     name, namelen);
        }
-       if (error)
-               goto out_drop_write;
-
-       fsnotify_mkdir(parent->dentry->d_inode, dentry);
+       if (!error)
+               fsnotify_mkdir(dir, dentry);
+out_up_read:
+       up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
 out_drop_write:
        mnt_drop_write(parent->mnt);
 out_dput:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&parent->dentry->d_inode->i_mutex);
+       mutex_unlock(&dir->i_mutex);
        return error;
 }
 
-
 static int btrfs_defrag_file(struct file *file)
 {
        struct inode *inode = fdentry(file)->d_inode;
@@ -596,9 +517,8 @@ again:
                clear_page_dirty_for_io(page);
 
                btrfs_set_extent_delalloc(inode, page_start, page_end);
-
-               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                set_page_dirty(page);
+               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                unlock_page(page);
                page_cache_release(page);
                balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
@@ -609,7 +529,8 @@ out_unlock:
        return 0;
 }
 
-static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
+static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
+                                       void __user *arg)
 {
        u64 new_size;
        u64 old_size;
@@ -718,10 +639,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
 {
        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
        struct btrfs_ioctl_vol_args *vol_args;
-       struct btrfs_dir_item *di;
-       struct btrfs_path *path;
        struct file *src_file;
-       u64 root_dirid;
        int namelen;
        int ret = 0;
 
@@ -739,32 +657,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
                goto out;
        }
 
-       path = btrfs_alloc_path();
-       if (!path) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
-       di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
-                           path, root_dirid,
-                           vol_args->name, namelen, 0);
-       btrfs_free_path(path);
-
-       if (di && !IS_ERR(di)) {
-               ret = -EEXIST;
-               goto out;
-       }
-
-       if (IS_ERR(di)) {
-               ret = PTR_ERR(di);
-               goto out;
-       }
-
        if (subvol) {
-               ret = btrfs_mksubvol(&file->f_path, vol_args->name,
-                                    file->f_path.dentry->d_inode->i_mode,
-                                    namelen, NULL);
+               ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+                                    NULL);
        } else {
                struct inode *src_inode;
                src_file = fget(vol_args->fd);
@@ -781,17 +676,156 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
                        fput(src_file);
                        goto out;
                }
-               ret = btrfs_mksubvol(&file->f_path, vol_args->name,
-                            file->f_path.dentry->d_inode->i_mode,
-                            namelen, BTRFS_I(src_inode)->root);
+               ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+                                    BTRFS_I(src_inode)->root);
                fput(src_file);
        }
-
 out:
        kfree(vol_args);
        return ret;
 }
 
+/*
+ * helper to check if the subvolume references other subvolumes
+ */
+static noinline int may_destroy_subvol(struct btrfs_root *root)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = root->root_key.objectid;
+       key.type = BTRFS_ROOT_REF_KEY;
+       key.offset = (u64)-1;
+
+       ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
+                               &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       BUG_ON(ret == 0);
+
+       ret = 0;
+       if (path->slots[0] > 0) {
+               path->slots[0]--;
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               if (key.objectid == root->root_key.objectid &&
+                   key.type == BTRFS_ROOT_REF_KEY)
+                       ret = -ENOTEMPTY;
+       }
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
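may_destroy_subvol() uses the common btrfs search idiom of seeking past the largest possible key and stepping back one slot. Forward references are keyed (parent_root_id, BTRFS_ROOT_REF_KEY, child_root_id), as the root-tree.c hunks later in this patch show, so if the item one slot back still matches this root's objectid and type, the subvolume has at least one child and must not be destroyed. The key that positions the search:

        key.objectid = root->root_key.objectid; /* subvolume being checked */
        key.type = BTRFS_ROOT_REF_KEY;
        key.offset = (u64)-1;                   /* sorts after any real child id */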
+static noinline int btrfs_ioctl_snap_destroy(struct file *file,
+                                            void __user *arg)
+{
+       struct dentry *parent = fdentry(file);
+       struct dentry *dentry;
+       struct inode *dir = parent->d_inode;
+       struct inode *inode;
+       struct btrfs_root *root = BTRFS_I(dir)->root;
+       struct btrfs_root *dest = NULL;
+       struct btrfs_ioctl_vol_args *vol_args;
+       struct btrfs_trans_handle *trans;
+       int namelen;
+       int ret;
+       int err = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       vol_args = memdup_user(arg, sizeof(*vol_args));
+       if (IS_ERR(vol_args))
+               return PTR_ERR(vol_args);
+
+       vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+       namelen = strlen(vol_args->name);
+       if (strchr(vol_args->name, '/') ||
+           strncmp(vol_args->name, "..", namelen) == 0) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       err = mnt_want_write(file->f_path.mnt);
+       if (err)
+               goto out;
+
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       dentry = lookup_one_len(vol_args->name, parent, namelen);
+       if (IS_ERR(dentry)) {
+               err = PTR_ERR(dentry);
+               goto out_unlock_dir;
+       }
+
+       if (!dentry->d_inode) {
+               err = -ENOENT;
+               goto out_dput;
+       }
+
+       inode = dentry->d_inode;
+       if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+               err = -EINVAL;
+               goto out_dput;
+       }
+
+       dest = BTRFS_I(inode)->root;
+
+       mutex_lock(&inode->i_mutex);
+       err = d_invalidate(dentry);
+       if (err)
+               goto out_unlock;
+
+       down_write(&root->fs_info->subvol_sem);
+
+       err = may_destroy_subvol(dest);
+       if (err)
+               goto out_up_write;
+
+       trans = btrfs_start_transaction(root, 1);
+       ret = btrfs_unlink_subvol(trans, root, dir,
+                               dest->root_key.objectid,
+                               dentry->d_name.name,
+                               dentry->d_name.len);
+       BUG_ON(ret);
+
+       btrfs_record_root_in_trans(trans, dest);
+
+       memset(&dest->root_item.drop_progress, 0,
+               sizeof(dest->root_item.drop_progress));
+       dest->root_item.drop_level = 0;
+       btrfs_set_root_refs(&dest->root_item, 0);
+
+       ret = btrfs_insert_orphan_item(trans,
+                               root->fs_info->tree_root,
+                               dest->root_key.objectid);
+       BUG_ON(ret);
+
+       ret = btrfs_commit_transaction(trans, root);
+       BUG_ON(ret);
+       inode->i_flags |= S_DEAD;
+out_up_write:
+       up_write(&root->fs_info->subvol_sem);
+out_unlock:
+       mutex_unlock(&inode->i_mutex);
+       if (!err) {
+               btrfs_invalidate_inodes(dest);
+               d_delete(dentry);
+       }
+out_dput:
+       dput(dentry);
+out_unlock_dir:
+       mutex_unlock(&dir->i_mutex);
+       mnt_drop_write(file->f_path.mnt);
+out:
+       kfree(vol_args);
+       return err;
+}
+
 static int btrfs_ioctl_defrag(struct file *file)
 {
        struct inode *inode = fdentry(file)->d_inode;
@@ -865,8 +899,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
        return ret;
 }
 
-static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
-               u64 off, u64 olen, u64 destoff)
+static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+                                      u64 off, u64 olen, u64 destoff)
 {
        struct inode *inode = fdentry(file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -976,7 +1010,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
        /* punch hole in destination first */
        btrfs_drop_extents(trans, root, inode, off, off + len,
-                          off + len, 0, &hint_byte);
+                          off + len, 0, &hint_byte, 1);
 
        /* clone data */
        key.objectid = src->i_ino;
@@ -1071,8 +1105,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                                        datao += off - key.offset;
                                        datal -= off - key.offset;
                                }
-                               if (key.offset + datao + datal + key.offset >
-                                   off + len)
+                               if (key.offset + datao + datal > off + len)
                                        datal = off + len - key.offset - datao;
                                /* disko == 0 means it's a hole */
                                if (!disko)
@@ -1258,6 +1291,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_snap_create(file, argp, 0);
        case BTRFS_IOC_SUBVOL_CREATE:
                return btrfs_ioctl_snap_create(file, argp, 1);
+       case BTRFS_IOC_SNAP_DESTROY:
+               return btrfs_ioctl_snap_destroy(file, argp);
        case BTRFS_IOC_DEFRAG:
                return btrfs_ioctl_defrag(file);
        case BTRFS_IOC_RESIZE:
index b320b103fa132dadf345168a80fff04bd41be8b3..bc49914475ebbebc5271f3d26d486d730610178d 100644 (file)
@@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args {
 
 #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
                                   struct btrfs_ioctl_vol_args)
-
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+                               struct btrfs_ioctl_vol_args)
 #endif
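For reference, a hedged userspace sketch of driving the new ioctl: the file descriptor names the parent directory of the snapshot, the name goes in the vol_args buffer, and the fd field of the struct is unused on this path; CAP_SYS_ADMIN is required, per the handler above. The function name and paths are made up for illustration:

        #include <fcntl.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <unistd.h>
        /* plus the btrfs ioctl header that defines the macros above */

        int destroy_snapshot(const char *parent_dir, const char *snap_name)
        {
                struct btrfs_ioctl_vol_args args;
                int ret, fd;

                fd = open(parent_dir, O_RDONLY);  /* e.g. "/mnt/snaps" */
                if (fd < 0)
                        return -1;
                memset(&args, 0, sizeof(args));
                strncpy(args.name, snap_name, BTRFS_PATH_NAME_MAX);
                ret = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &args);
                close(fd);
                return ret;
        }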
index 7b2f401e604e3e4706ba2b8d8b9646949cfca3db..b5d6d24726b0014463f15041bbc9ddd74fdb110e 100644 (file)
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
  *
  * len is the length of the extent
  *
- * This also sets the EXTENT_ORDERED bit on the range in the inode.
- *
  * The tree is given a single reference on the ordered extent that was
  * inserted.
  */
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        entry->start = start;
        entry->len = len;
        entry->disk_len = disk_len;
+       entry->bytes_left = len;
        entry->inode = inode;
        if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                set_bit(type, &entry->flags);
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
                           &entry->rb_node);
        BUG_ON(node);
 
-       set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
-                          entry_end(entry) - 1, GFP_NOFS);
-
        spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
        list_add_tail(&entry->root_extent_list,
                      &BTRFS_I(inode)->root->fs_info->ordered_extents);
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
        struct btrfs_ordered_inode_tree *tree;
        struct rb_node *node;
        struct btrfs_ordered_extent *entry;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        int ret;
 
        tree = &BTRFS_I(inode)->ordered_tree;
        mutex_lock(&tree->mutex);
-       clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
-                            GFP_NOFS);
        node = tree_search(tree, file_offset);
        if (!node) {
                ret = 1;
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
                goto out;
        }
 
-       ret = test_range_bit(io_tree, entry->file_offset,
-                            entry->file_offset + entry->len - 1,
-                            EXTENT_ORDERED, 0);
-       if (ret == 0)
+       if (io_size > entry->bytes_left) {
+               printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+                      (unsigned long long)entry->bytes_left,
+                      (unsigned long long)io_size);
+       }
+       entry->bytes_left -= io_size;
+       if (entry->bytes_left == 0)
                ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+       else
+               ret = 1;
 out:
        mutex_unlock(&tree->mutex);
        return ret == 0;
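With EXTENT_ORDERED gone, completion is tracked by plain byte accounting under tree->mutex rather than by testing range bits in the io_tree, which saves an extent-tree walk on every IO completion. The invariant, restated from the hunks above using the same entry/len/io_size names:

        /* creation: the whole extent is still outstanding */
        entry->bytes_left = len;

        /* each completion subtracts what just hit the disk */
        entry->bytes_left -= io_size;
        if (entry->bytes_left == 0)
                /* last completion wins; IO_DONE fires exactly once */
                ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);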
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
        u64 orig_end;
        u64 wait_end;
        struct btrfs_ordered_extent *ordered;
+       int found;
 
        if (start + len < start) {
                orig_end = INT_LIMIT(loff_t);
@@ -502,6 +501,7 @@ again:
                                           orig_end >> PAGE_CACHE_SHIFT);
 
        end = orig_end;
+       found = 0;
        while (1) {
                ordered = btrfs_lookup_first_ordered_extent(inode, end);
                if (!ordered)
@@ -514,6 +514,7 @@ again:
                        btrfs_put_ordered_extent(ordered);
                        break;
                }
+               found++;
                btrfs_start_ordered_extent(inode, ordered, 1);
                end = ordered->file_offset;
                btrfs_put_ordered_extent(ordered);
@@ -521,8 +522,8 @@ again:
                        break;
                end--;
        }
-       if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
-                          EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
+       if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
+                          EXTENT_DELALLOC, 0, NULL)) {
                schedule_timeout(1);
                goto again;
        }
@@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
         */
        if (test_range_bit(io_tree, disk_i_size,
                           ordered->file_offset + ordered->len - 1,
-                          EXTENT_DELALLOC, 0)) {
+                          EXTENT_DELALLOC, 0, NULL)) {
                goto out;
        }
        /*
@@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
         */
        if (i_size_test > entry_end(ordered) &&
            !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
-                          EXTENT_DELALLOC, 0)) {
+                          EXTENT_DELALLOC, 0, NULL)) {
                new_i_size = min_t(u64, i_size_test, i_size_read(inode));
        }
        BTRFS_I(inode)->disk_i_size = new_i_size;
index 3d31c8827b013407d6f4b14796896a6aac8ba53b..993a7ea45c702a580c784908584408684913e233 100644 (file)
@@ -85,6 +85,9 @@ struct btrfs_ordered_extent {
        /* extent length on disk */
        u64 disk_len;
 
+       /* number of bytes that still need writing */
+       u64 bytes_left;
+
        /* flags (described above) */
        unsigned long flags;
 
index 3c0d52af4f806365bcd601208e00f25931ab29f2..79cba5fbc28ef8061e2d599110a3a525216a7bfd 100644 (file)
@@ -65,3 +65,23 @@ out:
        btrfs_free_path(path);
        return ret;
 }
+
+int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret;
+
+       key.objectid = BTRFS_ORPHAN_OBJECTID;
+       key.type = BTRFS_ORPHAN_ITEM_KEY;
+       key.offset = offset;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+
+       btrfs_free_path(path);
+       return ret;
+}
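Orphan items for dead subvolume roots all live in the tree root under one well-known objectid, with the victim root's id in the key offset, and btrfs_search_slot() with a NULL transaction handle is a read-only lookup, so the helper returns 0 when the item exists, positive when it does not, and negative on error. A hedged caller-side example (dead_root_objectid and pending are illustrative names):

        ret = btrfs_find_orphan_item(tree_root, dead_root_objectid);
        if (ret < 0)
                return ret;      /* search error */
        if (ret == 0)
                pending = 1;     /* orphan item found: deletion in progress */
        else
                pending = 0;     /* not found: nothing recorded for this root */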
index c04f7f212602c4bcb71e58db938fcd0d8a9128ad..361ad323faaceb4ed3313bd1e2904ea29ea7d2f3 100644 (file)
@@ -121,6 +121,15 @@ struct inodevec {
        int nr;
 };
 
+#define MAX_EXTENTS 128
+
+struct file_extent_cluster {
+       u64 start;
+       u64 end;
+       u64 boundary[MAX_EXTENTS];
+       unsigned int nr;
+};
+
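The cluster batches up to MAX_EXTENTS relocated data extents that are contiguous on disk, so the copy pass can stream pages instead of handling one extent at a time; boundary[] remembers where each original extent began, and relocate_file_extent_cluster() later tags those offsets with EXTENT_BOUNDARY so writeback splits the cluster back into the same extents. The accumulate-or-flush shape of the consumer, with flush_cluster() standing in hypothetically for relocate_file_extent_cluster() and extent_start/extent_len for the current extent:

        /* not adjacent to the running cluster, or cluster full: flush it */
        if (cluster->nr > 0 && (cluster->nr >= MAX_EXTENTS ||
                                extent_start != cluster->end + 1)) {
                ret = flush_cluster(inode, cluster);
                if (ret)
                        return ret;
                cluster->nr = 0;
        }
        if (!cluster->nr)
                cluster->start = extent_start;
        cluster->boundary[cluster->nr++] = extent_start;
        cluster->end = extent_start + extent_len - 1;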
 struct reloc_control {
        /* block group to relocate */
        struct btrfs_block_group_cache *block_group;
@@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize,
                                struct reloc_control *rc)
 {
        if (test_range_bit(&rc->processed_blocks, bytenr,
-                          bytenr + blocksize - 1, EXTENT_DIRTY, 1))
+                          bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
                return 1;
        return 0;
 }
@@ -2529,56 +2538,94 @@ out:
 }
 
 static noinline_for_stack
-int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
+int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
+                        u64 block_start)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
+       int ret = 0;
+
+       em = alloc_extent_map(GFP_NOFS);
+       if (!em)
+               return -ENOMEM;
+
+       em->start = start;
+       em->len = end + 1 - start;
+       em->block_len = em->len;
+       em->block_start = block_start;
+       em->bdev = root->fs_info->fs_devices->latest_bdev;
+       set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+       lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+       while (1) {
+               write_lock(&em_tree->lock);
+               ret = add_extent_mapping(em_tree, em);
+               write_unlock(&em_tree->lock);
+               if (ret != -EEXIST) {
+                       free_extent_map(em);
+                       break;
+               }
+               btrfs_drop_extent_cache(inode, start, end, 0);
+       }
+       unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+       return ret;
+}
+
+static int relocate_file_extent_cluster(struct inode *inode,
+                                       struct file_extent_cluster *cluster)
 {
        u64 page_start;
        u64 page_end;
-       unsigned long i;
-       unsigned long first_index;
+       u64 offset = BTRFS_I(inode)->index_cnt;
+       unsigned long index;
        unsigned long last_index;
-       unsigned int total_read = 0;
-       unsigned int total_dirty = 0;
+       unsigned int dirty_page = 0;
        struct page *page;
        struct file_ra_state *ra;
-       struct btrfs_ordered_extent *ordered;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       int nr = 0;
        int ret = 0;
 
+       if (!cluster->nr)
+               return 0;
+
        ra = kzalloc(sizeof(*ra), GFP_NOFS);
        if (!ra)
                return -ENOMEM;
 
+       index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
+       last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
+
        mutex_lock(&inode->i_mutex);
-       first_index = start >> PAGE_CACHE_SHIFT;
-       last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
 
-       /* make sure the dirty trick played by the caller work */
-       while (1) {
-               ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                                   first_index, last_index);
-               if (ret != -EBUSY)
-                       break;
-               schedule_timeout(HZ/10);
-       }
+       i_size_write(inode, cluster->end + 1 - offset);
+       ret = setup_extent_mapping(inode, cluster->start - offset,
+                                  cluster->end - offset, cluster->start);
        if (ret)
                goto out_unlock;
 
        file_ra_state_init(ra, inode->i_mapping);
 
-       for (i = first_index ; i <= last_index; i++) {
-               if (total_read % ra->ra_pages == 0) {
-                       btrfs_force_ra(inode->i_mapping, ra, NULL, i,
-                               min(last_index, ra->ra_pages + i - 1));
-               }
-               total_read++;
-again:
-               if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
-                       BUG_ON(1);
-               page = grab_cache_page(inode->i_mapping, i);
+       WARN_ON(cluster->start != cluster->boundary[0]);
+       while (index <= last_index) {
+               page = find_lock_page(inode->i_mapping, index);
                if (!page) {
-                       ret = -ENOMEM;
-                       goto out_unlock;
+                       page_cache_sync_readahead(inode->i_mapping,
+                                                 ra, NULL, index,
+                                                 last_index + 1 - index);
+                       page = grab_cache_page(inode->i_mapping, index);
+                       if (!page) {
+                               ret = -ENOMEM;
+                               goto out_unlock;
+                       }
+               }
+
+               if (PageReadahead(page)) {
+                       page_cache_async_readahead(inode->i_mapping,
+                                                  ra, NULL, page, index,
+                                                  last_index + 1 - index);
                }
+
                if (!PageUptodate(page)) {
                        btrfs_readpage(NULL, page);
                        lock_page(page);
@@ -2589,75 +2636,79 @@ again:
                                goto out_unlock;
                        }
                }
-               wait_on_page_writeback(page);
 
                page_start = (u64)page->index << PAGE_CACHE_SHIFT;
                page_end = page_start + PAGE_CACHE_SIZE - 1;
-               lock_extent(io_tree, page_start, page_end, GFP_NOFS);
-
-               ordered = btrfs_lookup_ordered_extent(inode, page_start);
-               if (ordered) {
-                       unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
-                       unlock_page(page);
-                       page_cache_release(page);
-                       btrfs_start_ordered_extent(inode, ordered, 1);
-                       btrfs_put_ordered_extent(ordered);
-                       goto again;
-               }
+
+               lock_extent(&BTRFS_I(inode)->io_tree,
+                           page_start, page_end, GFP_NOFS);
+
                set_page_extent_mapped(page);
 
-               if (i == first_index)
-                       set_extent_bits(io_tree, page_start, page_end,
+               if (nr < cluster->nr &&
+                   page_start + offset == cluster->boundary[nr]) {
+                       set_extent_bits(&BTRFS_I(inode)->io_tree,
+                                       page_start, page_end,
                                        EXTENT_BOUNDARY, GFP_NOFS);
+                       nr++;
+               }
                btrfs_set_extent_delalloc(inode, page_start, page_end);
 
                set_page_dirty(page);
-               total_dirty++;
+               dirty_page++;
 
-               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+               unlock_extent(&BTRFS_I(inode)->io_tree,
+                             page_start, page_end, GFP_NOFS);
                unlock_page(page);
                page_cache_release(page);
+
+               index++;
+               if (nr < cluster->nr &&
+                   page_end + 1 + offset == cluster->boundary[nr]) {
+                       balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+                                                          dirty_page);
+                       dirty_page = 0;
+               }
+       }
+       if (dirty_page) {
+               balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+                                                  dirty_page);
        }
+       WARN_ON(nr != cluster->nr);
 out_unlock:
        mutex_unlock(&inode->i_mutex);
        kfree(ra);
-       balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
        return ret;
 }
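The page loop now uses the kernel's two-stage readahead: on a cache miss, page_cache_sync_readahead() fills the window synchronously before the page is grabbed, and when a present page carries the readahead marker, page_cache_async_readahead() pipelines the next window in the background. The bare idiom, assuming mapping, ra, index and nr describe the range being streamed:

        page = find_lock_page(mapping, index);
        if (!page) {
                /* miss: populate the window, then take the page */
                page_cache_sync_readahead(mapping, ra, NULL, index, nr);
                page = grab_cache_page(mapping, index);
                if (!page)
                        return -ENOMEM;
        }
        if (PageReadahead(page))
                /* marker page: kick off the next window early */
                page_cache_async_readahead(mapping, ra, NULL, page,
                                           index, nr);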
 
 static noinline_for_stack
-int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
+int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
+                        struct file_extent_cluster *cluster)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-       struct extent_map *em;
-       u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
-       u64 end = start + extent_key->offset - 1;
-
-       em = alloc_extent_map(GFP_NOFS);
-       em->start = start;
-       em->len = extent_key->offset;
-       em->block_len = extent_key->offset;
-       em->block_start = extent_key->objectid;
-       em->bdev = root->fs_info->fs_devices->latest_bdev;
-       set_bit(EXTENT_FLAG_PINNED, &em->flags);
+       int ret;
 
-       /* setup extent map to cheat btrfs_readpage */
-       lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
-       while (1) {
-               int ret;
-               spin_lock(&em_tree->lock);
-               ret = add_extent_mapping(em_tree, em);
-               spin_unlock(&em_tree->lock);
-               if (ret != -EEXIST) {
-                       free_extent_map(em);
-                       break;
-               }
-               btrfs_drop_extent_cache(inode, start, end, 0);
+       if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
+               ret = relocate_file_extent_cluster(inode, cluster);
+               if (ret)
+                       return ret;
+               cluster->nr = 0;
        }
-       unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
 
-       return relocate_inode_pages(inode, start, extent_key->offset);
+       if (!cluster->nr)
+               cluster->start = extent_key->objectid;
+       else
+               BUG_ON(cluster->nr >= MAX_EXTENTS);
+       cluster->end = extent_key->objectid + extent_key->offset - 1;
+       cluster->boundary[cluster->nr] = extent_key->objectid;
+       cluster->nr++;
+
+       if (cluster->nr >= MAX_EXTENTS) {
+               ret = relocate_file_extent_cluster(inode, cluster);
+               if (ret)
+                       return ret;
+               cluster->nr = 0;
+       }
+       return 0;
 }
 
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
@@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags)
        return 0;
 }
 
+
 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 {
        struct rb_root blocks = RB_ROOT;
        struct btrfs_key key;
+       struct file_extent_cluster *cluster;
        struct btrfs_trans_handle *trans = NULL;
        struct btrfs_path *path;
        struct btrfs_extent_item *ei;
@@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        int ret;
        int err = 0;
 
+       cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
+       if (!cluster)
+               return -ENOMEM;
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
+       rc->extents_found = 0;
+       rc->extents_skipped = 0;
+
        rc->search_start = rc->block_group->key.objectid;
        clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
                          GFP_NOFS);
@@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                }
 
                nr = trans->blocks_used;
-               btrfs_end_transaction_throttle(trans, rc->extent_root);
+               btrfs_end_transaction(trans, rc->extent_root);
                trans = NULL;
                btrfs_btree_balance_dirty(rc->extent_root, nr);
 
                if (rc->stage == MOVE_DATA_EXTENTS &&
                    (flags & BTRFS_EXTENT_FLAG_DATA)) {
                        rc->found_file_extent = 1;
-                       ret = relocate_data_extent(rc->data_inode, &key);
+                       ret = relocate_data_extent(rc->data_inode,
+                                                  &key, cluster);
                        if (ret < 0) {
                                err = ret;
                                break;
@@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                btrfs_btree_balance_dirty(rc->extent_root, nr);
        }
 
+       if (!err) {
+               ret = relocate_file_extent_cluster(rc->data_inode, cluster);
+               if (ret < 0)
+                       err = ret;
+       }
+
+       kfree(cluster);
+
        rc->create_reloc_root = 0;
        smp_mb();
 
@@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 }
 
 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                u64 objectid, u64 size)
+                                struct btrfs_root *root, u64 objectid)
 {
        struct btrfs_path *path;
        struct btrfs_inode_item *item;
@@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
        memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
        btrfs_set_inode_generation(leaf, item, 1);
-       btrfs_set_inode_size(leaf, item, size);
+       btrfs_set_inode_size(leaf, item, 0);
        btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
        btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
        btrfs_mark_buffer_dirty(leaf);
@@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
        if (err)
                goto out;
 
-       err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
-       BUG_ON(err);
-
-       err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
-                                      group->key.offset, 0, group->key.offset,
-                                      0, 0, 0);
+       err = __insert_orphan_inode(trans, root, objectid);
        BUG_ON(err);
 
        key.objectid = objectid;
@@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
        btrfs_wait_ordered_extents(fs_info->tree_root, 0);
 
        while (1) {
-               mutex_lock(&fs_info->cleaner_mutex);
-               btrfs_clean_old_snapshots(fs_info->tree_root);
-               mutex_unlock(&fs_info->cleaner_mutex);
-
                rc->extents_found = 0;
                rc->extents_skipped = 0;
 
+               mutex_lock(&fs_info->cleaner_mutex);
+
+               btrfs_clean_old_snapshots(fs_info->tree_root);
                ret = relocate_block_group(rc);
+
+               mutex_unlock(&fs_info->cleaner_mutex);
                if (ret < 0) {
                        err = ret;
                        break;
@@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                }
        }
 
-       filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
-                                rc->block_group->key.objectid,
-                                rc->block_group->key.objectid +
-                                rc->block_group->key.offset - 1);
+       filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
+                                    rc->block_group->key.objectid,
+                                    rc->block_group->key.objectid +
+                                    rc->block_group->key.offset - 1);
 
        WARN_ON(rc->block_group->pinned > 0);
        WARN_ON(rc->block_group->reserved > 0);
@@ -3530,6 +3594,26 @@ out:
        return err;
 }
 
+static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
+{
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       trans = btrfs_start_transaction(root->fs_info->tree_root, 1);
+
+       memset(&root->root_item.drop_progress, 0,
+               sizeof(root->root_item.drop_progress));
+       root->root_item.drop_level = 0;
+       btrfs_set_root_refs(&root->root_item, 0);
+       ret = btrfs_update_root(trans, root->fs_info->tree_root,
+                               &root->root_key, &root->root_item);
+       BUG_ON(ret);
+
+       ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
+       BUG_ON(ret);
+       return 0;
+}
+
 /*
  * recover relocation interrupted by system crash.
  *
@@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
                        fs_root = read_fs_root(root->fs_info,
                                               reloc_root->root_key.offset);
                        if (IS_ERR(fs_root)) {
-                               err = PTR_ERR(fs_root);
-                               goto out;
+                               ret = PTR_ERR(fs_root);
+                               if (ret != -ENOENT) {
+                                       err = ret;
+                                       goto out;
+                               }
+                               mark_garbage_root(reloc_root);
                        }
                }
 
index 0ddc6d61c55a7135bd94c15c76644011c6e9fd4c..9351428f30e2129343e7c9b882b1fbb26e56c874 100644 (file)
@@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
                goto out;
 
        BUG_ON(ret == 0);
+       if (path->slots[0] == 0) {
+               ret = 1;
+               goto out;
+       }
        l = path->nodes[0];
-       BUG_ON(path->slots[0] == 0);
        slot = path->slots[0] - 1;
        btrfs_item_key_to_cpu(l, &found_key, slot);
-       if (found_key.objectid != objectid) {
+       if (found_key.objectid != objectid ||
+           found_key.type != BTRFS_ROOT_ITEM_KEY) {
                ret = 1;
                goto out;
        }
-       read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
-                          sizeof(*item));
-       memcpy(key, &found_key, sizeof(found_key));
+       if (item)
+               read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
+                                  sizeof(*item));
+       if (key)
+               memcpy(key, &found_key, sizeof(found_key));
        ret = 0;
 out:
        btrfs_free_path(path);
@@ -249,6 +255,59 @@ err:
        return ret;
 }
 
+int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
+{
+       struct extent_buffer *leaf;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int err = 0;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = BTRFS_ORPHAN_OBJECTID;
+       key.type = BTRFS_ORPHAN_ITEM_KEY;
+       key.offset = 0;
+
+       while (1) {
+               ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+               if (ret < 0) {
+                       err = ret;
+                       break;
+               }
+
+               leaf = path->nodes[0];
+               if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(tree_root, path);
+                       if (ret < 0)
+                               err = ret;
+                       if (ret != 0)
+                               break;
+                       leaf = path->nodes[0];
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               btrfs_release_path(tree_root, path);
+
+               if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
+                   key.type != BTRFS_ORPHAN_ITEM_KEY)
+                       break;
+
+               ret = btrfs_find_dead_roots(tree_root, key.offset);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
+
+               key.offset++;
+       }
+
+       btrfs_free_path(path);
+       return err;
+}
+
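btrfs_find_orphan_roots() walks every (BTRFS_ORPHAN_OBJECTID, BTRFS_ORPHAN_ITEM_KEY, root_id) item and hands each recorded root id to btrfs_find_dead_roots(), so a subvolume whose destruction was interrupted by a crash resumes dropping after the next mount. A plausible call site, hedged because the mount-path wiring is not part of this excerpt:

        /* during mount, once the tree root is readable */
        ret = btrfs_find_orphan_roots(tree_root);
        if (ret)
                goto fail_mount;    /* hypothetical error label */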
 /* drop the root item for 'key' from 'root' */
 int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_key *key)
@@ -278,31 +337,57 @@ out:
        return ret;
 }
 
-#if 0 /* this will get used when snapshot deletion is implemented */
 int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
                       struct btrfs_root *tree_root,
-                      u64 root_id, u8 type, u64 ref_id)
+                      u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
+                      const char *name, int name_len)
+
 {
+       struct btrfs_path *path;
+       struct btrfs_root_ref *ref;
+       struct extent_buffer *leaf;
        struct btrfs_key key;
+       unsigned long ptr;
+       int err = 0;
        int ret;
-       struct btrfs_path *path;
 
        path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
 
        key.objectid = root_id;
-       key.type = type;
+       key.type = BTRFS_ROOT_BACKREF_KEY;
        key.offset = ref_id;
-
+again:
        ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
-       BUG_ON(ret);
-
-       ret = btrfs_del_item(trans, tree_root, path);
-       BUG_ON(ret);
+       BUG_ON(ret < 0);
+       if (ret == 0) {
+               leaf = path->nodes[0];
+               ref = btrfs_item_ptr(leaf, path->slots[0],
+                                    struct btrfs_root_ref);
+
+               WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
+               WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
+               ptr = (unsigned long)(ref + 1);
+               WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
+               *sequence = btrfs_root_ref_sequence(leaf, ref);
+
+               ret = btrfs_del_item(trans, tree_root, path);
+               BUG_ON(ret);
+       } else
+               err = -ENOENT;
+
+       if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+               btrfs_release_path(tree_root, path);
+               key.objectid = ref_id;
+               key.type = BTRFS_ROOT_REF_KEY;
+               key.offset = root_id;
+               goto again;
+       }
 
        btrfs_free_path(path);
-       return ret;
+       return err;
 }
-#endif
 
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
                   struct btrfs_path *path,
@@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
        return ret;
 }
 
-
 /*
  * add a btrfs_root_ref item.  Both the BTRFS_ROOT_REF_KEY item
  * and its BTRFS_ROOT_BACKREF_KEY twin are inserted.
@@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
  */
 int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
                       struct btrfs_root *tree_root,
-                      u64 root_id, u8 type, u64 ref_id,
-                      u64 dirid, u64 sequence,
+                      u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
                       const char *name, int name_len)
 {
        struct btrfs_key key;
@@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        unsigned long ptr;
 
-
        path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
 
        key.objectid = root_id;
-       key.type = type;
+       key.type = BTRFS_ROOT_BACKREF_KEY;
        key.offset = ref_id;
-
+again:
        ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
                                      sizeof(*ref) + name_len);
        BUG_ON(ret);
@@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
        write_extent_buffer(leaf, name, ptr, name_len);
        btrfs_mark_buffer_dirty(leaf);
 
+       if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+               btrfs_release_path(tree_root, path);
+               key.objectid = ref_id;
+               key.type = BTRFS_ROOT_REF_KEY;
+               key.offset = root_id;
+               goto again;
+       }
+
        btrfs_free_path(path);
-       return ret;
+       return 0;
 }
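
A note on the hunk above: btrfs_add_root_ref() and btrfs_del_root_ref() now
always handle the forward ref and the backref as a pair. The key is built for
BTRFS_ROOT_BACKREF_KEY first, then objectid and offset are swapped and the
body re-runs once for BTRFS_ROOT_REF_KEY. Reduced to its core, with the
actual tree operation behind a hypothetical process_key() helper:

    static void process_ref_pair(struct btrfs_key *key, u64 root_id, u64 ref_id)
    {
            key->objectid = root_id;
            key->type = BTRFS_ROOT_BACKREF_KEY;
            key->offset = ref_id;
    again:
            process_key(key);               /* hypothetical: insert or delete the item */

            if (key->type == BTRFS_ROOT_BACKREF_KEY) {
                    key->objectid = ref_id;         /* mirror the key ... */
                    key->type = BTRFS_ROOT_REF_KEY;
                    key->offset = root_id;          /* ... and run the body once more */
                    goto again;
            }
    }
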
index 2db17cd66fc55d32ba19a77cee7ea54b170efa34..67035385444cf29c31a59951311af66702fa80a8 100644 (file)
@@ -676,6 +676,7 @@ static int btrfs_unfreeze(struct super_block *sb)
 }
 
 static const struct super_operations btrfs_super_ops = {
+       .drop_inode     = btrfs_drop_inode,
        .delete_inode   = btrfs_delete_inode,
        .put_super      = btrfs_put_super,
        .sync_fs        = btrfs_sync_fs,
index cdbb5022da52df1e2b33593650401021077ba280..88f866f85e7affa4b387cc3cc72fae5a472bb786 100644 (file)
@@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
 {
        if (root->ref_cows && root->last_trans < trans->transid) {
                WARN_ON(root == root->fs_info->extent_root);
-               WARN_ON(root->root_item.refs == 0);
                WARN_ON(root->commit_root != root->node);
 
                radix_tree_tag_set(&root->fs_info->fs_roots_radix,
@@ -720,7 +719,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
 
        key.objectid = objectid;
-       key.offset = 0;
+       /* record when the snapshot was created in key.offset */
+       key.offset = trans->transid;
        btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 
        old = btrfs_lock_root_node(root);
@@ -778,24 +778,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        BUG_ON(ret);
 
-       /* add the backref first */
        ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
                                 pending->root_key.objectid,
-                                BTRFS_ROOT_BACKREF_KEY,
                                 parent_root->root_key.objectid,
                                 parent_inode->i_ino, index, pending->name,
                                 namelen);
 
        BUG_ON(ret);
 
-       /* now add the forward ref */
-       ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
-                                parent_root->root_key.objectid,
-                                BTRFS_ROOT_REF_KEY,
-                                pending->root_key.objectid,
-                                parent_inode->i_ino, index, pending->name,
-                                namelen);
-
        inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
        d_instantiate(pending->dentry, inode);
 fail:
@@ -874,7 +864,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
-       struct extent_io_tree *pinned_copy;
        DEFINE_WAIT(wait);
        int ret;
        int should_grow = 0;
@@ -915,13 +904,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                return 0;
        }
 
-       pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
-       if (!pinned_copy)
-               return -ENOMEM;
-
-       extent_io_tree_init(pinned_copy,
-                            root->fs_info->btree_inode->i_mapping, GFP_NOFS);
-
        trans->transaction->in_commit = 1;
        trans->transaction->blocked = 1;
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -1019,6 +1001,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        ret = commit_cowonly_roots(trans, root);
        BUG_ON(ret);
 
+       btrfs_prepare_extent_commit(trans, root);
+
        cur_trans = root->fs_info->running_transaction;
        spin_lock(&root->fs_info->new_trans_lock);
        root->fs_info->running_transaction = NULL;
@@ -1042,8 +1026,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
               sizeof(root->fs_info->super_copy));
 
-       btrfs_copy_pinned(root, pinned_copy);
-
        trans->transaction->blocked = 0;
 
        wake_up(&root->fs_info->transaction_wait);
@@ -1059,8 +1041,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         */
        mutex_unlock(&root->fs_info->tree_log_mutex);
 
-       btrfs_finish_extent_commit(trans, root, pinned_copy);
-       kfree(pinned_copy);
+       btrfs_finish_extent_commit(trans, root);
 
        /* do the directory inserts of any pending snapshot creations */
        finish_pending_snapshots(trans, root->fs_info);
@@ -1096,8 +1077,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 
        while (!list_empty(&list)) {
                root = list_entry(list.next, struct btrfs_root, root_list);
-               list_del_init(&root->root_list);
-               btrfs_drop_snapshot(root, 0);
+               list_del(&root->root_list);
+
+               if (btrfs_header_backref_rev(root->node) <
+                   BTRFS_MIXED_BACKREF_REV)
+                       btrfs_drop_snapshot(root, 0);
+               else
+                       btrfs_drop_snapshot(root, 1);
        }
        return 0;
 }
index 30c0d45c1b5e6d7bfda1a07748ac8d86b9dd0b7d..7827841b55cbd5399606cd02820055f2ff6eded2 100644 (file)
@@ -263,8 +263,8 @@ static int process_one_buffer(struct btrfs_root *log,
                              struct walk_control *wc, u64 gen)
 {
        if (wc->pin)
-               btrfs_update_pinned_extents(log->fs_info->extent_root,
-                                           eb->start, eb->len, 1);
+               btrfs_pin_extent(log->fs_info->extent_root,
+                                eb->start, eb->len, 0);
 
        if (btrfs_buffer_uptodate(eb, gen)) {
                if (wc->write)
@@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        saved_nbytes = inode_get_bytes(inode);
        /* drop any overlapping extents */
        ret = btrfs_drop_extents(trans, root, inode,
-                        start, extent_end, extent_end, start, &alloc_hint);
+                        start, extent_end, extent_end, start, &alloc_hint, 1);
        BUG_ON(ret);
 
        if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -2841,7 +2841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
                        break;
 
-               if (parent == sb->s_root)
+               if (IS_ROOT(parent))
                        break;
 
                parent = parent->d_parent;
@@ -2880,6 +2880,12 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                goto end_no_trans;
        }
 
+       if (root != BTRFS_I(inode)->root ||
+           btrfs_root_refs(&root->root_item) == 0) {
+               ret = 1;
+               goto end_no_trans;
+       }
+
        ret = check_parent_dirs_for_sync(trans, inode, parent,
                                         sb, last_committed);
        if (ret)
@@ -2907,12 +2913,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                        break;
 
                inode = parent->d_inode;
+               if (root != BTRFS_I(inode)->root)
+                       break;
+
                if (BTRFS_I(inode)->generation >
                    root->fs_info->last_trans_committed) {
                        ret = btrfs_log_inode(trans, root, inode, inode_only);
                        BUG_ON(ret);
                }
-               if (parent == sb->s_root)
+               if (IS_ROOT(parent))
                        break;
 
                parent = parent->d_parent;
@@ -2951,7 +2960,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
        struct btrfs_key tmp_key;
        struct btrfs_root *log;
        struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
-       u64 highest_inode;
        struct walk_control wc = {
                .process_func = process_one_buffer,
                .stage = 0,
@@ -3010,11 +3018,6 @@ again:
                                                      path);
                        BUG_ON(ret);
                }
-               ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode);
-               if (ret == 0) {
-                       wc.replay_dest->highest_inode = highest_inode;
-                       wc.replay_dest->last_inode_alloc = highest_inode;
-               }
 
                key.offset = found_key.offset - 1;
                wc.replay_dest->log_root = NULL;
index 5cf405b0828d8295d1fcd514841dbe6b21afe1c4..23e7d36ff32554eb7e781ab7c8e9b8102fddc235 100644 (file)
@@ -276,7 +276,7 @@ loop_lock:
                 * is now congested.  Back off and let other work structs
                 * run instead
                 */
-               if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
+               if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
                    fs_info->fs_devices->open_devices > 1) {
                        struct io_context *ioc;
 
@@ -719,10 +719,9 @@ error:
  * called very infrequently and that a given device has a small number
  * of extents
  */
-static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                                        struct btrfs_device *device,
-                                        u64 num_bytes, u64 *start,
-                                        u64 *max_avail)
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_device *device, u64 num_bytes,
+                        u64 *start, u64 *max_avail)
 {
        struct btrfs_key key;
        struct btrfs_root *root = device->dev_root;
@@ -1736,6 +1735,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        extent_root = root->fs_info->extent_root;
        em_tree = &root->fs_info->mapping_tree.map_tree;
 
+       ret = btrfs_can_relocate(extent_root, chunk_offset);
+       if (ret)
+               return -ENOSPC;
+
        /* step one, relocate all the extents inside this chunk */
        ret = btrfs_relocate_block_group(extent_root, chunk_offset);
        BUG_ON(ret);
@@ -1749,9 +1752,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
         * step two, delete the device extents and the
         * chunk tree entries
         */
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, chunk_offset, 1);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        BUG_ON(em->start > chunk_offset ||
               em->start + em->len < chunk_offset);
@@ -1780,9 +1783,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
        BUG_ON(ret);
 
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        remove_extent_mapping(em_tree, em);
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 
        kfree(map);
        em->bdev = NULL;
@@ -1807,12 +1810,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
        struct btrfs_key found_key;
        u64 chunk_tree = chunk_root->root_key.objectid;
        u64 chunk_type;
+       bool retried = false;
+       int failed = 0;
        int ret;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
+again:
        key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        key.offset = (u64)-1;
        key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -1842,7 +1848,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
                        ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
                                                   found_key.objectid,
                                                   found_key.offset);
-                       BUG_ON(ret);
+                       if (ret == -ENOSPC)
+                               failed++;
+                       else if (ret)
+                               BUG();
                }
 
                if (found_key.offset == 0)
@@ -1850,6 +1859,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
                key.offset = found_key.offset - 1;
        }
        ret = 0;
+       if (failed && !retried) {
+               failed = 0;
+               retried = true;
+               goto again;
+       } else if (failed && retried) {
+               WARN_ON(1);
+               ret = -ENOSPC;
+       }
 error:
        btrfs_free_path(path);
        return ret;
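
btrfs_relocate_sys_chunks() above, and btrfs_shrink_device() further down,
adopt the same retry-once idiom: -ENOSPC is counted rather than BUG()ed on,
the whole walk is repeated one time (chunks moved on the first pass may have
freed the space a failed one needed), and only a second-pass failure is
reported. A self-contained sketch of the pattern, where try_one() is a
hypothetical stand-in for the per-chunk work:

    #include <errno.h>
    #include <stdbool.h>

    static int try_one(int i) { return 0; }     /* hypothetical per-item work */

    static int process_all(int n)
    {
            bool retried = false;
            int failed;
            int i, ret;
    again:
            failed = 0;
            for (i = 0; i < n; i++) {
                    ret = try_one(i);
                    if (ret == -ENOSPC)
                            failed++;           /* remember it, keep walking */
                    else if (ret)
                            return ret;         /* hard error */
            }
            if (failed && !retried) {
                    retried = true;
                    goto again;                 /* earlier work may have freed space */
            }
            return failed ? -ENOSPC : 0;
    }
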
@@ -1894,6 +1911,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
                        continue;
 
                ret = btrfs_shrink_device(device, old_size - size_to_free);
+               if (ret == -ENOSPC)
+                       break;
                BUG_ON(ret);
 
                trans = btrfs_start_transaction(dev_root, 1);
@@ -1938,9 +1957,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
                chunk = btrfs_item_ptr(path->nodes[0],
                                       path->slots[0],
                                       struct btrfs_chunk);
-               key.offset = found_key.offset;
                /* chunk zero is special */
-               if (key.offset == 0)
+               if (found_key.offset == 0)
                        break;
 
                btrfs_release_path(chunk_root, path);
@@ -1948,7 +1966,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
                                           chunk_root->root_key.objectid,
                                           found_key.objectid,
                                           found_key.offset);
-               BUG_ON(ret);
+               BUG_ON(ret && ret != -ENOSPC);
+               key.offset = found_key.offset - 1;
        }
        ret = 0;
 error:
@@ -1974,10 +1993,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        u64 chunk_offset;
        int ret;
        int slot;
+       int failed = 0;
+       bool retried = false;
        struct extent_buffer *l;
        struct btrfs_key key;
        struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
        u64 old_total = btrfs_super_total_bytes(super_copy);
+       u64 old_size = device->total_bytes;
        u64 diff = device->total_bytes - new_size;
 
        if (new_size >= device->total_bytes)
@@ -1987,12 +2009,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        if (!path)
                return -ENOMEM;
 
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
        path->reada = 2;
 
        lock_chunks(root);
@@ -2001,8 +2017,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        if (device->writeable)
                device->fs_devices->total_rw_bytes -= diff;
        unlock_chunks(root);
-       btrfs_end_transaction(trans, root);
 
+again:
        key.objectid = device->devid;
        key.offset = (u64)-1;
        key.type = BTRFS_DEV_EXTENT_KEY;
@@ -2017,6 +2033,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                        goto done;
                if (ret) {
                        ret = 0;
+                       btrfs_release_path(root, path);
                        break;
                }
 
@@ -2024,14 +2041,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                slot = path->slots[0];
                btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
-               if (key.objectid != device->devid)
+               if (key.objectid != device->devid) {
+                       btrfs_release_path(root, path);
                        break;
+               }
 
                dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
                length = btrfs_dev_extent_length(l, dev_extent);
 
-               if (key.offset + length <= new_size)
+               if (key.offset + length <= new_size) {
+                       btrfs_release_path(root, path);
                        break;
+               }
 
                chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
                chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -2040,8 +2061,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 
                ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
                                           chunk_offset);
-               if (ret)
+               if (ret && ret != -ENOSPC)
                        goto done;
+               if (ret == -ENOSPC)
+                       failed++;
+               key.offset -= 1;
+       }
+
+       if (failed && !retried) {
+               failed = 0;
+               retried = true;
+               goto again;
+       } else if (failed && retried) {
+               ret = -ENOSPC;
+               lock_chunks(root);
+
+               device->total_bytes = old_size;
+               if (device->writeable)
+                       device->fs_devices->total_rw_bytes += diff;
+               unlock_chunks(root);
+               goto done;
        }
 
        /* Shrinking succeeded, else we would be at "done". */
@@ -2294,9 +2333,9 @@ again:
        em->block_len = em->len;
 
        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
        BUG_ON(ret);
        free_extent_map(em);
 
@@ -2491,9 +2530,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
        int readonly = 0;
        int i;
 
-       spin_lock(&map_tree->map_tree.lock);
+       read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
-       spin_unlock(&map_tree->map_tree.lock);
+       read_unlock(&map_tree->map_tree.lock);
        if (!em)
                return 1;
 
@@ -2518,11 +2557,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
        struct extent_map *em;
 
        while (1) {
-               spin_lock(&tree->map_tree.lock);
+               write_lock(&tree->map_tree.lock);
                em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
                if (em)
                        remove_extent_mapping(&tree->map_tree, em);
-               spin_unlock(&tree->map_tree.lock);
+               write_unlock(&tree->map_tree.lock);
                if (!em)
                        break;
                kfree(em->bdev);
@@ -2540,9 +2579,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
        struct extent_map_tree *em_tree = &map_tree->map_tree;
        int ret;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, len);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
@@ -2604,9 +2643,9 @@ again:
                atomic_set(&multi->error, 0);
        }
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, *length);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        if (!em && unplug_page)
                return 0;
@@ -2763,9 +2802,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        u64 stripe_nr;
        int i, j, nr = 0;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, chunk_start, 1);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        BUG_ON(!em || em->start != chunk_start);
        map = (struct map_lookup *)em->bdev;
@@ -3053,9 +3092,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
 
-       spin_lock(&map_tree->map_tree.lock);
+       read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
-       spin_unlock(&map_tree->map_tree.lock);
+       read_unlock(&map_tree->map_tree.lock);
 
        /* already mapped? */
        if (em && em->start <= logical && em->start + em->len > logical) {
@@ -3114,9 +3153,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                map->stripes[i].dev->in_fs_metadata = 1;
        }
 
-       spin_lock(&map_tree->map_tree.lock);
+       write_lock(&map_tree->map_tree.lock);
        ret = add_extent_mapping(&map_tree->map_tree, em);
-       spin_unlock(&map_tree->map_tree.lock);
+       write_unlock(&map_tree->map_tree.lock);
        BUG_ON(ret);
        free_extent_map(em);
 
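Every locking change in the file above follows one rule: the extent map
tree's lock is now an rwlock_t, so plain lookups run under read_lock() and
may proceed concurrently, while any path that will modify the tree (add or
remove a mapping) takes write_lock(). A minimal sketch of the split, with
lookup()/insert() as hypothetical stand-ins for the btrfs calls:

    #include <linux/spinlock.h>

    static DEFINE_RWLOCK(tree_lock);

    extern void *lookup(void);          /* hypothetical stand-ins */
    extern void insert(void *em);

    static void *lookup_mapping(void)
    {
            void *em;

            read_lock(&tree_lock);      /* readers may share the lock */
            em = lookup();
            read_unlock(&tree_lock);
            return em;
    }

    static void insert_mapping(void *em)
    {
            write_lock(&tree_lock);     /* modification is exclusive */
            insert(em);
            write_unlock(&tree_lock);
    }
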
index 5139a833f721353a32f13ed1c52c3647c0951776..31b0fabdd2ea7da5489a4bffaa065c59e9282119 100644 (file)
@@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root);
 void btrfs_unlock_volumes(void);
 void btrfs_lock_volumes(void);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_device *device, u64 num_bytes,
+                        u64 *start, u64 *max_avail);
 #endif
index 209f7f15f5f801b4023f6e121643b3744ce1b11e..24afd7422ae866851ecfbd12d7e1c231c5bffda5 100644 (file)
@@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
        void *fsdata;
-       unsigned long limit;
        int err;
 
-       err = -EFBIG;
-        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && size > (loff_t)limit) {
-               send_sig(SIGXFSZ, current, 0);
-               goto out;
-       }
-       if (size > inode->i_sb->s_maxbytes)
+       err = inode_newsize_ok(inode, size);
+       if (err)
                goto out;
 
        err = pagecache_write_begin(NULL, mapping, size, 0,
index 3cbc57f932d26a32d8fc8761e3bfe8465ddab9ae..d6db933df2b27ce43c0fe31d6e35bb0968598891 100644 (file)
@@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
 {
        struct char_device_struct *cd;
        struct cdev *cdev;
-       char *s;
        int err = -ENOMEM;
 
        cd = __register_chrdev_region(major, baseminor, count, name);
@@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
        cdev->owner = fops->owner;
        cdev->ops = fops;
        kobject_set_name(&cdev->kobj, "%s", name);
-       for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/'))
-               *s = '!';
                
        err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
        if (err)
index d79ce2e95c2357a92cf8e252ed27cf7d59557dc5..90c5b39f03135cf0763bdba37701154cc9b40a61 100644 (file)
@@ -185,8 +185,7 @@ out_mount_failed:
                        cifs_sb->mountdata = NULL;
                }
 #endif
-               if (cifs_sb->local_nls)
-                       unload_nls(cifs_sb->local_nls);
+               unload_nls(cifs_sb->local_nls);
                kfree(cifs_sb);
        }
        return rc;
index 1f09c7619319d794dd10a3d58a3543aa34e36ca9..5e2492535daa4ca626ec1e45e812ebd194fa7a2b 100644 (file)
@@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 
 static int cifs_vmtruncate(struct inode *inode, loff_t offset)
 {
-       struct address_space *mapping = inode->i_mapping;
-       unsigned long limit;
+       loff_t oldsize;
+       int err;
 
        spin_lock(&inode->i_lock);
-       if (inode->i_size < offset)
-               goto do_expand;
-       /*
-        * truncation of in-use swapfiles is disallowed - it would cause
-        * subsequent swapout to scribble on the now-freed blocks.
-        */
-       if (IS_SWAPFILE(inode)) {
-               spin_unlock(&inode->i_lock);
-               goto out_busy;
-       }
-       i_size_write(inode, offset);
-       spin_unlock(&inode->i_lock);
-       /*
-        * unmap_mapping_range is called twice, first simply for efficiency
-        * so that truncate_inode_pages does fewer single-page unmaps. However
-        * after this first call, and before truncate_inode_pages finishes,
-        * it is possible for private pages to be COWed, which remain after
-        * truncate_inode_pages finishes, hence the second unmap_mapping_range
-        * call must be made for correctness.
-        */
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       truncate_inode_pages(mapping, offset);
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       goto out_truncate;
-
-do_expand:
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && offset > limit) {
+       err = inode_newsize_ok(inode, offset);
+       if (err) {
                spin_unlock(&inode->i_lock);
-               goto out_sig;
-       }
-       if (offset > inode->i_sb->s_maxbytes) {
-               spin_unlock(&inode->i_lock);
-               goto out_big;
+               goto out;
        }
+
+       oldsize = inode->i_size;
        i_size_write(inode, offset);
        spin_unlock(&inode->i_lock);
-out_truncate:
+       truncate_pagecache(inode, oldsize, offset);
        if (inode->i_op->truncate)
                inode->i_op->truncate(inode);
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
-out_busy:
-       return -ETXTBSY;
+out:
+       return err;
 }
 
 static int
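
The rewritten cifs_vmtruncate() above, like the generic_cont_expand_simple()
and fuse changes elsewhere in this merge, delegates the size checks
(RLIMIT_FSIZE with SIGXFSZ, sb->s_maxbytes, in-use swapfiles) to the new
inode_newsize_ok() helper and the unmap/truncate/unmap page-cache teardown
to truncate_pagecache(). The shape a filesystem's truncate path takes after
this change, sketched with a hypothetical my_setsize() (locking elided):

    static int my_setsize(struct inode *inode, loff_t newsize)
    {
            loff_t oldsize;
            int err;

            err = inode_newsize_ok(inode, newsize); /* limits, swapfile check */
            if (err)
                    return err;

            oldsize = inode->i_size;
            i_size_write(inode, newsize);
            truncate_pagecache(inode, oldsize, newsize); /* unmap + drop pages */
            return 0;
    }
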
index 8ccd5ed81d9cae6713e8fda58c25723a61c49c9d..d99860a33890d7ae9a0734b17c5fc4e0263eebdf 100644 (file)
@@ -2,6 +2,7 @@
 #define _CODA_INT_
 
 struct dentry;
+struct file;
 
 extern struct file_system_type coda_fs_type;
 extern unsigned long coda_timeout;
index 3aa48834a222b87819eb5302a63a507b1b910071..d576b552e8e2eb90a98324e79b7d8917fb96c04c 100644 (file)
@@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
                                 char __user * type, unsigned long flags,
                                 void __user * data)
 {
-       unsigned long type_page;
+       char *kernel_type;
        unsigned long data_page;
-       unsigned long dev_page;
+       char *kernel_dev;
        char *dir_page;
        int retval;
 
-       retval = copy_mount_options (type, &type_page);
+       retval = copy_mount_string(type, &kernel_type);
        if (retval < 0)
                goto out;
 
@@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
        if (IS_ERR(dir_page))
                goto out1;
 
-       retval = copy_mount_options (dev_name, &dev_page);
+       retval = copy_mount_string(dev_name, &kernel_dev);
        if (retval < 0)
                goto out2;
 
-       retval = copy_mount_options (data, &data_page);
+       retval = copy_mount_options(data, &data_page);
        if (retval < 0)
                goto out3;
 
        retval = -EINVAL;
 
-       if (type_page && data_page) {
-               if (!strcmp((char *)type_page, SMBFS_NAME)) {
+       if (kernel_type && data_page) {
+               if (!strcmp(kernel_type, SMBFS_NAME)) {
                        do_smb_super_data_conv((void *)data_page);
-               } else if (!strcmp((char *)type_page, NCPFS_NAME)) {
+               } else if (!strcmp(kernel_type, NCPFS_NAME)) {
                        do_ncp_super_data_conv((void *)data_page);
-               } else if (!strcmp((char *)type_page, NFS4_NAME)) {
+               } else if (!strcmp(kernel_type, NFS4_NAME)) {
                        if (do_nfs4_super_data_conv((void *) data_page))
                                goto out4;
                }
        }
 
-       retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
+       retval = do_mount(kernel_dev, dir_page, kernel_type,
                        flags, (void*)data_page);
 
  out4:
        free_page(data_page);
  out3:
-       free_page(dev_page);
+       kfree(kernel_dev);
  out2:
        putname(dir_page);
  out1:
-       free_page(type_page);
+       kfree(kernel_type);
  out:
        return retval;
 }
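
compat_sys_mount() now mirrors the native mount path: the type and device
strings are copied with copy_mount_string() into kmalloc'd buffers (hence the
kfree() cleanups above), while only the opaque data blob keeps the page-sized
copy_mount_options() copy, its layout being filesystem-defined. Assuming
copy_mount_string() is essentially a page-capped strndup_user(), which is
what the error handling above implies, it amounts to:

    static int copy_mount_string(const void __user *data, char **where)
    {
            char *tmp;

            if (!data) {
                    *where = NULL;
                    return 0;
            }
            tmp = strndup_user(data, PAGE_SIZE);    /* ERR_PTR on failure */
            if (IS_ERR(tmp))
                    return PTR_ERR(tmp);
            *where = tmp;
            return 0;
    }
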
index a2edb79134472170e95d5fe835e7ad6a783e00db..31f4b0e6d72c333bf1633ef51118ad6ccfc31069 100644 (file)
@@ -63,9 +63,9 @@ static void drop_slab(void)
 }
 
 int drop_caches_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (write) {
                if (sysctl_drop_caches & 1)
                        drop_pagecache();
index 5c833c18d0d47bef760c9a4053f5188e34781105..d49be6bc1793b57fdbfb5efc958311dd8f94801e 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <linux/kmod.h>
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -63,6 +64,7 @@
 
 int core_uses_pid;
 char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
 int suid_dumpable = 0;
 
 /* The maximal length of core_pattern is also specified in sysctl.c */
@@ -1393,18 +1395,16 @@ out_ret:
        return retval;
 }
 
-int set_binfmt(struct linux_binfmt *new)
+void set_binfmt(struct linux_binfmt *new)
 {
-       struct linux_binfmt *old = current->binfmt;
+       struct mm_struct *mm = current->mm;
 
-       if (new) {
-               if (!try_module_get(new->module))
-                       return -1;
-       }
-       current->binfmt = new;
-       if (old)
-               module_put(old->module);
-       return 0;
+       if (mm->binfmt)
+               module_put(mm->binfmt->module);
+
+       mm->binfmt = new;
+       if (new)
+               __module_get(new->module);
 }
 
 EXPORT_SYMBOL(set_binfmt);
@@ -1728,6 +1728,27 @@ int get_dumpable(struct mm_struct *mm)
        return (ret >= 2) ? 2 : ret;
 }
 
+static void wait_for_dump_helpers(struct file *file)
+{
+       struct pipe_inode_info *pipe;
+
+       pipe = file->f_path.dentry->d_inode->i_pipe;
+
+       pipe_lock(pipe);
+       pipe->readers++;
+       pipe->writers--;
+
+       while ((pipe->readers > 1) && (!signal_pending(current))) {
+               wake_up_interruptible_sync(&pipe->wait);
+               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+               pipe_wait(pipe);
+       }
+
+       pipe->readers--;
+       pipe->writers++;
+       pipe_unlock(pipe);
+}
+
 void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
        struct core_state core_state;
@@ -1744,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
        char **helper_argv = NULL;
        int helper_argc = 0;
-       char *delimit;
+       int dump_count = 0;
+       static atomic_t core_dump_count = ATOMIC_INIT(0);
 
        audit_core_dumps(signr);
 
-       binfmt = current->binfmt;
+       binfmt = mm->binfmt;
        if (!binfmt || !binfmt->core_dump)
                goto fail;
 
@@ -1799,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        lock_kernel();
        ispipe = format_corename(corename, signr);
        unlock_kernel();
-       /*
-        * Don't bother to check the RLIMIT_CORE value if core_pattern points
-        * to a pipe.  Since we're not writing directly to the filesystem
-        * RLIMIT_CORE doesn't really apply, as no actual core file will be
-        * created unless the pipe reader choses to write out the core file
-        * at which point file size limits and permissions will be imposed
-        * as it does with any other process
-        */
+
        if ((!ispipe) && (core_limit < binfmt->min_coredump))
                goto fail_unlock;
 
        if (ispipe) {
+               if (core_limit == 0) {
+                       /*
+                        * Normally core limits are irrelevant to pipes, since
+                        * we're not writing to the file system, but we use
+                        * core_limit of 0 here as a special value. Any
+                        * non-zero limit gets set to RLIM_INFINITY below, but
+                        * a limit of 0 skips the dump.  This is a consistent
+                        * way to catch recursive crashes.  We can still crash
+                        * if the core_pattern binary sets RLIMIT_CORE to a
+                        * non-zero value while running as root, and then does
+                        * something stupid.  Note that task_tgid_vnr grabs the
+                        * pid of the thread group leader.  That way we get the
+                        * right pid if a thread in a multi-threaded
+                        * core_pattern process dies.
+                        */
+                       printk(KERN_WARNING
+                               "Process %d(%s) has RLIMIT_CORE set to 0\n",
+                               task_tgid_vnr(current), current->comm);
+                       printk(KERN_WARNING "Aborting core\n");
+                       goto fail_unlock;
+               }
+
+               dump_count = atomic_inc_return(&core_dump_count);
+               if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+                       printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+                              task_tgid_vnr(current), current->comm);
+                       printk(KERN_WARNING "Skipping core dump\n");
+                       goto fail_dropcount;
+               }
+
                helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
                if (!helper_argv) {
                        printk(KERN_WARNING "%s failed to allocate memory\n",
                               __func__);
-                       goto fail_unlock;
-               }
-               /* Terminate the string before the first option */
-               delimit = strchr(corename, ' ');
-               if (delimit)
-                       *delimit = '\0';
-               delimit = strrchr(helper_argv[0], '/');
-               if (delimit)
-                       delimit++;
-               else
-                       delimit = helper_argv[0];
-               if (!strcmp(delimit, current->comm)) {
-                       printk(KERN_NOTICE "Recursive core dump detected, "
-                                       "aborting\n");
-                       goto fail_unlock;
+                       goto fail_dropcount;
                }
 
                core_limit = RLIM_INFINITY;
 
                /* SIGPIPE can happen, but it's just never processed */
-               if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
+               if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
                                &file)) {
                        printk(KERN_INFO "Core dump to %s pipe failed\n",
                               corename);
-                       goto fail_unlock;
+                       goto fail_dropcount;
                }
        } else
                file = filp_open(corename,
                                 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
                                 0600);
        if (IS_ERR(file))
-               goto fail_unlock;
+               goto fail_dropcount;
        inode = file->f_path.dentry->d_inode;
        if (inode->i_nlink > 1)
                goto close_fail;        /* multiple links - don't dump */
@@ -1875,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        if (retval)
                current->signal->group_exit_code |= 0x80;
 close_fail:
+       if (ispipe && core_pipe_limit)
+               wait_for_dump_helpers(file);
        filp_close(file, NULL);
+fail_dropcount:
+       if (dump_count)
+               atomic_dec(&core_dump_count);
 fail_unlock:
        if (helper_argv)
                argv_free(helper_argv);
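
Two behavioural knobs fall out of the do_coredump() rework above: a pipe
core_pattern with RLIMIT_CORE set to 0 never dumps (the anti-recursion rule
the comment spells out), and the new core_pipe_limit sysctl bounds how many
pipe dump helpers may run at once, with wait_for_dump_helpers() only waiting
for the helper when that limit is set. The gate itself reduces to a small
atomic pattern (names hypothetical):

    static atomic_t dump_count = ATOMIC_INIT(0);

    /* 0 if this dump may proceed, -EBUSY if over a non-zero limit */
    static int enter_dump(unsigned int limit)
    {
            int n = atomic_inc_return(&dump_count);

            if (limit && n > limit) {
                    atomic_dec(&dump_count);
                    return -EBUSY;
            }
            return 0;
    }

    static void leave_dump(void)
    {
            atomic_dec(&dump_count);
    }
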
index 5ab10c3bbebec0dd1ba2ab81c340b6be415dcafd..9f500dec3b5901982adcc2618670f72ca4b3c01a 100644 (file)
@@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
        }
 
        lock_super(sb);
-       lock_kernel();
        sbi = sb->s_fs_info;
        fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
        fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
@@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
 out:
        if (or)
                osd_end_request(or);
-       unlock_kernel();
        unlock_super(sb);
        kfree(fscb);
        return ret;
@@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb)
        int num_pend;
        struct exofs_sb_info *sbi = sb->s_fs_info;
 
-       lock_kernel();
-
        if (sb->s_dirt)
                exofs_write_super(sb);
 
@@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb)
        osduld_put_device(sbi->s_dev);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
-
-       unlock_kernel();
 }
 
 /*
index 1c1638f873a4ac6d0d1e92179c76721ac2c55c4a..ade634076d0ab75371a852f0ec3c50d198a9012e 100644 (file)
@@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = {
        .writepages             = ext2_writepages,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 const struct address_space_operations ext2_aops_xip = {
@@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = {
        .direct_IO              = ext2_direct_IO,
        .writepages             = ext2_writepages,
        .migratepage            = buffer_migrate_page,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 /*
index cd098a7b77fc04b7255fe5586248faa67dbbfb07..acf1b14233275e891fd5e1d55560fed331add18c 100644 (file)
@@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = {
        .direct_IO              = ext3_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext3_writeback_aops = {
@@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = {
        .direct_IO              = ext3_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext3_journalled_aops = {
@@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = {
        .invalidatepage         = ext3_invalidatepage,
        .releasepage            = ext3_releasepage,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 void ext3_set_aops(struct inode *inode)
index 3a798737e305756a493e6ad13f865b302f9174a8..064746fad5812e693ef6d3ef2578822a3007cadb 100644 (file)
@@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = {
        .direct_IO              = ext4_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext4_writeback_aops = {
@@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = {
        .direct_IO              = ext4_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext4_journalled_aops = {
@@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = {
        .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext4_da_aops = {
@@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = {
        .direct_IO              = ext4_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 void ext4_set_aops(struct inode *inode)
index 8970d8c49bb00eaa791a390f5dfd87b56a438294..04629d1302fc45e42aef40b180cfd1fcc4b0f73b 100644 (file)
@@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb)
 
        iput(sbi->fat_inode);
 
-       if (sbi->nls_disk) {
-               unload_nls(sbi->nls_disk);
-               sbi->nls_disk = NULL;
-               sbi->options.codepage = fat_default_codepage;
-       }
-       if (sbi->nls_io) {
-               unload_nls(sbi->nls_io);
-               sbi->nls_io = NULL;
-       }
-       if (sbi->options.iocharset != fat_default_iocharset) {
+       unload_nls(sbi->nls_disk);
+       unload_nls(sbi->nls_io);
+
+       if (sbi->options.iocharset != fat_default_iocharset)
                kfree(sbi->options.iocharset);
-               sbi->options.iocharset = fat_default_iocharset;
-       }
 
        sb->s_fs_info = NULL;
        kfree(sbi);
index ae413086db978093123b2a59947d78e23a02a693..fc089f2f7f56ccbbd5662eb63e2ec9832d768004 100644 (file)
@@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp)
        return pid;
 }
 
+static int f_setown_ex(struct file *filp, unsigned long arg)
+{
+       struct f_owner_ex __user *owner_p = (void __user *)arg;
+       struct f_owner_ex owner;
+       struct pid *pid;
+       int type;
+       int ret;
+
+       ret = copy_from_user(&owner, owner_p, sizeof(owner));
+       if (ret)
+               return -EFAULT;
+
+       switch (owner.type) {
+       case F_OWNER_TID:
+               type = PIDTYPE_MAX;
+               break;
+
+       case F_OWNER_PID:
+               type = PIDTYPE_PID;
+               break;
+
+       case F_OWNER_GID:
+               type = PIDTYPE_PGID;
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       rcu_read_lock();
+       pid = find_vpid(owner.pid);
+       if (owner.pid && !pid)
+               ret = -ESRCH;
+       else
+               ret = __f_setown(filp, pid, type, 1);
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static int f_getown_ex(struct file *filp, unsigned long arg)
+{
+       struct f_owner_ex __user *owner_p = (void __user *)arg;
+       struct f_owner_ex owner;
+       int ret = 0;
+
+       read_lock(&filp->f_owner.lock);
+       owner.pid = pid_vnr(filp->f_owner.pid);
+       switch (filp->f_owner.pid_type) {
+       case PIDTYPE_MAX:
+               owner.type = F_OWNER_TID;
+               break;
+
+       case PIDTYPE_PID:
+               owner.type = F_OWNER_PID;
+               break;
+
+       case PIDTYPE_PGID:
+               owner.type = F_OWNER_GID;
+               break;
+
+       default:
+               WARN_ON(1);
+               ret = -EINVAL;
+               break;
+       }
+       read_unlock(&filp->f_owner.lock);
+
+       if (!ret && copy_to_user(owner_p, &owner, sizeof(owner)))
+               ret = -EFAULT;
+       return ret;
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                struct file *filp)
 {
@@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
        case F_SETOWN:
                err = f_setown(filp, arg, 1);
                break;
+       case F_GETOWN_EX:
+               err = f_getown_ex(filp, arg);
+               break;
+       case F_SETOWN_EX:
+               err = f_setown_ex(filp, arg);
+               break;
        case F_GETSIG:
                err = filp->f_owner.signum;
                break;
@@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p,
 
 static void send_sigio_to_task(struct task_struct *p,
                               struct fown_struct *fown,
-                              int fd,
-                              int reason)
+                              int fd, int reason, int group)
 {
        /*
         * F_SETSIG can change ->signum lockless in parallel, make
@@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p,
                        else
                                si.si_band = band_table[reason - POLL_IN];
                        si.si_fd    = fd;
-                       if (!group_send_sig_info(signum, &si, p))
+                       if (!do_send_sig_info(signum, &si, p, group))
                                break;
                /* fall-through: fall back on the old plain SIGIO signal */
                case 0:
-                       group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
+                       do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
        }
 }
 
@@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
        struct task_struct *p;
        enum pid_type type;
        struct pid *pid;
+       int group = 1;
        
        read_lock(&fown->lock);
+
        type = fown->pid_type;
+       if (type == PIDTYPE_MAX) {
+               group = 0;
+               type = PIDTYPE_PID;
+       }
+
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;
        
        read_lock(&tasklist_lock);
        do_each_pid_task(pid, type, p) {
-               send_sigio_to_task(p, fown, fd, band);
+               send_sigio_to_task(p, fown, fd, band, group);
        } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
  out_unlock_fown:
@@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
 }
 
 static void send_sigurg_to_task(struct task_struct *p,
-                                struct fown_struct *fown)
+                               struct fown_struct *fown, int group)
 {
        if (sigio_perm(p, fown, SIGURG))
-               group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
+               do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
 }
 
 int send_sigurg(struct fown_struct *fown)
@@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown)
        struct task_struct *p;
        enum pid_type type;
        struct pid *pid;
+       int group = 1;
        int ret = 0;
        
        read_lock(&fown->lock);
+
        type = fown->pid_type;
+       if (type == PIDTYPE_MAX) {
+               group = 0;
+               type = PIDTYPE_PID;
+       }
+
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;
@@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown)
        
        read_lock(&tasklist_lock);
        do_each_pid_task(pid, type, p) {
-               send_sigurg_to_task(p, fown);
+               send_sigurg_to_task(p, fown, group);
        } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
  out_unlock_fown:
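
For userspace, the new F_SETOWN_EX/F_GETOWN_EX pair lets SIGIO be directed
at a single thread (F_OWNER_TID maps to the internal PIDTYPE_MAX marker
above) rather than a whole process or process group. A hedged example,
assuming a libc that already exposes struct f_owner_ex and the new
constants:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>

    /* Deliver this fd's SIGIO to one specific thread instead of the group. */
    static int own_fd_as_tid(int fd, pid_t tid)
    {
            struct f_owner_ex owner = {
                    .type = F_OWNER_TID,
                    .pid  = tid,
            };

            return fcntl(fd, F_SETOWN_EX, &owner);
    }
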
index 334ce39881f8fea36897196a262f4164cccde1f8..8eb44042e00934dbe1fe13c2af1fd659ddd8c296 100644 (file)
@@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files);
  * Handle nr_files sysctl
  */
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+int proc_nr_files(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        files_stat.nr_files = get_nr_files();
-       return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       return proc_dointvec(table, write, buffer, lenp, ppos);
 }
 #else
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+int proc_nr_files(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
index e703654e7f40d901975d3a0953ce353a755248d7..992f6c9410bb0f27c8e6b02f21505c1fbda26aa5 100644 (file)
@@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
                return 0;
 
        if (attr->ia_valid & ATTR_SIZE) {
-               unsigned long limit;
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
-                       send_sig(SIGXFSZ, current, 0);
-                       return -EFBIG;
-               }
+               err = inode_newsize_ok(inode, attr->ia_size);
+               if (err)
+                       return err;
                is_truncate = true;
        }
 
@@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
         * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
         */
        if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
-               if (outarg.attr.size < oldsize)
-                       fuse_truncate(inode->i_mapping, outarg.attr.size);
+               truncate_pagecache(inode, oldsize, outarg.attr.size);
                invalidate_inode_pages2(inode->i_mapping);
        }
 
index fc9c79feb5f7c2150edae61b4af2b56d2291f7b9..01cc462ff45d5ccdf0bc0bc0526e493ed60fc729 100644 (file)
@@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                   u64 attr_valid);
 
-void fuse_truncate(struct address_space *mapping, loff_t offset);
-
 /**
  * Initialize the client device
  */
index 6da947daabda1894f55a5079226ade24b30d72e0..1a822ce2b24b7a83fd8c579921d07d769e4c2645 100644 (file)
@@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
        return 0;
 }
 
-void fuse_truncate(struct address_space *mapping, loff_t offset)
-{
-       /* See vmtruncate() */
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       truncate_inode_pages(mapping, offset);
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-}
-
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                   u64 attr_valid)
 {
@@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
        spin_unlock(&fc->lock);
 
        if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
-               if (attr->size < oldsize)
-                       fuse_truncate(inode->i_mapping, attr->size);
+               truncate_pagecache(inode, oldsize, attr->size);
                invalidate_inode_pages2(inode->i_mapping);
        }
 }
index 7ebae9a4ecc01b50533cf0316dc936626a3350b9..694b5d48f0366e1b535abb102cbf456f3ae2e2c6 100644 (file)
@@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
        .direct_IO = gfs2_direct_IO,
        .migratepage = buffer_migrate_page,
        .is_partially_uptodate = block_is_partially_uptodate,
+       .error_remove_page = generic_error_remove_page,
 };
 
 static const struct address_space_operations gfs2_ordered_aops = {
@@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
        .direct_IO = gfs2_direct_IO,
        .migratepage = buffer_migrate_page,
        .is_partially_uptodate = block_is_partially_uptodate,
+       .error_remove_page = generic_error_remove_page,
 };
 
 static const struct address_space_operations gfs2_jdata_aops = {
@@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
        .is_partially_uptodate = block_is_partially_uptodate,
+       .error_remove_page = generic_error_remove_page,
 };
 
 void gfs2_set_aops(struct inode *inode)
index c3ac180540576a7ba8f59c9ded3bb1d52b0b7a47..247436c10deb853a35f4daf7db90cf18de96099a 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
 #include <linux/namei.h>
-#include <linux/utsname.h>
 #include <linux/mm.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
index 7b6165f25fbefe14cacb5a8be3464d79f20e84a1..8bbe03c3f6d54e5286ff3c0c0cce4e6c5f653134 100644 (file)
@@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb)
        brelse(HFS_SB(sb)->mdb_bh);
        brelse(HFS_SB(sb)->alt_mdb_bh);
 
-       if (HFS_SB(sb)->nls_io)
-               unload_nls(HFS_SB(sb)->nls_io);
-       if (HFS_SB(sb)->nls_disk)
-               unload_nls(HFS_SB(sb)->nls_disk);
+       unload_nls(HFS_SB(sb)->nls_io);
+       unload_nls(HFS_SB(sb)->nls_disk);
 
        free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
        kfree(HFS_SB(sb));
index c0759fe0855b623863eac1345c8b8e4bf35608a9..43022f3d514871d9f2405ff32eacbd1319f8e26a 100644 (file)
@@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb)
        iput(HFSPLUS_SB(sb).alloc_file);
        iput(HFSPLUS_SB(sb).hidden_dir);
        brelse(HFSPLUS_SB(sb).s_vhbh);
-       if (HFSPLUS_SB(sb).nls)
-               unload_nls(HFSPLUS_SB(sb).nls);
+       unload_nls(HFSPLUS_SB(sb).nls);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
 
@@ -464,8 +463,7 @@ out:
 
 cleanup:
        hfsplus_put_super(sb);
-       if (nls)
-               unload_nls(nls);
+       unload_nls(nls);
        return err;
 }
 
index eba6d552d9c901668d7803c5f3a3569bbaab081b..87a1258953b8e387fe49367acfd6d2ff5c164ca1 100644 (file)
@@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode)
 
 static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
 {
-       struct super_block *sb = inode->i_sb;
-
-       if (!hlist_unhashed(&inode->i_hash)) {
-               if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-                       list_move(&inode->i_list, &inode_unused);
-               inodes_stat.nr_unused++;
-               if (!sb || (sb->s_flags & MS_ACTIVE)) {
-                       spin_unlock(&inode_lock);
-                       return;
-               }
-               inode->i_state |= I_WILL_FREE;
-               spin_unlock(&inode_lock);
-               /*
-                * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
-                * in our backing_dev_info.
-                */
-               write_inode_now(inode, 1);
-               spin_lock(&inode_lock);
-               inode->i_state &= ~I_WILL_FREE;
-               inodes_stat.nr_unused--;
-               hlist_del_init(&inode->i_hash);
+       if (generic_detach_inode(inode)) {
+               truncate_hugepages(inode, 0);
+               clear_inode(inode);
+               destroy_inode(inode);
        }
-       list_del_init(&inode->i_list);
-       list_del_init(&inode->i_sb_list);
-       inode->i_state |= I_FREEING;
-       inodes_stat.nr_inodes--;
-       spin_unlock(&inode_lock);
-       truncate_hugepages(inode, 0);
-       clear_inode(inode);
-       destroy_inode(inode);
 }
 
 static void hugetlbfs_drop_inode(struct inode *inode)
@@ -936,15 +911,9 @@ static struct file_system_type hugetlbfs_fs_type = {
 
 static struct vfsmount *hugetlbfs_vfsmount;
 
-static int can_do_hugetlb_shm(int creat_flags)
+static int can_do_hugetlb_shm(void)
 {
-       if (creat_flags != HUGETLB_SHMFS_INODE)
-               return 0;
-       if (capable(CAP_IPC_LOCK))
-               return 1;
-       if (in_group_p(sysctl_hugetlb_shm_group))
-               return 1;
-       return 0;
+       return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
 }
 
 struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
@@ -960,7 +929,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
        if (!hugetlbfs_vfsmount)
                return ERR_PTR(-ENOENT);
 
-       if (!can_do_hugetlb_shm(creat_flags)) {
+       if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
                *user = current_user();
                if (user_shm_lock(size, *user)) {
                        WARN_ONCE(1,
index 76582b06ab975d76afcba7f5800d6c8e3873ebc6..4d8e3be55976272732f6f42610ab7813f66b8133 100644 (file)
@@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(generic_delete_inode);
 
-static void generic_forget_inode(struct inode *inode)
+/**
+ *     generic_detach_inode - remove inode from inode lists
+ *     @inode: inode to remove
+ *
+ *     Remove inode from inode lists, write it if it's dirty. This is just an
+ *     internal VFS helper exported for hugetlbfs. Do not use!
+ *
+ *     Returns 1 if inode should be completely destroyed.
+ */
+int generic_detach_inode(struct inode *inode)
 {
        struct super_block *sb = inode->i_sb;
 
@@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
                inodes_stat.nr_unused++;
                if (sb->s_flags & MS_ACTIVE) {
                        spin_unlock(&inode_lock);
-                       return;
+                       return 0;
                }
                WARN_ON(inode->i_state & I_NEW);
                inode->i_state |= I_WILL_FREE;
@@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
        inode->i_state |= I_FREEING;
        inodes_stat.nr_inodes--;
        spin_unlock(&inode_lock);
+       return 1;
+}
+EXPORT_SYMBOL_GPL(generic_detach_inode);
+
+static void generic_forget_inode(struct inode *inode)
+{
+       if (!generic_detach_inode(inode))
+               return;
        if (inode->i_data.nrpages)
                truncate_inode_pages(&inode->i_data, 0);
        clear_inode(inode);
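
This split turns the old generic_forget_inode() into a detach step plus a destroy step, so hugetlbfs (above) reuses the inode-list manipulation while supplying its own teardown. A sketch of the resulting caller pattern, mirroring generic_forget_inode():

    static void myfs_forget_inode(struct inode *inode)  /* hypothetical */
    {
            if (!generic_detach_inode(inode))
                    return;         /* inode parked on the unused list */
            truncate_inode_pages(&inode->i_data, 0);
            clear_inode(inode);
            destroy_inode(inode);
    }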
@@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
        struct inode *inode = dentry->d_inode;
        struct timespec now;
 
-       if (mnt_want_write(mnt))
-               return;
        if (inode->i_flags & S_NOATIME)
-               goto out;
+               return;
        if (IS_NOATIME(inode))
-               goto out;
+               return;
        if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
-               goto out;
+               return;
 
        if (mnt->mnt_flags & MNT_NOATIME)
-               goto out;
+               return;
        if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
-               goto out;
+               return;
 
        now = current_fs_time(inode->i_sb);
 
        if (!relatime_need_update(mnt, inode, now))
-               goto out;
+               return;
 
        if (timespec_equal(&inode->i_atime, &now))
-               goto out;
+               return;
+
+       if (mnt_want_write(mnt))
+               return;
 
        inode->i_atime = now;
        mark_inode_dirty_sync(inode);
-out:
        mnt_drop_write(mnt);
 }
 EXPORT_SYMBOL(touch_atime);
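
The touch_atime() reorder defers mnt_want_write() until every cheap bail-out has been tried, so the common no-update case never touches the mount write count. The resulting shape, with the predicates as shorthand for the checks above (not real helpers):

    if (atime_updates_disabled(inode, mnt))  /* S_NOATIME, MNT_NOATIME, ... */
            return;
    if (!atime_would_change(inode, now))     /* relatime + equality checks */
            return;
    if (mnt_want_write(mnt))                 /* only now take the write ref */
            return;
    inode->i_atime = now;
    mark_inode_dirty_sync(inode);
    mnt_drop_write(mnt);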
@@ -1444,34 +1461,37 @@ void file_update_time(struct file *file)
 {
        struct inode *inode = file->f_path.dentry->d_inode;
        struct timespec now;
-       int sync_it = 0;
-       int err;
+       enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
 
+       /* First try to exhaust all avenues to not sync */
        if (IS_NOCMTIME(inode))
                return;
 
-       err = mnt_want_write_file(file);
-       if (err)
-               return;
-
        now = current_fs_time(inode->i_sb);
-       if (!timespec_equal(&inode->i_mtime, &now)) {
-               inode->i_mtime = now;
-               sync_it = 1;
-       }
+       if (!timespec_equal(&inode->i_mtime, &now))
+               sync_it = S_MTIME;
 
-       if (!timespec_equal(&inode->i_ctime, &now)) {
-               inode->i_ctime = now;
-               sync_it = 1;
-       }
+       if (!timespec_equal(&inode->i_ctime, &now))
+               sync_it |= S_CTIME;
 
-       if (IS_I_VERSION(inode)) {
-               inode_inc_iversion(inode);
-               sync_it = 1;
-       }
+       if (IS_I_VERSION(inode))
+               sync_it |= S_VERSION;
+
+       if (!sync_it)
+               return;
 
-       if (sync_it)
-               mark_inode_dirty_sync(inode);
+       /* Finally allowed to write? Takes lock. */
+       if (mnt_want_write_file(file))
+               return;
+
+       /* Only change inode inside the lock region */
+       if (sync_it & S_VERSION)
+               inode_inc_iversion(inode);
+       if (sync_it & S_CTIME)
+               inode->i_ctime = now;
+       if (sync_it & S_MTIME)
+               inode->i_mtime = now;
+       mark_inode_dirty_sync(inode);
        mnt_drop_write(file->f_path.mnt);
 }
 EXPORT_SYMBOL(file_update_time);
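
file_update_time() gets the same treatment: what changed is first accumulated into a small bitmask, and mnt_want_write_file() is taken only if at least one bit is set. The anonymous enum doubles as the flag namespace, and the inode fields are modified only inside the locked region, so nothing is published that the write path could then fail to take:

    enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;

    if (!timespec_equal(&inode->i_mtime, &now))
            sync_it = S_MTIME;
    /* ctime and i_version bits OR in the same way; bail if sync_it == 0 */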
@@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
        else if (S_ISSOCK(mode))
                inode->i_fop = &bad_sock_fops;
        else
-               printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
-                      mode);
+               printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
+                                 " inode %s:%lu\n", mode, inode->i_sb->s_id,
+                                 inode->i_ino);
 }
 EXPORT_SYMBOL(init_special_inode);
index d55ef562f0bb588939d3b870d675a94e163a5a1e..515175b8b72e95f47893d3b1f32017da481e0aec 100644 (file)
@@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *);
  * namespace.c
  */
 extern int copy_mount_options(const void __user *, unsigned long *);
+extern int copy_mount_string(const void __user *, char **);
 
 extern void free_vfsmnt(struct vfsmount *);
 extern struct vfsmount *alloc_vfsmnt(const char *);
index 5612880fcbe7d7436f3579c7c1add7360170e407..7b17a14396ff792152ae6da49d178355d5a5e7ea 100644 (file)
@@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags);
 static int fiemap_check_ranges(struct super_block *sb,
                               u64 start, u64 len, u64 *new_len)
 {
+       u64 maxbytes = (u64) sb->s_maxbytes;
+
        *new_len = len;
 
        if (len == 0)
                return -EINVAL;
 
-       if (start > sb->s_maxbytes)
+       if (start > maxbytes)
                return -EFBIG;
 
        /*
         * Shrink request scope to what the fs can actually handle.
         */
-       if ((len > sb->s_maxbytes) ||
-           (sb->s_maxbytes - len) < start)
-               *new_len = sb->s_maxbytes - start;
+       if (len > maxbytes || (maxbytes - len) < start)
+               *new_len = maxbytes - start;
 
        return 0;
 }
index 85f96bc651c727ba04915138c50b45e9b63a3acd..6b4dcd4f2943e632c9d89115a2395084be95adc5 100644 (file)
@@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb)
 #ifdef CONFIG_JOLIET
        lock_kernel();
 
-       if (sbi->s_nls_iocharset) {
-               unload_nls(sbi->s_nls_iocharset);
-               sbi->s_nls_iocharset = NULL;
-       }
+       unload_nls(sbi->s_nls_iocharset);
 
        unlock_kernel();
 #endif
@@ -912,8 +909,7 @@ out_no_root:
                printk(KERN_WARNING "%s: get root inode failed\n", __func__);
 out_no_inode:
 #ifdef CONFIG_JOLIET
-       if (sbi->s_nls_iocharset)
-               unload_nls(sbi->s_nls_iocharset);
+       unload_nls(sbi->s_nls_iocharset);
 #endif
        goto out_freesbi;
 out_no_read:
index 37e6dcda8fc84f587508f8db58abdecee69524e1..2234c73fc5773531bd59ee81b3bbdf45d05a83ff 100644 (file)
@@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb)
        rc = jfs_umount(sb);
        if (rc)
                jfs_err("jfs_umount failed with return code %d", rc);
-       if (sbi->nls_tab)
-               unload_nls(sbi->nls_tab);
-       sbi->nls_tab = NULL;
+
+       unload_nls(sbi->nls_tab);
 
        truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
        iput(sbi->direct_inode);
-       sbi->direct_inode = NULL;
 
        kfree(sbi);
 
@@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 
        if (nls_map != (void *) -1) {
                /* Discard old (if remount) */
-               if (sbi->nls_tab)
-                       unload_nls(sbi->nls_tab);
+               unload_nls(sbi->nls_tab);
                sbi->nls_tab = nls_map;
        }
        return 1;
index dcec3d3ea64f944cd51d36f35b4150279c5659f2..219576c52d807e15b779d53be3a42dfa1baae9ae 100644 (file)
@@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
                                const void *from, size_t available)
 {
        loff_t pos = *ppos;
+       size_t ret;
+
        if (pos < 0)
                return -EINVAL;
-       if (pos >= available)
+       if (pos >= available || !count)
                return 0;
        if (count > available - pos)
                count = available - pos;
-       if (copy_to_user(to, from + pos, count))
+       ret = copy_to_user(to, from + pos, count);
+       if (ret == count)
                return -EFAULT;
+       count -= ret;
        *ppos = pos + count;
        return count;
 }
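
The rewrite makes simple_read_from_buffer() honor partial copies: copy_to_user() returns the number of bytes it could not copy, so only a completely failed copy yields -EFAULT, and a short copy advances *ppos by what was actually transferred. A hedged usage sketch for a debugfs-style read method (names hypothetical):

    static ssize_t myfs_read(struct file *file, char __user *buf,
                             size_t count, loff_t *ppos)
    {
            static const char msg[] = "hello\n";

            return simple_read_from_buffer(buf, count, ppos, msg,
                                           sizeof(msg) - 1);
    }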
@@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
        if (copy_from_user(attr->set_buf, buf, size))
                goto out;
 
-       ret = len; /* claim we got the whole input */
        attr->set_buf[size] = '\0';
        val = simple_strtol(attr->set_buf, NULL, 0);
-       attr->set(attr->data, val);
+       ret = attr->set(attr->data, val);
+       if (ret == 0)
+               ret = len; /* on success, claim we got the whole input */
 out:
        mutex_unlock(&attr->mutex);
        return ret;
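
simple_attr_write() now propagates the return value of the attribute's set() hook instead of unconditionally claiming success, so a hook that rejects a value surfaces as an error from write(2). Sketch of a set() hook that benefits (name and limit hypothetical):

    static int myattr_set(void *data, u64 val)      /* hypothetical */
    {
            if (val > MYATTR_MAX)                   /* hypothetical limit */
                    return -EINVAL;                 /* now reaches userspace */
            *(u64 *)data = val;
            return 0;
    }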
index 0336f2beacdee7792bf4155be5d279bfd30ca756..b583ab0a4cbb27d33c3eff01ae59752c17fbfa12 100644 (file)
@@ -8,7 +8,6 @@
 
 #include <linux/types.h>
 #include <linux/sched.h>
-#include <linux/utsname.h>
 #include <linux/nfs.h>
 
 #include <linux/sunrpc/xdr.h>
index e1d52865319266fa0a96cd09e3d00aeceb7a8561..ad9dbbc9145d463e306a3de7fc2220e9ec9ff866 100644 (file)
@@ -9,7 +9,6 @@
 
 #include <linux/types.h>
 #include <linux/sched.h>
-#include <linux/utsname.h>
 #include <linux/nfs.h>
 
 #include <linux/sunrpc/xdr.h>
index 7230787d18b02979122218429b4bf8af59b24cf7..bdc3cb4fd2220c6fde0f35159a902768a75d60f6 100644 (file)
@@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
 {
        struct vfsmount *mnt;
 
-       if (!type || !memchr(type, 0, PAGE_SIZE))
+       if (!type)
                return -EINVAL;
 
        /* we need capabilities... */
@@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
        return 0;
 }
 
+int copy_mount_string(const void __user *data, char **where)
+{
+       char *tmp;
+
+       if (!data) {
+               *where = NULL;
+               return 0;
+       }
+
+       tmp = strndup_user(data, PAGE_SIZE);
+       if (IS_ERR(tmp))
+               return PTR_ERR(tmp);
+
+       *where = tmp;
+       return 0;
+}
+
 /*
  * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
@@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 
        if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
                return -EINVAL;
-       if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
-               return -EINVAL;
 
        if (data_page)
                ((char *)data_page)[PAGE_SIZE - 1] = 0;
@@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns);
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
                char __user *, type, unsigned long, flags, void __user *, data)
 {
-       int retval;
+       int ret;
+       char *kernel_type;
+       char *kernel_dir;
+       char *kernel_dev;
        unsigned long data_page;
-       unsigned long type_page;
-       unsigned long dev_page;
-       char *dir_page;
 
-       retval = copy_mount_options(type, &type_page);
-       if (retval < 0)
-               return retval;
+       ret = copy_mount_string(type, &kernel_type);
+       if (ret < 0)
+               goto out_type;
 
-       dir_page = getname(dir_name);
-       retval = PTR_ERR(dir_page);
-       if (IS_ERR(dir_page))
-               goto out1;
+       kernel_dir = getname(dir_name);
+       if (IS_ERR(kernel_dir)) {
+               ret = PTR_ERR(kernel_dir);
+               goto out_dir;
+       }
 
-       retval = copy_mount_options(dev_name, &dev_page);
-       if (retval < 0)
-               goto out2;
+       ret = copy_mount_string(dev_name, &kernel_dev);
+       if (ret < 0)
+               goto out_dev;
 
-       retval = copy_mount_options(data, &data_page);
-       if (retval < 0)
-               goto out3;
+       ret = copy_mount_options(data, &data_page);
+       if (ret < 0)
+               goto out_data;
 
-       retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
-                         flags, (void *)data_page);
-       free_page(data_page);
+       ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
+               (void *) data_page);
 
-out3:
-       free_page(dev_page);
-out2:
-       putname(dir_page);
-out1:
-       free_page(type_page);
-       return retval;
+       free_page(data_page);
+out_data:
+       kfree(kernel_dev);
+out_dev:
+       putname(kernel_dir);
+out_dir:
+       kfree(kernel_type);
+out_type:
+       return ret;
 }
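
The mount(2) path stops round-tripping the type and device strings through whole pages: copy_mount_string() wraps strndup_user(), which bounds, validates and NUL-terminates the copy in one step, making the old memchr(..., 0, PAGE_SIZE) checks in do_new_mount()/do_mount() redundant (hence their removal above). Roughly what the helper guarantees, as a sketch:

    char *tmp = strndup_user(data, PAGE_SIZE);
    /* ERR_PTR(-EFAULT) on a bad user pointer,
     * ERR_PTR(-EINVAL) if no NUL within PAGE_SIZE,
     * otherwise a kmalloc'd, NUL-terminated copy freed with kfree() */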
 
 /*
index b99ce205b1bd7d6715b81ca7d87452d0011ecb2d..cf98da1be23e861dbbeedc76849969f0fa454aff 100644 (file)
@@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb)
 
 #ifdef CONFIG_NCPFS_NLS
        /* unload the NLS charsets */
-       if (server->nls_vol)
-       {
-               unload_nls(server->nls_vol);
-               server->nls_vol = NULL;
-       }
-       if (server->nls_io)
-       {
-               unload_nls(server->nls_io);
-               server->nls_io = NULL;
-       }
+       unload_nls(server->nls_vol);
+       unload_nls(server->nls_io);
 #endif /* CONFIG_NCPFS_NLS */
 
        if (server->info_filp)
index 53a7ed7eb9c66da4b69090789afae1e3c5772e7b..0d58caf4a6e1414e51ecd55b09dc11ebbb11beec 100644 (file)
@@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
        oldset_io = server->nls_io;
        server->nls_io = iocharset;
 
-       if (oldset_cp)
-               unload_nls(oldset_cp);
-       if (oldset_io)
-               unload_nls(oldset_io);
+       unload_nls(oldset_cp);
+       unload_nls(oldset_io);
 
        return 0;
 }
index 152025358dad53ce354b662163f1a9cee0205393..63976c0ccc2539d72155d11613fbaa9322358afb 100644 (file)
@@ -648,8 +648,6 @@ static int nfs_start_lockd(struct nfs_server *server)
                .hostname       = clp->cl_hostname,
                .address        = (struct sockaddr *)&clp->cl_addr,
                .addrlen        = clp->cl_addrlen,
-               .protocol       = server->flags & NFS_MOUNT_TCP ?
-                                               IPPROTO_TCP : IPPROTO_UDP,
                .nfs_version    = clp->rpc_ops->version,
                .noresvport     = server->flags & NFS_MOUNT_NORESVPORT ?
                                        1 : 0,
@@ -660,6 +658,14 @@ static int nfs_start_lockd(struct nfs_server *server)
        if (server->flags & NFS_MOUNT_NONLM)
                return 0;
 
+       switch (clp->cl_proto) {
+               default:
+                       nlm_init.protocol = IPPROTO_TCP;
+                       break;
+               case XPRT_TRANSPORT_UDP:
+                       nlm_init.protocol = IPPROTO_UDP;
+       }
+
        host = nlmclnt_init(&nlm_init);
        if (IS_ERR(host))
                return PTR_ERR(host);
@@ -787,7 +793,7 @@ static int nfs_init_server(struct nfs_server *server,
        dprintk("--> nfs_init_server()\n");
 
 #ifdef CONFIG_NFS_V3
-       if (data->flags & NFS_MOUNT_VER3)
+       if (data->version == 3)
                cl_init.rpc_ops = &nfs_v3_clientops;
 #endif
 
@@ -964,6 +970,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
        target->acdirmin = source->acdirmin;
        target->acdirmax = source->acdirmax;
        target->caps = source->caps;
+       target->options = source->options;
 }
 
 /*
index 5021b75d2d1e65910177128b719efb20ec098077..86d6b4db1096ea47529c472b819ec7bdb483672c 100644 (file)
@@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = {
        .direct_IO = nfs_direct_IO,
        .migratepage = nfs_migrate_page,
        .launder_page = nfs_launder_page,
+       .error_remove_page = generic_error_remove_page,
 };
 
 /*
index 379be678cb7e00da65cc7327614cdd580136f7aa..70fad69eb9593a41894102164e26db60eb13fa56 100644 (file)
@@ -58,17 +58,34 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
 /*
  * Get the cache cookie for an NFS superblock.  We have to handle
  * uniquification here because the cache doesn't do it for us.
+ *
+ * The default uniquifier is just an empty string, but it may be overridden
+ * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
+ * superblock across an automount point of some nature.
  */
-void nfs_fscache_get_super_cookie(struct super_block *sb,
-                                 struct nfs_parsed_mount_data *data)
+void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq,
+                                 struct nfs_clone_mount *mntdata)
 {
        struct nfs_fscache_key *key, *xkey;
        struct nfs_server *nfss = NFS_SB(sb);
        struct rb_node **p, *parent;
-       const char *uniq = data->fscache_uniq ?: "";
        int diff, ulen;
 
-       ulen = strlen(uniq);
+       if (uniq) {
+               ulen = strlen(uniq);
+       } else if (mntdata) {
+               struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
+               if (mnt_s->fscache_key) {
+                       uniq = mnt_s->fscache_key->key.uniquifier;
+                       ulen = mnt_s->fscache_key->key.uniq_len;
+               }
+       }
+
+       if (!uniq) {
+               uniq = "";
+               ulen = 1;
+       }
+
        key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL);
        if (!key)
                return;
index 6e809bb0ff0860cb994bcdc557ae13b26a06a426..b9c572d0679f8ced0aead467778d56229de9a03b 100644 (file)
@@ -74,7 +74,8 @@ extern void nfs_fscache_get_client_cookie(struct nfs_client *);
 extern void nfs_fscache_release_client_cookie(struct nfs_client *);
 
 extern void nfs_fscache_get_super_cookie(struct super_block *,
-                                        struct nfs_parsed_mount_data *);
+                                        const char *,
+                                        struct nfs_clone_mount *);
 extern void nfs_fscache_release_super_cookie(struct super_block *);
 
 extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -173,7 +174,8 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
 
 static inline void nfs_fscache_get_super_cookie(
        struct super_block *sb,
-       struct nfs_parsed_mount_data *data)
+       const char *uniq,
+       struct nfs_clone_mount *mntdata)
 {
 }
 static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
index 060022b4651c38744c533413ed355f161bd05aad..faa091865ad05c956114ab7c7642994845717382 100644 (file)
@@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
  */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
-       if (i_size_read(inode) < offset) {
-               unsigned long limit;
-
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && offset > limit)
-                       goto out_sig;
-               if (offset > inode->i_sb->s_maxbytes)
-                       goto out_big;
-               spin_lock(&inode->i_lock);
-               i_size_write(inode, offset);
-               spin_unlock(&inode->i_lock);
-       } else {
-               struct address_space *mapping = inode->i_mapping;
+       loff_t oldsize;
+       int err;
 
-               /*
-                * truncation of in-use swapfiles is disallowed - it would
-                * cause subsequent swapout to scribble on the now-freed
-                * blocks.
-                */
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               spin_lock(&inode->i_lock);
-               i_size_write(inode, offset);
-               spin_unlock(&inode->i_lock);
+       err = inode_newsize_ok(inode, offset);
+       if (err)
+               goto out;
 
-               /*
-                * unmap_mapping_range is called twice, first simply for
-                * efficiency so that truncate_inode_pages does fewer
-                * single-page unmaps.  However after this first call, and
-                * before truncate_inode_pages finishes, it is possible for
-                * private pages to be COWed, which remain after
-                * truncate_inode_pages finishes, hence the second
-                * unmap_mapping_range call must be made for correctness.
-                */
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-               truncate_inode_pages(mapping, offset);
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       }
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
+       spin_lock(&inode->i_lock);
+       oldsize = inode->i_size;
+       i_size_write(inode, offset);
+       spin_unlock(&inode->i_lock);
+
+       truncate_pagecache(inode, oldsize, offset);
+out:
+       return err;
 }
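
nfs_vmtruncate() (like ramfs above) now delegates size-limit policy to inode_newsize_ok(), which centralizes the RLIMIT_FSIZE check (including sending SIGXFSZ) and the s_maxbytes comparison, then shrinks via truncate_pagecache(). The pattern any setattr-style path can follow, sketched without NFS's i_lock detail:

    err = inode_newsize_ok(inode, offset);  /* -EFBIG + SIGXFSZ policy */
    if (err)
            return err;
    oldsize = inode->i_size;
    i_size_write(inode, offset);
    truncate_pagecache(inode, oldsize, offset);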
 
 /**
index c862c9340f9a4598d3391c44d557187ec5b06871..5e078b222b4e976a63ad37904a5b594134051f81 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <linux/utsname.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
index ee6a13f0544325ed8bf07e663a12165a7bd7e60f..3f8881d1a0504253de64420a9a25f7d7308d73be 100644 (file)
@@ -7,7 +7,6 @@
  */
 
 #include <linux/mm.h>
-#include <linux/utsname.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/sunrpc/clnt.h>
index 35869a4921f19e8c3ad3a273c28defabbf16283a..5fe5492fbd29e51fab41de8930b29f6863e76747 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <linux/utsname.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
index be6544aef41ff0cfdfe6d41791811cd871beb451..ed7c269e25143175a21156a4c5ce784e3efea4c1 100644 (file)
@@ -36,7 +36,6 @@
  */
 
 #include <linux/mm.h>
-#include <linux/utsname.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/string.h>
index cfc30d362f94e5ab60c4724ce97a2cf9f6d75626..83ad47cbdd8ad5f584849495989229be9ac47bc4 100644 (file)
@@ -39,7 +39,6 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <linux/utsname.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
index 7be72d90d49de3191899c8be4626d7cfc385d148..ef583854d8d03c77864c7d68d7cb3af3c88cfa47 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/mm.h>
-#include <linux/utsname.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
index f1cc0587cfef3f2a02fe8c22b5226b014e0321a2..810770f96816ff17b3e2b1f70afc6cf528b7e61c 100644 (file)
@@ -728,6 +728,27 @@ static void nfs_umount_begin(struct super_block *sb)
        unlock_kernel();
 }
 
+static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(int flags)
+{
+       struct nfs_parsed_mount_data *data;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (data) {
+               data->flags             = flags;
+               data->rsize             = NFS_MAX_FILE_IO_SIZE;
+               data->wsize             = NFS_MAX_FILE_IO_SIZE;
+               data->acregmin          = NFS_DEF_ACREGMIN;
+               data->acregmax          = NFS_DEF_ACREGMAX;
+               data->acdirmin          = NFS_DEF_ACDIRMIN;
+               data->acdirmax          = NFS_DEF_ACDIRMAX;
+               data->nfs_server.port   = NFS_UNSPEC_PORT;
+               data->auth_flavors[0]   = RPC_AUTH_UNIX;
+               data->auth_flavor_len   = 1;
+               data->minorversion      = 0;
+       }
+       return data;
+}
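
Collecting the defaults into nfs_alloc_parsed_mount_data() removes two hand-maintained initializer blocks (deleted further down) and lets each entry point state only what differs, per the hunks below:

    /* NFSv2/v3 mount path keeps the legacy transport defaults */
    data = nfs_alloc_parsed_mount_data(NFS_MOUNT_VER3 | NFS_MOUNT_TCP);

    /* NFSv4 path passes no legacy flags; the version is forced later */
    data = nfs_alloc_parsed_mount_data(0);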
+
 /*
  * Sanity-check a server address provided by the mount command.
  *
@@ -1430,10 +1451,13 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
        int status;
 
        if (args->mount_server.version == 0) {
-               if (args->flags & NFS_MOUNT_VER3)
-                       args->mount_server.version = NFS_MNT3_VERSION;
-               else
-                       args->mount_server.version = NFS_MNT_VERSION;
+               switch (args->version) {
+                       default:
+                               args->mount_server.version = NFS_MNT3_VERSION;
+                               break;
+                       case 2:
+                               args->mount_server.version = NFS_MNT_VERSION;
+               }
        }
        request.version = args->mount_server.version;
 
@@ -1634,20 +1658,6 @@ static int nfs_validate_mount_data(void *options,
        if (data == NULL)
                goto out_no_data;
 
-       args->flags             = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
-       args->rsize             = NFS_MAX_FILE_IO_SIZE;
-       args->wsize             = NFS_MAX_FILE_IO_SIZE;
-       args->acregmin          = NFS_DEF_ACREGMIN;
-       args->acregmax          = NFS_DEF_ACREGMAX;
-       args->acdirmin          = NFS_DEF_ACDIRMIN;
-       args->acdirmax          = NFS_DEF_ACDIRMAX;
-       args->mount_server.port = NFS_UNSPEC_PORT;
-       args->nfs_server.port   = NFS_UNSPEC_PORT;
-       args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-       args->auth_flavors[0]   = RPC_AUTH_UNIX;
-       args->auth_flavor_len   = 1;
-       args->minorversion      = 0;
-
        switch (data->version) {
        case 1:
                data->namlen = 0;
@@ -1778,7 +1788,7 @@ static int nfs_validate_mount_data(void *options,
        }
 
 #ifndef CONFIG_NFS_V3
-       if (args->flags & NFS_MOUNT_VER3)
+       if (args->version == 3)
                goto out_v3_not_compiled;
 #endif /* !CONFIG_NFS_V3 */
 
@@ -1936,7 +1946,7 @@ static void nfs_fill_super(struct super_block *sb,
        if (data->bsize)
                sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
 
-       if (server->flags & NFS_MOUNT_VER3) {
+       if (server->nfs_client->rpc_ops->version == 3) {
                /* The VFS shouldn't apply the umask to mode bits. We will do
                 * so ourselves when necessary.
                 */
@@ -1960,7 +1970,7 @@ static void nfs_clone_super(struct super_block *sb,
        sb->s_blocksize = old_sb->s_blocksize;
        sb->s_maxbytes = old_sb->s_maxbytes;
 
-       if (server->flags & NFS_MOUNT_VER3) {
+       if (server->nfs_client->rpc_ops->version == 3) {
                /* The VFS shouldn't apply the umask to mode bits. We will do
                 * so ourselves when necessary.
                 */
@@ -2094,7 +2104,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
        };
        int error = -ENOMEM;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = nfs_alloc_parsed_mount_data(NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
        mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
        if (data == NULL || mntfh == NULL)
                goto out_free_fh;
@@ -2144,7 +2154,8 @@ static int nfs_get_sb(struct file_system_type *fs_type,
        if (!s->s_root) {
                /* initial superblock/root creation */
                nfs_fill_super(s, data);
-               nfs_fscache_get_super_cookie(s, data);
+               nfs_fscache_get_super_cookie(
+                       s, data ? data->fscache_uniq : NULL, NULL);
        }
 
        mntroot = nfs_get_root(s, mntfh);
@@ -2245,6 +2256,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
        if (!s->s_root) {
                /* initial superblock/root creation */
                nfs_clone_super(s, data->sb);
+               nfs_fscache_get_super_cookie(s, NULL, data);
        }
 
        mntroot = nfs_get_root(s, data->fh);
@@ -2362,18 +2374,7 @@ static int nfs4_validate_mount_data(void *options,
        if (data == NULL)
                goto out_no_data;
 
-       args->rsize             = NFS_MAX_FILE_IO_SIZE;
-       args->wsize             = NFS_MAX_FILE_IO_SIZE;
-       args->acregmin          = NFS_DEF_ACREGMIN;
-       args->acregmax          = NFS_DEF_ACREGMAX;
-       args->acdirmin          = NFS_DEF_ACDIRMIN;
-       args->acdirmax          = NFS_DEF_ACDIRMAX;
-       args->nfs_server.port   = NFS_UNSPEC_PORT;
-       args->auth_flavors[0]   = RPC_AUTH_UNIX;
-       args->auth_flavor_len   = 1;
        args->version           = 4;
-       args->minorversion      = 0;
-
        switch (data->version) {
        case 1:
                if (data->host_addrlen > sizeof(args->nfs_server.address))
@@ -2508,7 +2509,8 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
        if (!s->s_root) {
                /* initial superblock/root creation */
                nfs4_fill_super(s);
-               nfs_fscache_get_super_cookie(s, data);
+               nfs_fscache_get_super_cookie(
+                       s, data ? data->fscache_uniq : NULL, NULL);
        }
 
        mntroot = nfs4_get_root(s, mntfh);
@@ -2656,7 +2658,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
        struct nfs_parsed_mount_data *data;
        int error = -ENOMEM;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = nfs_alloc_parsed_mount_data(0);
        if (data == NULL)
                goto out_free_data;
 
@@ -2741,6 +2743,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
        if (!s->s_root) {
                /* initial superblock/root creation */
                nfs4_clone_super(s, data->sb);
+               nfs_fscache_get_super_cookie(s, NULL, data);
        }
 
        mntroot = nfs4_get_root(s, data->fh);
@@ -2822,6 +2825,7 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
        if (!s->s_root) {
                /* initial superblock/root creation */
                nfs4_fill_super(s);
+               nfs_fscache_get_super_cookie(s, NULL, data);
        }
 
        mntroot = nfs4_get_root(s, &mntfh);
index cdfa86fa1471b18a7ae2701de30ac91657452beb..ba2c199592fd450d41c9778f70cf4fbcb3d46f16 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/init.h>
 
 #include <linux/mm.h>
-#include <linux/utsname.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/sunrpc/clnt.h>
index 477d37d83b316367e1ac04fb31ba98e375a37b1a..2224b4d07bf0116b478f9d8cf565c424f9b64b78 100644 (file)
@@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset)
 
 void unload_nls(struct nls_table *nls)
 {
-       module_put(nls->owner);
+       if (nls)
+               module_put(nls->owner);
 }
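
Making unload_nls() tolerate a NULL table is what lets every caller in this series (hfs, hfsplus, isofs, jfs, ncpfs, ntfs, smbfs) drop its "if (nls)" guard. Cleanup paths shrink to unconditional calls:

    unload_nls(sbi->nls_tab);   /* safe even if the charset never loaded */
    sbi->nls_tab = NULL;        /* only needed where the field is reused */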
 
 static const wchar_t charset2uni[256] = {
index b38f944f0667351b69d947acf0e15660d995834b..cfce53cb65d76e0177601b129d972b8365d7d892 100644 (file)
@@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = {
        .migratepage    = buffer_migrate_page,  /* Move a page cache page from
                                                   one physical page to an
                                                   other. */
+       .error_remove_page = generic_error_remove_page,
 };
 
 /**
@@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = {
        .migratepage    = buffer_migrate_page,  /* Move a page cache page from
                                                   one physical page to an
                                                   other. */
+       .error_remove_page = generic_error_remove_page,
 };
 
 #ifdef NTFS_RW
index abaaa1cbf8de4dac7ac9e8bfb6e143e82b06bff7..80b04770e8e9c22a64a4becf0686260627e0d7e6 100644 (file)
@@ -201,8 +201,7 @@ use_utf8:
                                                v, old_nls->charset);
                                nls_map = old_nls;
                        } else /* nls_map */ {
-                               if (old_nls)
-                                       unload_nls(old_nls);
+                               unload_nls(old_nls);
                        }
                } else if (!strcmp(p, "utf8")) {
                        bool val = false;
@@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb)
                ntfs_free(vol->upcase);
                vol->upcase = NULL;
        }
-       if (vol->nls_map) {
-               unload_nls(vol->nls_map);
-               vol->nls_map = NULL;
-       }
+
+       unload_nls(vol->nls_map);
+
        sb->s_fs_info = NULL;
        kfree(vol);
 
index 72e76062a900d2555fac2dae44ef325a5d8aa897..deb2b132ae5ed42b68fd11f58413f2ffa4779b83 100644 (file)
@@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = {
        .releasepage            = ocfs2_releasepage,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
index 81eff8e583222747cc824ce02d6edde94378e711..01cf8cc3d286f483d5c405d35dc574ec0fb095b7 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/random.h>
index 75997b4deaf3ff9fafd4fcf25293c9312fb17fc8..ca96bce50e18cb6629343f6a0436b8bec2aa20a6 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/random.h>
index c5c88124096df424fdb6d248512f239b7d87a2b1..ca46002ec10ea7ae1fa3da82e27a5f010f47bd92 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/sysctl.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
index 4d9e6b288dd871e4c4ffee4157522913988a159b..0334000676d3ad7f48765ff1fcde2e6d8e1f3a03 100644 (file)
@@ -28,7 +28,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/delay.h>
index 83a9f2972ac8189ca581ac9237ecf94a590b6f0d..437698e9465fd01114fd117228cd39095445f795 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/random.h>
index f8b653fcd4ddb872ec046217a6f1a731db36dc25..83bcaf266b358d7922998b3ec1d46d203531d616 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/random.h>
index 43e6e328056902a0fdd8cb98c7961b76a912c3dd..d9fa3d22e17c6d27134327d3fd96199038913ed0 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/random.h>
index 98569e86c61309ff78066dbb15770821d365098e..52ec020ea78b42f11ad83b3a2632dbcf08a20dde 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/random.h>
index 756f5b0998e0d4e389ffcda4b558f861094eeb91..00f53b2aea76a21a955eeac0c403a81c954947e8 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/random.h>
index 24feb449a1dc1f7329e0784d34b470db520d2684..4cc3c890a2cd48a0d13dc8cb7da4efabf0c22e16 100644 (file)
@@ -28,7 +28,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include <linux/utsname.h>
 #include <linux/init.h>
 #include <linux/random.h>
 #include <linux/statfs.h>
index 579dd1b1110fde07fe90acb0242967d0e8078f7b..e3421030a69f713971a78e7d5d525098a4e44416 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
-#include <linux/utsname.h>
 #include <linux/namei.h>
 
 #define MLOG_MASK_PREFIX ML_NAMEI
index 171e052c07b3684f4a4264ebb44e802affc20f6e..c7bff4f603ff1557f7663fa22b9fb7ae8ed8a2e8 100644 (file)
@@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                "Committed_AS:   %8lu kB\n"
                "VmallocTotal:   %8lu kB\n"
                "VmallocUsed:    %8lu kB\n"
-               "VmallocChunk:   %8lu kB\n",
+               "VmallocChunk:   %8lu kB\n"
+#ifdef CONFIG_MEMORY_FAILURE
+               "HardwareCorrupted: %8lu kB\n"
+#endif
+               ,
                K(i.totalram),
                K(i.freeram),
                K(i.bufferram),
@@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                (unsigned long)VMALLOC_TOTAL >> 10,
                vmi.used >> 10,
                vmi.largest_chunk >> 10
+#ifdef CONFIG_MEMORY_FAILURE
+               ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
+#endif
                );
 
        hugetlb_report_meminfo(m);
index 9b1e4e9a16bfd0f1ee07f109b1964f5700c1a577..f667e8aeabdf1145b42f4cecdf082246eed1e030 100644 (file)
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 
        /* careful: calling conventions are nasty here */
        res = count;
-       error = table->proc_handler(table, write, filp, buf, &res, ppos);
+       error = table->proc_handler(table, write, buf, &res, ppos);
        if (!error)
                error = res;
 out:
index 0c10a0b3f1460c5ff8d533de1c6304a657c097fd..766b1d456050ff98d2603f3af027b7d61b98174d 100644 (file)
@@ -4,13 +4,18 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/time.h>
+#include <linux/kernel_stat.h>
 #include <asm/cputime.h>
 
 static int uptime_proc_show(struct seq_file *m, void *v)
 {
        struct timespec uptime;
        struct timespec idle;
-       cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
+       int i;
+       cputime_t idletime = cputime_zero;
+
+       for_each_possible_cpu(i)
+               idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
 
        do_posix_clock_monotonic_gettime(&uptime);
        monotonic_to_bootbased(&uptime);
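
/proc/uptime previously reported only the boot task's own utime+stime as "idle", which drifts badly on SMP; the loop sums the per-cpu idle cputime instead. A sketch of the step that presumably follows, converting the aggregate for the seq_printf (assumption, not shown in the hunk):

    cputime_to_timespec(idletime, &idle);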
index 11f0c06316ded778283af0bdbe53d06101225df0..32fae4040ebf46a94bc84cf1260c2edebce0c330 100644 (file)
@@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
        /* make various checks */
        order = get_order(newsize);
        if (unlikely(order >= MAX_ORDER))
-               goto too_big;
+               return -EFBIG;
 
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && newsize > limit)
-               goto fsize_exceeded;
-
-       if (newsize > inode->i_sb->s_maxbytes)
-               goto too_big;
+       ret = inode_newsize_ok(inode, newsize);
+       if (ret)
+               return ret;
 
        i_size_write(inode, newsize);
 
@@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 
        return 0;
 
- fsize_exceeded:
-       send_sig(SIGXFSZ, current, 0);
- too_big:
-       return -EFBIG;
-
- add_error:
+add_error:
        while (loop < npages)
                __free_page(pages + loop++);
        return ret;
index 6c8c55dec2bcd6b2759f9698abab5b663258cc4b..3ac28987f22a38b3c2005a74e8ee3dac01621679 100644 (file)
@@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
 
        pos = *ppos;
-       retval = -EINVAL;
-       if (unlikely(pos < 0))
-               goto fput_out;
        if (unlikely(pos + count > max)) {
                retval = -EOVERFLOW;
                if (pos >= max)
index 47f132df0c3f0a630673831c2b862ce96ac72968..c117fa80d1e9b9ddc9be29a7d412022b9d158010 100644 (file)
@@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
        pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK;
 
        root = romfs_iget(sb, pos);
-       if (!root)
+       if (IS_ERR(root))
                goto error;
 
        sb->s_root = d_alloc_root(root);
index 6c959275f2d0ef41577d9cdb09086f3b9c670883..eae7d9dbf3ffed297d62b8321137de25ba965c83 100644 (file)
@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path);
  */
 int seq_path(struct seq_file *m, struct path *path, char *esc)
 {
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
-               char *p = d_path(path, s, m->size - m->count);
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -1;
+
+       if (size) {
+               char *p = d_path(path, buf, size);
                if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return s - p;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
                }
        }
-       m->count = m->size;
-       return -1;
+       seq_commit(m, res);
+
+       return res;
 }
 EXPORT_SYMBOL(seq_path);
 
@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path);
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
                  char *esc)
 {
-       int err = -ENAMETOOLONG;
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -ENAMETOOLONG;
+
+       if (size) {
                char *p;
 
                spin_lock(&dcache_lock);
-               p = __d_path(path, root, s, m->size - m->count);
+               p = __d_path(path, root, buf, size);
                spin_unlock(&dcache_lock);
-               err = PTR_ERR(p);
+               res = PTR_ERR(p);
                if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return 0;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
+                       else
+                               res = -ENAMETOOLONG;
                }
        }
-       m->count = m->size;
-       return err;
+       seq_commit(m, res);
+
+       return res < 0 ? res : 0;
 }
 
 /*
@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
  */
 int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 {
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
-               char *p = dentry_path(dentry, s, m->size - m->count);
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -1;
+
+       if (size) {
+               char *p = dentry_path(dentry, buf, size);
                if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return s - p;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
                }
        }
-       m->count = m->size;
-       return -1;
+       seq_commit(m, res);
+
+       return res;
 }
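
All three seq_file path helpers now share one protocol: seq_get_buf() hands out the unused tail of the buffer, the caller formats into it, and seq_commit() either advances m->count or, on a negative result, marks the buffer full so the core retries with a larger one. A hedged sketch of a producer using the pair directly (name hypothetical):

    char *buf;
    size_t size = seq_get_buf(m, &buf);     /* tail of m->buf, may be 0 */
    int res = -1;

    if (size) {
            int n = snprintf(buf, size, "%s\n", name);  /* hypothetical */
            res = (n < size) ? n : -1;      /* -1: output did not fit */
    }
    seq_commit(m, res);                     /* res < 0 => buffer full */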
 
 int seq_bitmap(struct seq_file *m, const unsigned long *bits,
index 1402d2d54f5239d43663e01c792506946bebdbe9..1c4c8f089970041c49de182704bf537dbfce9259 100644 (file)
@@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m)
 static void
 smb_unload_nls(struct smb_sb_info *server)
 {
-       if (server->remote_nls) {
-               unload_nls(server->remote_nls);
-               server->remote_nls = NULL;
-       }
-       if (server->local_nls) {
-               unload_nls(server->local_nls);
-               server->local_nls = NULL;
-       }
+       unload_nls(server->remote_nls);
+       unload_nls(server->local_nls);
 }
 
 static void
index 0e7207b9815c78a33f237e678da91f0adb969c45..19eb70b374bcb3239cd7a75ee901336f942b02e4 100644 (file)
@@ -465,6 +465,48 @@ rescan:
 }
 
 EXPORT_SYMBOL(get_super);
+
+/**
+ * get_active_super - get an active reference to the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given.  Returns the superblock with an active
+ * reference and s_umount held exclusively or %NULL if none was found.
+ */
+struct super_block *get_active_super(struct block_device *bdev)
+{
+       struct super_block *sb;
+
+       if (!bdev)
+               return NULL;
+
+       spin_lock(&sb_lock);
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               if (sb->s_bdev != bdev)
+                       continue;
+
+               sb->s_count++;
+               spin_unlock(&sb_lock);
+               down_write(&sb->s_umount);
+               if (sb->s_root) {
+                       spin_lock(&sb_lock);
+                       if (sb->s_count > S_BIAS) {
+                               atomic_inc(&sb->s_active);
+                               sb->s_count--;
+                               spin_unlock(&sb_lock);
+                               return sb;
+                       }
+                       spin_unlock(&sb_lock);
+               }
+               up_write(&sb->s_umount);
+               put_super(sb);
+               yield();
+               spin_lock(&sb_lock);
+       }
+       spin_unlock(&sb_lock);
+       return NULL;
+}
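
get_active_super() gives freeze/thaw paths a way to pin a mounted superblock: on success the sb carries an active reference and s_umount is held for write. A hedged usage sketch, assuming release via the 2.6.32-era deactivate_locked_super():

    struct super_block *sb = get_active_super(bdev);
    if (sb) {
            /* sb->s_root is valid; the filesystem cannot go away here */
            deactivate_locked_super(sb);    /* drops ref and s_umount */
    }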
  
 struct super_block * user_get_super(dev_t dev)
 {
@@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 {
        int retval;
        int remount_rw;
-       
+
+       if (sb->s_frozen != SB_UNFROZEN)
+               return -EBUSY;
+
 #ifdef CONFIG_BLOCK
        if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
                return -EACCES;
 #endif
+
        if (flags & MS_RDONLY)
                acct_auto_close(sb);
        shrink_dcache_sb(sb);
@@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type,
         * will protect the lockfs code from trying to start a snapshot
         * while we are mounting
         */
-       down(&bdev->bd_mount_sem);
+       mutex_lock(&bdev->bd_fsfreeze_mutex);
+       if (bdev->bd_fsfreeze_count > 0) {
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               error = -EBUSY;
+               goto error_bdev;
+       }
        s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-       up(&bdev->bd_mount_sem);
+       mutex_unlock(&bdev->bd_fsfreeze_mutex);
        if (IS_ERR(s))
                goto error_s;
 
@@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
        if (error)
                goto out_sb;
 
+       /*
+        * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
+        * but s_maxbytes was an unsigned long long for many releases. Throw
+        * this warning for a little while to try and catch filesystems that
+        * violate this rule. This warning should be either removed or
+        * converted to a BUG() in 2.6.34.
+        */
+       WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
+               "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
+
        mnt->mnt_mountpoint = mnt->mnt_root;
        mnt->mnt_parent = mnt;
        up_write(&mnt->mnt_sb->s_umount);
index d5e5559e31db3774ba6f76e350302009ccf831eb..381854461b282fe928c93cf61efb41edb1ad1acf 100644 (file)
@@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = {
        .direct_IO              = xfs_vm_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
index 916c0ffb6083de2be56fdf56656ee199e248df73..c5bc67c4e3bb32527acca1e560797bec87e217cb 100644 (file)
@@ -26,7 +26,6 @@ STATIC int
 xfs_stats_clear_proc_handler(
        ctl_table       *ctl,
        int             write,
-       struct file     *filp,
        void            __user *buffer,
        size_t          *lenp,
        loff_t          *ppos)
@@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler(
        int             c, ret, *valp = ctl->data;
        __uint32_t      vn_active;
 
-       ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
 
        if (!ret && write && *valp) {
                printk("XFS Clearing xfsstats\n");
diff --git a/include/acpi/button.h b/include/acpi/button.h
new file mode 100644 (file)
index 0000000..97eea0e
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef ACPI_BUTTON_H
+#define ACPI_BUTTON_H
+
+#include <linux/notifier.h>
+
+#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE)
+extern int acpi_lid_notifier_register(struct notifier_block *nb);
+extern int acpi_lid_notifier_unregister(struct notifier_block *nb);
+extern int acpi_lid_open(void);
+#else
+static inline int acpi_lid_notifier_register(struct notifier_block *nb)
+{
+       return 0;
+}
+static inline int acpi_lid_notifier_unregister(struct notifier_block *nb)
+{
+       return 0;
+}
+static inline int acpi_lid_open(void)
+{
+       return 1;
+}
+#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */
+
+#endif /* ACPI_BUTTON_H */
index 4d3e48373e74eee4ce2881bc56442c7ef59409c3..0c3dd8603927ac2d20394a8e531d82eb963dfc34 100644 (file)
 #define F_SETSIG       10      /* for sockets. */
 #define F_GETSIG       11      /* for sockets. */
 #endif
+#ifndef F_SETOWN_EX
+#define F_SETOWN_EX    12
+#define F_GETOWN_EX    13
+#endif
+
+#define F_OWNER_TID    0
+#define F_OWNER_PID    1
+#define F_OWNER_GID    2
+
+struct f_owner_ex {
+       int     type;
+       pid_t   pid;
+};
 
 /* for F_[GET|SET]FL */
 #define FD_CLOEXEC     1       /* actually anything with low bit set goes */
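
F_SETOWN_EX/F_GETOWN_EX extend fcntl() ownership so SIGIO/SIGURG can target a single thread (F_OWNER_TID) rather than a whole process or group. A hedged userspace sketch; glibc of this era has no gettid() wrapper, so the raw syscall is used:

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    struct f_owner_ex owner = {
            .type = F_OWNER_TID,
            .pid  = syscall(SYS_gettid),    /* this thread only */
    };
    if (fcntl(fd, F_SETOWN_EX, &owner) == -1)
            perror("F_SETOWN_EX");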
index dd63bd38864b23627f36e57b53c8413af63cd238..5ee13b2fd223599422446b509227eadb24b7f7e8 100644 (file)
@@ -34,6 +34,7 @@
 #define MADV_REMOVE    9               /* remove these pages & resources */
 #define MADV_DONTFORK  10              /* don't inherit across fork */
 #define MADV_DOFORK    11              /* do inherit across fork */
+#define MADV_HWPOISON  100             /* poison a page for testing */
 
 #define MADV_MERGEABLE   12            /* KSM may merge identical pages */
 #define MADV_UNMERGEABLE 13            /* KSM may not merge identical pages */
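
MADV_HWPOISON is a testing hook for the new memory-failure code: it poisons the backing page as if the hardware had reported an uncorrected error there. A hedged test sketch (assumes CAP_SYS_ADMIN and CONFIG_MEMORY_FAILURE):

    long pagesize = sysconf(_SC_PAGESIZE);
    void *p = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (madvise(p, pagesize, MADV_HWPOISON) == 0) {
            /* a later access to *p should now raise SIGBUS */
    }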
index c840719a8c595cea463df2113d4934e6ff2a0ff8..942d30b5aab15186cef7ca2709f362b4ea393176 100644 (file)
@@ -82,6 +82,7 @@ typedef struct siginfo {
 #ifdef __ARCH_SI_TRAPNO
                        int _trapno;    /* TRAP # which caused the signal */
 #endif
+                       short _addr_lsb; /* LSB of the reported address */
                } _sigfault;
 
                /* SIGPOLL */
@@ -112,6 +113,7 @@ typedef struct siginfo {
 #ifdef __ARCH_SI_TRAPNO
 #define si_trapno      _sifields._sigfault._trapno
 #endif
+#define si_addr_lsb    _sifields._sigfault._addr_lsb
 #define si_band                _sifields._sigpoll._band
 #define si_fd          _sifields._sigpoll._fd
 
@@ -192,7 +194,11 @@ typedef struct siginfo {
 #define BUS_ADRALN     (__SI_FAULT|1)  /* invalid address alignment */
 #define BUS_ADRERR     (__SI_FAULT|2)  /* non-existent physical address */
 #define BUS_OBJERR     (__SI_FAULT|3)  /* object specific hardware error */
-#define NSIGBUS                3
+/* hardware memory error consumed on a machine check: action required */
+#define BUS_MCEERR_AR  (__SI_FAULT|4)
+/* hardware memory error detected in process but not consumed: action optional */
+#define BUS_MCEERR_AO  (__SI_FAULT|5)
+#define NSIGBUS                5
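
The two new SIGBUS codes let userspace distinguish a consumed error (BUS_MCEERR_AR, action required) from one merely detected in its pages (BUS_MCEERR_AO, action optional), with si_addr_lsb giving the size of the affected range as a power of two. A hedged handler sketch:

    static void sigbus_handler(int sig, siginfo_t *si, void *uctx)
    {
            if (si->si_code == BUS_MCEERR_AO) {
                    size_t len = (size_t)1 << si->si_addr_lsb;
                    /* [si->si_addr, si->si_addr + len) is poisoned:
                     * discard or re-fetch that data, then continue */
            }
    }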
 
 /*
  * SIGTRAP si_codes
index 88bada2ebc4b1fb1bb980f96daa8409ac5933eb2..510df36dd5d44f8b40e0ed048bbb7c8fa1ef506f 100644 (file)
@@ -37,9 +37,6 @@
 #ifndef parent_node
 #define parent_node(node)      ((void)(node),0)
 #endif
-#ifndef node_to_cpumask
-#define node_to_cpumask(node)  ((void)node, cpu_online_map)
-#endif
 #ifndef cpumask_of_node
 #define cpumask_of_node(node)  ((void)node, cpu_online_mask)
 #endif
 
 #endif /* CONFIG_NUMA */
 
-/*
- * returns pointer to cpumask for specified node
- * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
- */
-#ifndef node_to_cpumask_ptr
-
-#define        node_to_cpumask_ptr(v, node)                                    \
-               cpumask_t _##v = node_to_cpumask(node);                 \
-               const cpumask_t *v = &_##v
-
-#define node_to_cpumask_ptr_next(v, node)                              \
-                         _##v = node_to_cpumask(node)
-#endif
-
 #endif /* _ASM_GENERIC_TOPOLOGY_H */
index 853508499d20d12b5d197178e68bbc809633e55b..3f6e545609be892f79374033d79a851372d1e2fd 100644 (file)
        {0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0x2e32, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+       {0x8086, 0x2e42, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
index 8e1e92583fbc851ad998dd374f0b5df09914f73e..7e0cb1da92e68be66b9f8c0839a42ea0e6a95e99 100644 (file)
@@ -185,6 +185,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_GET_APERTURE 0x23
 #define DRM_I915_GEM_MMAP_GTT  0x24
 #define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
+#define DRM_I915_GEM_MADVISE   0x26
 
 #define DRM_IOCTL_I915_INIT            DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH           DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -221,6 +222,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_GET_TILING  DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
 #define DRM_IOCTL_I915_GEM_GET_APERTURE        DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
 #define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id)
+#define DRM_IOCTL_I915_GEM_MADVISE     DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -667,4 +669,21 @@ struct drm_i915_get_pipe_from_crtc_id {
        __u32 pipe;
 };
 
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
+
+struct drm_i915_gem_madvise {
+       /** Handle of the buffer whose backing-store advice is being changed */
+       __u32 handle;
+
+       /* Advice: either the buffer will be needed again in the near future,
+        *         or won't be and could be discarded under memory pressure.
+        */
+       __u32 madv;
+
+       /** Whether the backing store still exists. */
+       __u32 retained;
+};
+
 #endif                         /* _I915_DRM_H_ */
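
The new DRM_IOCTL_I915_GEM_MADVISE mirrors madvise(2) for GEM buffers: userspace marks idle buffers I915_MADV_DONTNEED so the kernel may purge their backing pages under memory pressure, and flips them back to WILLNEED before reuse. A hedged sketch, assuming an open DRM fd and this header; the helper name is illustrative:

#include <sys/ioctl.h>
#include "i915_drm.h"

/* Returns 1 if the backing pages survived, 0 if already purged, -1 on error. */
static int bo_mark_purgeable(int drm_fd, __u32 handle)
{
	struct drm_i915_gem_madvise madv;

	madv.handle = handle;
	madv.madv = I915_MADV_DONTNEED;
	madv.retained = 0;

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MADVISE, &madv))
		return -1;
	return madv.retained ? 1 : 0;
}
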
index 5fc2ef8d97fac5851eb2d40f7b7e2d67a3be48e8..a1c486a88e8856bc572f1c481764833d0987fa16 100644 (file)
@@ -58,25 +58,60 @@ struct dma_chan_ref {
  * array.
  * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
  * dependency chain
- * @ASYNC_TX_DEP_ACK: ack the dependency descriptor.  Useful for chaining.
+ * @ASYNC_TX_FENCE: specify that the next operation in the dependency
+ * chain uses this operation's result as an input
  */
 enum async_tx_flags {
        ASYNC_TX_XOR_ZERO_DST    = (1 << 0),
        ASYNC_TX_XOR_DROP_DST    = (1 << 1),
-       ASYNC_TX_ACK             = (1 << 3),
-       ASYNC_TX_DEP_ACK         = (1 << 4),
+       ASYNC_TX_ACK             = (1 << 2),
+       ASYNC_TX_FENCE           = (1 << 3),
+};
+
+/**
+ * struct async_submit_ctl - async_tx submission/completion modifiers
+ * @flags: submission modifiers
+ * @depend_tx: parent dependency of the current operation being submitted
+ * @cb_fn: callback routine to run at operation completion
+ * @cb_param: parameter for the callback routine
+ * @scribble: caller provided space for dma/page address conversions
+ */
+struct async_submit_ctl {
+       enum async_tx_flags flags;
+       struct dma_async_tx_descriptor *depend_tx;
+       dma_async_tx_callback cb_fn;
+       void *cb_param;
+       void *scribble;
 };
 
 #ifdef CONFIG_DMA_ENGINE
 #define async_tx_issue_pending_all dma_issue_pending_all
+
+/**
+ * async_tx_issue_pending - send pending descriptor to the hardware channel
+ * @tx: descriptor handle to retrieve hardware context
+ *
+ * Note: any dependent operations will have already been issued by
+ * async_tx_channel_switch, or (in the case of no channel switch) will
+ * be already pending on this channel.
+ */
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+       if (likely(tx)) {
+               struct dma_chan *chan = tx->chan;
+               struct dma_device *dma = chan->device;
+
+               dma->device_issue_pending(chan);
+       }
+}
 #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 #include <asm/async_tx.h>
 #else
 #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
         __async_tx_find_channel(dep, type)
 struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-       enum dma_transaction_type tx_type);
+__async_tx_find_channel(struct async_submit_ctl *submit,
+                       enum dma_transaction_type tx_type);
 #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
 #else
 static inline void async_tx_issue_pending_all(void)
@@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void)
        do { } while (0);
 }
 
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+       do { } while (0);
+}
+
 static inline struct dma_chan *
-async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-       enum dma_transaction_type tx_type, struct page **dst, int dst_count,
-       struct page **src, int src_count, size_t len)
+async_tx_find_channel(struct async_submit_ctl *submit,
+                     enum dma_transaction_type tx_type, struct page **dst,
+                     int dst_count, struct page **src, int src_count,
+                     size_t len)
 {
        return NULL;
 }
@@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
- * @cb_fn_param: parameter to pass to the callback routine
+ * @submit: submission modifiers plus completion callback and parameter
  */
 static inline void
-async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param)
+async_tx_sync_epilog(struct async_submit_ctl *submit)
 {
-       if (cb_fn)
-               cb_fn(cb_fn_param);
+       if (submit->cb_fn)
+               submit->cb_fn(submit->cb_param);
 }
 
-void
-async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
-       enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+typedef union {
+       unsigned long addr;
+       struct page *page;
+       dma_addr_t dma;
+} addr_conv_t;
+
+static inline void
+init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
+                 struct dma_async_tx_descriptor *tx,
+                 dma_async_tx_callback cb_fn, void *cb_param,
+                 addr_conv_t *scribble)
+{
+       args->flags = flags;
+       args->depend_tx = tx;
+       args->cb_fn = cb_fn;
+       args->cb_param = cb_param;
+       args->scribble = scribble;
+}
+
+void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
+                    struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_xor(struct page *dest, struct page **src_list, unsigned int offset,
-       int src_cnt, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+         int src_cnt, size_t len, struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
-       unsigned int offset, int src_cnt, size_t len,
-       u32 *result, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+             int src_cnt, size_t len, enum sum_check_flags *result,
+             struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
-       unsigned int src_offset, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+            unsigned int src_offset, size_t len,
+            struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_memset(struct page *dest, int val, unsigned int offset,
-       size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+            size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
+                  size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
+                  size_t len, enum sum_check_flags *pqres, struct page *spare,
+                  struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
+                       struct page **ptrs, struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_raid6_datap_recov(int src_num, size_t bytes, int faila,
+                       struct page **ptrs, struct async_submit_ctl *submit);
 
 void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 #endif /* _ASYNC_TX_H_ */
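
The net effect of this rework: the flags/depend_tx/callback triple that every async_* call used to take is now packed into a single async_submit_ctl, and ASYNC_TX_DEP_ACK disappears (the series makes dependency acking implicit). A hedged sketch of the new calling convention, chaining a copy into an xor; the scribble sizing and completion plumbing are illustrative only:

#include <linux/async_tx.h>
#include <linux/completion.h>

static void chain_done(void *param)
{
	complete(param);	/* caller-supplied struct completion */
}

static void copy_then_xor(struct page *dest, struct page *src,
			  struct page **xor_srcs, int src_cnt, size_t len,
			  struct completion *cmp)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;
	addr_conv_t scribble[8];	/* scratch space; size it to src_cnt
					 * and preallocate it in real code */

	/* first op: no parent; fence it because the xor consumes dest */
	init_async_submit(&submit, ASYNC_TX_FENCE, NULL, NULL, NULL, scribble);
	tx = async_memcpy(dest, src, 0, 0, len, &submit);

	/* second op: depends on tx (acked implicitly); ack this descriptor
	 * and request a completion callback.  Note dest participates as an
	 * xor source unless ASYNC_TX_XOR_ZERO_DST is also set. */
	init_async_submit(&submit, ASYNC_TX_ACK, tx, chain_done, cmp, scribble);
	tx = async_xor(dest, xor_srcs, 0, src_cnt, len, &submit);

	async_tx_issue_pending(tx);
	wait_for_completion(cmp);
}
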
index 2046b5b8af48ae094d0182b143450b5459b67b30..aece486ac7349b8463ace95585b2fa9379a04ab3 100644 (file)
@@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
 extern void install_exec_creds(struct linux_binprm *bprm);
 extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
-extern int set_binfmt(struct linux_binfmt *new);
+extern void set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
 #endif /* __KERNEL__ */
index 90bba9e622864b88156638d231476a7889e4b125..b62bb9294d0c594618515c5f0214e963f7e3a904 100644 (file)
@@ -141,6 +141,38 @@ enum {
        CGRP_WAIT_ON_RMDIR,
 };
 
+/* which pidlist file are we talking about? */
+enum cgroup_filetype {
+       CGROUP_FILE_PROCS,
+       CGROUP_FILE_TASKS,
+};
+
+/*
+ * A pidlist is a list of pids that virtually represents the contents of one
+ * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
+ * a pair (one each for procs, tasks) for each pid namespace that's relevant
+ * to the cgroup.
+ */
+struct cgroup_pidlist {
+       /*
+        * used to find which pidlist is wanted. doesn't change as long as
+        * this particular list stays in the list.
+        */
+       struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
+       /* array of xids */
+       pid_t *list;
+       /* how many elements the above list has */
+       int length;
+       /* how many files are using the current array */
+       int use_count;
+       /* each of these stored in a list by its cgroup */
+       struct list_head links;
+       /* pointer to the cgroup we belong to, for list removal purposes */
+       struct cgroup *owner;
+       /* protects the other fields */
+       struct rw_semaphore mutex;
+};
+
 struct cgroup {
        unsigned long flags;            /* "unsigned long" so bitops work */
 
@@ -179,11 +211,12 @@ struct cgroup {
         */
        struct list_head release_list;
 
-       /* pids_mutex protects pids_list and cached pid arrays. */
-       struct rw_semaphore pids_mutex;
-
-       /* Linked list of struct cgroup_pids */
-       struct list_head pids_list;
+       /*
+        * list of pidlists, up to two for each namespace (one for procs, one
+        * for tasks); created on demand.
+        */
+       struct list_head pidlists;
+       struct mutex pidlist_mutex;
 
        /* For RCU-protected deletion */
        struct rcu_head rcu_head;
@@ -227,6 +260,9 @@ struct css_set {
         * during subsystem registration (at boot time).
         */
        struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+       /* For RCU-protected deletion */
+       struct rcu_head rcu_head;
 };
 
 /*
@@ -389,10 +425,11 @@ struct cgroup_subsys {
                                                  struct cgroup *cgrp);
        int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
        void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
-       int (*can_attach)(struct cgroup_subsys *ss,
-                         struct cgroup *cgrp, struct task_struct *tsk);
+       int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
+                         struct task_struct *tsk, bool threadgroup);
        void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                       struct cgroup *old_cgrp, struct task_struct *tsk);
+                       struct cgroup *old_cgrp, struct task_struct *tsk,
+                       bool threadgroup);
        void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
        void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
        int (*populate)(struct cgroup_subsys *ss,
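
The cgroup_pidlist added above is keyed by a (file type, pid namespace) pair so that concurrent readers of "tasks" and "procs" in different namespaces each see a consistent array. A hedged sketch of the lookup performed under pidlist_mutex; the helper name is illustrative, the real logic lives in kernel/cgroup.c:

static struct cgroup_pidlist *pidlist_lookup(struct cgroup *cgrp,
					     enum cgroup_filetype type,
					     struct pid_namespace *ns)
{
	struct cgroup_pidlist *l;

	/* caller holds cgrp->pidlist_mutex */
	list_for_each_entry(l, &cgrp->pidlists, links)
		if (l->key.type == type && l->key.ns == ns)
			return l;
	return NULL;	/* not found: caller allocates one on demand */
}
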
index 7f627775c947a4a89701092422eae9025cbd7731..ddb7a97c78c25c1cf93a8302d79aa7f00d5e67a2 100644 (file)
@@ -27,8 +27,8 @@
  *
  * configfs Copyright (C) 2005 Oracle.  All rights reserved.
  *
- * Please read Documentation/filesystems/configfs.txt before using the
- * configfs interface, ESPECIALLY the parts about reference counts and
+ * Please read Documentation/filesystems/configfs/configfs.txt before using
+ * the configfs interface, ESPECIALLY the parts about reference counts and
  * item destructors.
  */
 
index 9b1d458aac6e9be98252be3b1945012b122ff41f..789cf5f920ce80e697ca05a9a7fde743cdde9e1a 100644 (file)
 
 /*
  * Cpumasks provide a bitmap suitable for representing the
- * set of CPU's in a system, one bit position per CPU number.
- *
- * The new cpumask_ ops take a "struct cpumask *"; the old ones
- * use cpumask_t.
- *
- * See detailed comments in the file linux/bitmap.h describing the
- * data type on which these cpumasks are based.
- *
- * For details of cpumask_scnprintf() and cpumask_parse_user(),
- * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c.
- * For details of cpulist_scnprintf() and cpulist_parse(), see
- * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c.
- * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c
- * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c.
- * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
- * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
- *
- * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
- * Note: The alternate operations with the suffix "_nr" are used
- *       to limit the range of the loop to nr_cpu_ids instead of
- *       NR_CPUS when NR_CPUS > 64 for performance reasons.
- *       If NR_CPUS is <= 64 then most assembler bitmask
- *       operators execute faster with a constant range, so
- *       the operator will continue to use NR_CPUS.
- *
- *       Another consideration is that nr_cpu_ids is initialized
- *       to NR_CPUS and isn't lowered until the possible cpus are
- *       discovered (including any disabled cpus).  So early uses
- *       will span the entire range of NR_CPUS.
- * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
- *
- * The obsolescent cpumask operations are:
- *
- * void cpu_set(cpu, mask)             turn on bit 'cpu' in mask
- * void cpu_clear(cpu, mask)           turn off bit 'cpu' in mask
- * void cpus_setall(mask)              set all bits
- * void cpus_clear(mask)               clear all bits
- * int cpu_isset(cpu, mask)            true iff bit 'cpu' set in mask
- * int cpu_test_and_set(cpu, mask)     test and set bit 'cpu' in mask
- *
- * int cpus_and(dst, src1, src2)       dst = src1 & src2  [intersection]
- * void cpus_or(dst, src1, src2)       dst = src1 | src2  [union]
- * void cpus_xor(dst, src1, src2)      dst = src1 ^ src2
- * int cpus_andnot(dst, src1, src2)    dst = src1 & ~src2
- * void cpus_complement(dst, src)      dst = ~src
- *
- * int cpus_equal(mask1, mask2)                Does mask1 == mask2?
- * int cpus_intersects(mask1, mask2)   Do mask1 and mask2 intersect?
- * int cpus_subset(mask1, mask2)       Is mask1 a subset of mask2?
- * int cpus_empty(mask)                        Is mask empty (no bits sets)?
- * int cpus_full(mask)                 Is mask full (all bits sets)?
- * int cpus_weight(mask)               Hamming weigh - number of set bits
- * int cpus_weight_nr(mask)            Same using nr_cpu_ids instead of NR_CPUS
- *
- * void cpus_shift_right(dst, src, n)  Shift right
- * void cpus_shift_left(dst, src, n)   Shift left
- *
- * int first_cpu(mask)                 Number lowest set bit, or NR_CPUS
- * int next_cpu(cpu, mask)             Next cpu past 'cpu', or NR_CPUS
- * int next_cpu_nr(cpu, mask)          Next cpu past 'cpu', or nr_cpu_ids
- *
- * cpumask_t cpumask_of_cpu(cpu)       Return cpumask with bit 'cpu' set
- *                                     (can be used as an lvalue)
- * CPU_MASK_ALL                                Initializer - all bits set
- * CPU_MASK_NONE                       Initializer - no bits set
- * unsigned long *cpus_addr(mask)      Array of unsigned long's in mask
- *
- * CPUMASK_ALLOC kmalloc's a structure that is a composite of many cpumask_t
- * variables, and CPUMASK_PTR provides pointers to each field.
- *
- * The structure should be defined something like this:
- * struct my_cpumasks {
- *     cpumask_t mask1;
- *     cpumask_t mask2;
- * };
- *
- * Usage is then:
- *     CPUMASK_ALLOC(my_cpumasks);
- *     CPUMASK_PTR(mask1, my_cpumasks);
- *     CPUMASK_PTR(mask2, my_cpumasks);
- *
- *     --- DO NOT reference cpumask_t pointers until this check ---
- *     if (my_cpumasks == NULL)
- *             "kmalloc failed"...
- *
- * References are now pointers to the cpumask_t variables (*mask1, ...)
- *
- *if NR_CPUS > BITS_PER_LONG
- *   CPUMASK_ALLOC(m)                  Declares and allocates struct m *m =
- *                                             kmalloc(sizeof(*m), GFP_KERNEL)
- *   CPUMASK_FREE(m)                   Macro for kfree(m)
- *else
- *   CPUMASK_ALLOC(m)                  Declares struct m _m, *m = &_m
- *   CPUMASK_FREE(m)                   Nop
- *endif
- *   CPUMASK_PTR(v, m)                 Declares cpumask_t *v = &(m->v)
- * ------------------------------------------------------------------------
- *
- * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
- * int cpumask_parse_user(ubuf, ulen, mask)    Parse ascii string as cpumask
- * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
- * int cpulist_parse(buf, map)         Parse ascii string as cpulist
- * int cpu_remap(oldbit, old, new)     newbit = map(old, new)(oldbit)
- * void cpus_remap(dst, src, old, new) *dst = map(old, new)(src)
- * void cpus_onto(dst, orig, relmap)   *dst = orig relative to relmap
- * void cpus_fold(dst, orig, sz)       dst bits = orig bits mod sz
- *
- * for_each_cpu_mask(cpu, mask)                for-loop cpu over mask using NR_CPUS
- * for_each_cpu_mask_nr(cpu, mask)     for-loop cpu over mask using nr_cpu_ids
- *
- * int num_online_cpus()               Number of online CPUs
- * int num_possible_cpus()             Number of all possible CPUs
- * int num_present_cpus()              Number of present CPUs
- *
- * int cpu_online(cpu)                 Is some cpu online?
- * int cpu_possible(cpu)               Is some cpu possible?
- * int cpu_present(cpu)                        Is some cpu present (can schedule)?
- *
- * int any_online_cpu(mask)            First online cpu in mask
- *
- * for_each_possible_cpu(cpu)          for-loop cpu over cpu_possible_map
- * for_each_online_cpu(cpu)            for-loop cpu over cpu_online_map
- * for_each_present_cpu(cpu)           for-loop cpu over cpu_present_map
- *
- * Subtlety:
- * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway)
- *    to generate slightly worse code.  Note for example the additional
- *    40 lines of assembly code compiling the "for each possible cpu"
- *    loops buried in the disk_stat_read() macros calls when compiling
- *    drivers/block/genhd.c (arch i386, CONFIG_SMP=y).  So use a simple
- *    one-line #define for cpu_isset(), instead of wrapping an inline
- *    inside a macro, the way we do the other calls.
+ * set of CPUs in a system, one bit position per CPU number.  In general,
+ * only nr_cpu_ids (<= NR_CPUS) bits are valid.
  */
-
 #include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/bitmap.h>
 
 typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
-extern cpumask_t _unused_cpumask_arg_;
-
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
-static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
-{
-       set_bit(cpu, dstp->bits);
-}
-
-#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
-static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
-{
-       clear_bit(cpu, dstp->bits);
-}
-
-#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
-static inline void __cpus_setall(cpumask_t *dstp, int nbits)
-{
-       bitmap_fill(dstp->bits, nbits);
-}
-
-#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
-static inline void __cpus_clear(cpumask_t *dstp, int nbits)
-{
-       bitmap_zero(dstp->bits, nbits);
-}
-
-/* No static inline type checking - see Subtlety (1) above. */
-#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
-
-#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
-static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
-{
-       return test_and_set_bit(cpu, addr->bits);
-}
-
-#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
-static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
-                                       const cpumask_t *src2p, int nbits)
-{
-       return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
-static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
-                                       const cpumask_t *src2p, int nbits)
-{
-       bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
-static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
-                                       const cpumask_t *src2p, int nbits)
-{
-       bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_andnot(dst, src1, src2) \
-                               __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
-static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
-                                       const cpumask_t *src2p, int nbits)
-{
-       return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS)
-static inline void __cpus_complement(cpumask_t *dstp,
-                                       const cpumask_t *srcp, int nbits)
-{
-       bitmap_complement(dstp->bits, srcp->bits, nbits);
-}
-
-#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_equal(const cpumask_t *src1p,
-                                       const cpumask_t *src2p, int nbits)
-{
-       return bitmap_equal(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_intersects(const cpumask_t *src1p,
-                                       const cpumask_t *src2p, int nbits)
-{
-       return bitmap_intersects(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_subset(const cpumask_t *src1p,
-                                       const cpumask_t *src2p, int nbits)
-{
-       return bitmap_subset(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
-static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
-{
-       return bitmap_empty(srcp->bits, nbits);
-}
-
-#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS)
-static inline int __cpus_full(const cpumask_t *srcp, int nbits)
-{
-       return bitmap_full(srcp->bits, nbits);
-}
-
-#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
-static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
-{
-       return bitmap_weight(srcp->bits, nbits);
-}
-
-#define cpus_shift_right(dst, src, n) \
-                       __cpus_shift_right(&(dst), &(src), (n), NR_CPUS)
-static inline void __cpus_shift_right(cpumask_t *dstp,
-                                       const cpumask_t *srcp, int n, int nbits)
-{
-       bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
-}
-
-#define cpus_shift_left(dst, src, n) \
-                       __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
-static inline void __cpus_shift_left(cpumask_t *dstp,
-                                       const cpumask_t *srcp, int n, int nbits)
-{
-       bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
-}
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 
 /**
- * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
- * @bitmap: the bitmap
- *
- * There are a few places where cpumask_var_t isn't appropriate and
- * static cpumasks must be used (eg. very early boot), yet we don't
- * expose the definition of 'struct cpumask'.
- *
- * This does the conversion, and can be used as a constant initializer.
- */
-#define to_cpumask(bitmap)                                             \
-       ((struct cpumask *)(1 ? (bitmap)                                \
-                           : (void *)sizeof(__check_is_bitmap(bitmap))))
-
-static inline int __check_is_bitmap(const unsigned long *bitmap)
-{
-       return 1;
-}
-
-/*
- * Special-case data structure for "single bit set only" constant CPU masks.
+ * cpumask_bits - get the bits in a cpumask
+ * @maskp: the struct cpumask *
  *
- * We pre-generate all the 64 (or 32) possible bit positions, with enough
- * padding to the left and the right, and return the constant pointer
- * appropriately offset.
- */
-extern const unsigned long
-       cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
-
-static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
-{
-       const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
-       p -= cpu / BITS_PER_LONG;
-       return to_cpumask(p);
-}
-
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-/*
- * In cases where we take the address of the cpumask immediately,
- * gcc optimizes it out (it's a constant) and there's no huge stack
- * variable created:
+ * You should only assume nr_cpu_ids bits of this mask are valid.  This is
+ * a macro so it's const-correct.
  */
-#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu))
-
-
-#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
-
-#if NR_CPUS <= BITS_PER_LONG
-
-#define CPU_MASK_ALL                                                   \
-(cpumask_t) { {                                                                \
-       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD                 \
-} }
-
-#define CPU_MASK_ALL_PTR       (&CPU_MASK_ALL)
-
-#else
-
-#define CPU_MASK_ALL                                                   \
-(cpumask_t) { {                                                                \
-       [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,                        \
-       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD                 \
-} }
-
-/* cpu_mask_all is in init/main.c */
-extern cpumask_t cpu_mask_all;
-#define CPU_MASK_ALL_PTR       (&cpu_mask_all)
-
-#endif
-
-#define CPU_MASK_NONE                                                  \
-(cpumask_t) { {                                                                \
-       [0 ... BITS_TO_LONGS(NR_CPUS)-1] =  0UL                         \
-} }
-
-#define CPU_MASK_CPU0                                                  \
-(cpumask_t) { {                                                                \
-       [0] =  1UL                                                      \
-} }
-
-#define cpus_addr(src) ((src).bits)
-
-#if NR_CPUS > BITS_PER_LONG
-#define        CPUMASK_ALLOC(m)        struct m *m = kmalloc(sizeof(*m), GFP_KERNEL)
-#define        CPUMASK_FREE(m)         kfree(m)
-#else
-#define        CPUMASK_ALLOC(m)        struct m _m, *m = &_m
-#define        CPUMASK_FREE(m)
-#endif
-#define        CPUMASK_PTR(v, m)       cpumask_t *v = &(m->v)
-
-#define cpu_remap(oldbit, old, new) \
-               __cpu_remap((oldbit), &(old), &(new), NR_CPUS)
-static inline int __cpu_remap(int oldbit,
-               const cpumask_t *oldp, const cpumask_t *newp, int nbits)
-{
-       return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits);
-}
-
-#define cpus_remap(dst, src, old, new) \
-               __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS)
-static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp,
-               const cpumask_t *oldp, const cpumask_t *newp, int nbits)
-{
-       bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits);
-}
-
-#define cpus_onto(dst, orig, relmap) \
-               __cpus_onto(&(dst), &(orig), &(relmap), NR_CPUS)
-static inline void __cpus_onto(cpumask_t *dstp, const cpumask_t *origp,
-               const cpumask_t *relmapp, int nbits)
-{
-       bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits);
-}
-
-#define cpus_fold(dst, orig, sz) \
-               __cpus_fold(&(dst), &(orig), sz, NR_CPUS)
-static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
-               int sz, int nbits)
-{
-       bitmap_fold(dstp->bits, origp->bits, sz, nbits);
-}
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+#define cpumask_bits(maskp) ((maskp)->bits)
 
 #if NR_CPUS == 1
-
 #define nr_cpu_ids             1
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#define first_cpu(src)         ({ (void)(src); 0; })
-#define next_cpu(n, src)       ({ (void)(src); 1; })
-#define any_online_cpu(mask)   0
-#define for_each_cpu_mask(cpu, mask)   \
-       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
-#else /* NR_CPUS > 1 */
-
+#else
 extern int nr_cpu_ids;
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-int __first_cpu(const cpumask_t *srcp);
-int __next_cpu(int n, const cpumask_t *srcp);
-int __any_online_cpu(const cpumask_t *mask);
-
-#define first_cpu(src)         __first_cpu(&(src))
-#define next_cpu(n, src)       __next_cpu((n), &(src))
-#define any_online_cpu(mask) __any_online_cpu(&(mask))
-#define for_each_cpu_mask(cpu, mask)                   \
-       for ((cpu) = -1;                                \
-               (cpu) = next_cpu((cpu), (mask)),        \
-               (cpu) < NR_CPUS; )
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 #endif
 
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#if NR_CPUS <= 64
-
-#define next_cpu_nr(n, src)            next_cpu(n, src)
-#define cpus_weight_nr(cpumask)                cpus_weight(cpumask)
-#define for_each_cpu_mask_nr(cpu, mask)        for_each_cpu_mask(cpu, mask)
-
-#else /* NR_CPUS > 64 */
-
-int __next_cpu_nr(int n, const cpumask_t *srcp);
-#define next_cpu_nr(n, src)    __next_cpu_nr((n), &(src))
-#define cpus_weight_nr(cpumask)        __cpus_weight(&(cpumask), nr_cpu_ids)
-#define for_each_cpu_mask_nr(cpu, mask)                        \
-       for ((cpu) = -1;                                \
-               (cpu) = next_cpu_nr((cpu), (mask)),     \
-               (cpu) < nr_cpu_ids; )
-
-#endif /* NR_CPUS > 64 */
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* Assuming NR_CPUS is huge, a runtime limit is more efficient.  Also,
+ * not all bits may be allocated. */
+#define nr_cpumask_bits        nr_cpu_ids
+#else
+#define nr_cpumask_bits        NR_CPUS
+#endif
 
 /*
  * The following particular system cpumasks and operations manage
@@ -485,59 +78,24 @@ int __next_cpu_nr(int n, const cpumask_t *srcp);
 extern const struct cpumask *const cpu_possible_mask;
 extern const struct cpumask *const cpu_online_mask;
 extern const struct cpumask *const cpu_present_mask;
-extern const struct cpumask *const cpu_active_mask;
-
-/* These strip const, as traditionally they weren't const. */
-#define cpu_possible_map       (*(cpumask_t *)cpu_possible_mask)
-#define cpu_online_map         (*(cpumask_t *)cpu_online_mask)
-#define cpu_present_map                (*(cpumask_t *)cpu_present_mask)
-#define cpu_active_map         (*(cpumask_t *)cpu_active_mask)
-
-#if NR_CPUS > 1
-#define num_online_cpus()      cpumask_weight(cpu_online_mask)
-#define num_possible_cpus()    cpumask_weight(cpu_possible_mask)
-#define num_present_cpus()     cpumask_weight(cpu_present_mask)
-#define cpu_online(cpu)                cpumask_test_cpu((cpu), cpu_online_mask)
-#define cpu_possible(cpu)      cpumask_test_cpu((cpu), cpu_possible_mask)
-#define cpu_present(cpu)       cpumask_test_cpu((cpu), cpu_present_mask)
-#define cpu_active(cpu)                cpumask_test_cpu((cpu), cpu_active_mask)
-#else
-#define num_online_cpus()      1
-#define num_possible_cpus()    1
-#define num_present_cpus()     1
-#define cpu_online(cpu)                ((cpu) == 0)
-#define cpu_possible(cpu)      ((cpu) == 0)
-#define cpu_present(cpu)       ((cpu) == 0)
-#define cpu_active(cpu)                ((cpu) == 0)
-#endif
-
-#define cpu_is_offline(cpu)    unlikely(!cpu_online(cpu))
-
-/* These are the new versions of the cpumask operators: passed by pointer.
- * The older versions will be implemented in terms of these, then deleted. */
-#define cpumask_bits(maskp) ((maskp)->bits)
-
-#if NR_CPUS <= BITS_PER_LONG
-#define CPU_BITS_ALL                                           \
-{                                                              \
-       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-}
-
-#else /* NR_CPUS > BITS_PER_LONG */
-
-#define CPU_BITS_ALL                                           \
-{                                                              \
-       [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,                \
-       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD         \
-}
-#endif /* NR_CPUS > BITS_PER_LONG */
+extern const struct cpumask *const cpu_active_mask;
 
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* Assuming NR_CPUS is huge, a runtime limit is more efficient.  Also,
- * not all bits may be allocated. */
-#define nr_cpumask_bits        nr_cpu_ids
+#if NR_CPUS > 1
+#define num_online_cpus()      cpumask_weight(cpu_online_mask)
+#define num_possible_cpus()    cpumask_weight(cpu_possible_mask)
+#define num_present_cpus()     cpumask_weight(cpu_present_mask)
+#define cpu_online(cpu)                cpumask_test_cpu((cpu), cpu_online_mask)
+#define cpu_possible(cpu)      cpumask_test_cpu((cpu), cpu_possible_mask)
+#define cpu_present(cpu)       cpumask_test_cpu((cpu), cpu_present_mask)
+#define cpu_active(cpu)                cpumask_test_cpu((cpu), cpu_active_mask)
 #else
-#define nr_cpumask_bits        NR_CPUS
+#define num_online_cpus()      1
+#define num_possible_cpus()    1
+#define num_present_cpus()     1
+#define cpu_online(cpu)                ((cpu) == 0)
+#define cpu_possible(cpu)      ((cpu) == 0)
+#define cpu_present(cpu)       ((cpu) == 0)
+#define cpu_active(cpu)                ((cpu) == 0)
 #endif
 
 /* verify cpu argument to cpumask_* operators */
@@ -1100,4 +658,241 @@ void set_cpu_active(unsigned int cpu, bool active);
 void init_cpu_present(const struct cpumask *src);
 void init_cpu_possible(const struct cpumask *src);
 void init_cpu_online(const struct cpumask *src);
+
+/**
+ * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
+ * @bitmap: the bitmap
+ *
+ * There are a few places where cpumask_var_t isn't appropriate and
+ * static cpumasks must be used (eg. very early boot), yet we don't
+ * expose the definition of 'struct cpumask'.
+ *
+ * This does the conversion, and can be used as a constant initializer.
+ */
+#define to_cpumask(bitmap)                                             \
+       ((struct cpumask *)(1 ? (bitmap)                                \
+                           : (void *)sizeof(__check_is_bitmap(bitmap))))
+
+static inline int __check_is_bitmap(const unsigned long *bitmap)
+{
+       return 1;
+}
+
+/*
+ * Special-case data structure for "single bit set only" constant CPU masks.
+ *
+ * We pre-generate all the 64 (or 32) possible bit positions, with enough
+ * padding to the left and the right, and return the constant pointer
+ * appropriately offset.
+ */
+extern const unsigned long
+       cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
+
+static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
+{
+       const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
+       p -= cpu / BITS_PER_LONG;
+       return to_cpumask(p);
+}
+
+#define cpu_is_offline(cpu)    unlikely(!cpu_online(cpu))
+
+#if NR_CPUS <= BITS_PER_LONG
+#define CPU_BITS_ALL                                           \
+{                                                              \
+       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+}
+
+#else /* NR_CPUS > BITS_PER_LONG */
+
+#define CPU_BITS_ALL                                           \
+{                                                              \
+       [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,                \
+       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD         \
+}
+#endif /* NR_CPUS > BITS_PER_LONG */
+
+/*
+ *
+ * From here down, all obsolete.  Use cpumask_ variants!
+ *
+ */
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
+/* These strip const, as traditionally they weren't const. */
+#define cpu_possible_map       (*(cpumask_t *)cpu_possible_mask)
+#define cpu_online_map         (*(cpumask_t *)cpu_online_mask)
+#define cpu_present_map                (*(cpumask_t *)cpu_present_mask)
+#define cpu_active_map         (*(cpumask_t *)cpu_active_mask)
+
+#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu))
+
+#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
+
+#if NR_CPUS <= BITS_PER_LONG
+
+#define CPU_MASK_ALL                                                   \
+(cpumask_t) { {                                                                \
+       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD                 \
+} }
+
+#else
+
+#define CPU_MASK_ALL                                                   \
+(cpumask_t) { {                                                                \
+       [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,                        \
+       [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD                 \
+} }
+
+#endif
+
+#define CPU_MASK_NONE                                                  \
+(cpumask_t) { {                                                                \
+       [0 ... BITS_TO_LONGS(NR_CPUS)-1] =  0UL                         \
+} }
+
+#define CPU_MASK_CPU0                                                  \
+(cpumask_t) { {                                                                \
+       [0] =  1UL                                                      \
+} }
+
+#if NR_CPUS == 1
+#define first_cpu(src)         ({ (void)(src); 0; })
+#define next_cpu(n, src)       ({ (void)(src); 1; })
+#define any_online_cpu(mask)   0
+#define for_each_cpu_mask(cpu, mask)   \
+       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#else /* NR_CPUS > 1 */
+int __first_cpu(const cpumask_t *srcp);
+int __next_cpu(int n, const cpumask_t *srcp);
+int __any_online_cpu(const cpumask_t *mask);
+
+#define first_cpu(src)         __first_cpu(&(src))
+#define next_cpu(n, src)       __next_cpu((n), &(src))
+#define any_online_cpu(mask) __any_online_cpu(&(mask))
+#define for_each_cpu_mask(cpu, mask)                   \
+       for ((cpu) = -1;                                \
+               (cpu) = next_cpu((cpu), (mask)),        \
+               (cpu) < NR_CPUS; )
+#endif /* NR_CPUS > 1 */
+
+#if NR_CPUS <= 64
+
+#define for_each_cpu_mask_nr(cpu, mask)        for_each_cpu_mask(cpu, mask)
+
+#else /* NR_CPUS > 64 */
+
+int __next_cpu_nr(int n, const cpumask_t *srcp);
+#define for_each_cpu_mask_nr(cpu, mask)                        \
+       for ((cpu) = -1;                                \
+               (cpu) = __next_cpu_nr((cpu), &(mask)),  \
+               (cpu) < nr_cpu_ids; )
+
+#endif /* NR_CPUS > 64 */
+
+#define cpus_addr(src) ((src).bits)
+
+#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
+static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
+{
+       set_bit(cpu, dstp->bits);
+}
+
+#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
+static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
+{
+       clear_bit(cpu, dstp->bits);
+}
+
+#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
+static inline void __cpus_setall(cpumask_t *dstp, int nbits)
+{
+       bitmap_fill(dstp->bits, nbits);
+}
+
+#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
+static inline void __cpus_clear(cpumask_t *dstp, int nbits)
+{
+       bitmap_zero(dstp->bits, nbits);
+}
+
+/* No static inline type checking - a plain #define generates better code here. */
+#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
+
+#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
+static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
+{
+       return test_and_set_bit(cpu, addr->bits);
+}
+
+#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
+static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_andnot(dst, src1, src2) \
+                               __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
+static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_equal(const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       return bitmap_equal(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_intersects(const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_subset(const cpumask_t *src1p,
+                                       const cpumask_t *src2p, int nbits)
+{
+       return bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
+static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+{
+       return bitmap_empty(srcp->bits, nbits);
+}
+
+#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
+static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+{
+       return bitmap_weight(srcp->bits, nbits);
+}
+
+#define cpus_shift_left(dst, src, n) \
+                       __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_left(cpumask_t *dstp,
+                                       const cpumask_t *srcp, int n, int nbits)
+{
+       bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+
 #endif /* __LINUX_CPUMASK_H */
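
The restructuring above quarantines the by-value cpumask_t helpers behind CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS; new code should use the pointer-based cpumask_* operators (declared in the elided middle of this header) together with cpumask_var_t, which stays small on the stack when CONFIG_CPUMASK_OFFSTACK=y. A hedged migration sketch:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/slab.h>

static int count_online_subset(const struct cpumask *allowed)
{
	cpumask_var_t tmp;
	int n;

	if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
		return -ENOMEM;

	/* was: cpus_and(tmp_v, allowed_v, cpu_online_map) on cpumask_t values */
	cpumask_and(tmp, allowed, cpu_online_mask);
	n = cpumask_weight(tmp);	/* was: cpus_weight(tmp_v) */

	free_cpumask_var(tmp);
	return n;
}
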
index fb371601a3b416d4bfd7aeed4932fdec39e0ac0c..4e3387a89cb92a8549e2c2479c37ac7fd2443956 100644 (file)
@@ -176,23 +176,7 @@ extern void __invalid_creds(const struct cred *, const char *, unsigned);
 extern void __validate_process_creds(struct task_struct *,
                                     const char *, unsigned);
 
-static inline bool creds_are_invalid(const struct cred *cred)
-{
-       if (cred->magic != CRED_MAGIC)
-               return true;
-       if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
-               return true;
-#ifdef CONFIG_SECURITY_SELINUX
-       if (selinux_is_enabled()) {
-               if ((unsigned long) cred->security < PAGE_SIZE)
-                       return true;
-               if ((*(u32 *)cred->security & 0xffffff00) ==
-                   (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
-                       return true;
-       }
-#endif
-       return false;
-}
+extern bool creds_are_invalid(const struct cred *cred);
 
 static inline void __validate_creds(const struct cred *cred,
                                    const char *file, unsigned line)
index 9c20c7e87d0aa830ac3009a676328e691f926903..d27a7a05718d3d0b299228f7c217d7924f242253 100644 (file)
@@ -20,6 +20,9 @@
  */
 #ifndef DCA_H
 #define DCA_H
+
+#include <linux/pci.h>
+
 /* DCA Provider API */
 
 /* DCA Notifier Interface */
@@ -36,6 +39,12 @@ struct dca_provider {
        int                      id;
 };
 
+struct dca_domain {
+       struct list_head        node;
+       struct list_head        dca_providers;
+       struct pci_bus          *pci_rc;
+};
+
 struct dca_ops {
        int     (*add_requester)    (struct dca_provider *, struct device *);
        int     (*remove_requester) (struct dca_provider *, struct device *);
@@ -47,7 +56,7 @@ struct dca_ops {
 struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
 void free_dca_provider(struct dca_provider *dca);
 int register_dca_provider(struct dca_provider *dca, struct device *dev);
-void unregister_dca_provider(struct dca_provider *dca);
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
 
 static inline void *dca_priv(struct dca_provider *dca)
 {
index eb5c2ba2f81ab25e7b6e1a6d0a2fe93cc61b1894..fc1b930f246cddd7c196ff698d38a2419e842058 100644 (file)
@@ -9,7 +9,7 @@
  *     2 as published by the Free Software Foundation.
  *
  *  debugfs is for people to use instead of /proc or /sys.
- *  See Documentation/DocBook/kernel-api for more details.
+ *  See Documentation/DocBook/filesystems for more details.
  */
 
 #ifndef _DEBUGFS_H_
index ffefba81c818ccc7424b54fade9f8af2a1eea10d..2b9f2ac7ed60f0e8c61e79f2f7791545f877e1ef 100644 (file)
@@ -48,19 +48,20 @@ enum dma_status {
 
 /**
  * enum dma_transaction_type - DMA transaction types/indexes
+ *
+ * Note: The DMA_ASYNC_TX capability is not to be set by drivers.  It is
+ * automatically set as dma devices are registered.
  */
 enum dma_transaction_type {
        DMA_MEMCPY,
        DMA_XOR,
-       DMA_PQ_XOR,
-       DMA_DUAL_XOR,
-       DMA_PQ_UPDATE,
-       DMA_ZERO_SUM,
-       DMA_PQ_ZERO_SUM,
+       DMA_PQ,
+       DMA_XOR_VAL,
+       DMA_PQ_VAL,
        DMA_MEMSET,
-       DMA_MEMCPY_CRC32C,
        DMA_INTERRUPT,
        DMA_PRIVATE,
+       DMA_ASYNC_TX,
        DMA_SLAVE,
 };
 
@@ -70,18 +71,25 @@ enum dma_transaction_type {
 
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
- *     control completion, and communicate status.
+ *  control completion, and communicate status.
  * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
- *     this transaction
+ *  this transaction
  * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
- *     acknowledges receipt, i.e. has has a chance to establish any
- *     dependency chains
+ *  acknowledges receipt, i.e. has had a chance to establish any dependency
+ *  chains
  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
  * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
  *     (if not set, do the source dma-unmapping as page)
  * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
  *     (if not set, do the destination dma-unmapping as page)
+ * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
+ * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
+ * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
+ *  sources that were the result of a previous operation, in the case of a PQ
+ *  operation it continues the calculation with new sources
+ * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
+ *  on the result of this operation
  */
 enum dma_ctrl_flags {
        DMA_PREP_INTERRUPT = (1 << 0),
@@ -90,8 +98,31 @@ enum dma_ctrl_flags {
        DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
        DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
        DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
+       DMA_PREP_PQ_DISABLE_P = (1 << 6),
+       DMA_PREP_PQ_DISABLE_Q = (1 << 7),
+       DMA_PREP_CONTINUE = (1 << 8),
+       DMA_PREP_FENCE = (1 << 9),
 };
 
+/**
+ * enum sum_check_bits - bit positions of sum_check_flags
+ */
+enum sum_check_bits {
+       SUM_CHECK_P = 0,
+       SUM_CHECK_Q = 1,
+};
+
+/**
+ * enum sum_check_flags - result of async_{xor,pq}_val operations
+ * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
+ * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
+ */
+enum sum_check_flags {
+       SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
+       SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
+};
+
 /**
  * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
  * See linux/cpumask.h
@@ -180,8 +211,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param);
  * @flags: flags to augment operation preparation, control completion, and
  *     communicate status
  * @phys: physical address of the descriptor
- * @tx_list: driver common field for operations that require multiple
- *     descriptors
  * @chan: target channel for this operation
  * @tx_submit: set the prepared descriptor(s) to be executed by the engine
  * @callback: routine to call after this operation is complete
@@ -195,7 +224,6 @@ struct dma_async_tx_descriptor {
        dma_cookie_t cookie;
        enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
        dma_addr_t phys;
-       struct list_head tx_list;
        struct dma_chan *chan;
        dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
        dma_async_tx_callback callback;
@@ -213,6 +241,11 @@ struct dma_async_tx_descriptor {
  * @global_node: list_head for global dma_device_list
  * @cap_mask: one or more dma_capability flags
  * @max_xor: maximum number of xor sources, 0 if no capability
+ * @max_pq: maximum number of PQ sources and PQ-continue capability
+ * @copy_align: alignment shift for memcpy operations
+ * @xor_align: alignment shift for xor operations
+ * @pq_align: alignment shift for pq operations
+ * @fill_align: alignment shift for memset operations
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @device_alloc_chan_resources: allocate resources and return the
@@ -220,7 +253,9 @@ struct dma_async_tx_descriptor {
  * @device_free_chan_resources: release DMA channel's resources
  * @device_prep_dma_memcpy: prepares a memcpy operation
  * @device_prep_dma_xor: prepares a xor operation
- * @device_prep_dma_zero_sum: prepares a zero_sum operation
+ * @device_prep_dma_xor_val: prepares a xor validation operation
+ * @device_prep_dma_pq: prepares a pq operation
+ * @device_prep_dma_pq_val: prepares a pq validation (zero-sum) operation
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
@@ -235,7 +270,13 @@ struct dma_device {
        struct list_head channels;
        struct list_head global_node;
        dma_cap_mask_t  cap_mask;
-       int max_xor;
+       unsigned short max_xor;
+       unsigned short max_pq;
+       u8 copy_align;
+       u8 xor_align;
+       u8 pq_align;
+       u8 fill_align;
+       #define DMA_HAS_PQ_CONTINUE (1 << 15)
 
        int dev_id;
        struct device *dev;
@@ -249,9 +290,17 @@ struct dma_device {
        struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
                struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
                unsigned int src_cnt, size_t len, unsigned long flags);
-       struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
+       struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
                struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
-               size_t len, u32 *result, unsigned long flags);
+               size_t len, enum sum_check_flags *result, unsigned long flags);
+       struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
+               struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+               unsigned int src_cnt, const unsigned char *scf,
+               size_t len, unsigned long flags);
+       struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
+               struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+               unsigned int src_cnt, const unsigned char *scf, size_t len,
+               enum sum_check_flags *pqres, unsigned long flags);
        struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
                struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
                unsigned long flags);
@@ -270,6 +319,96 @@ struct dma_device {
        void (*device_issue_pending)(struct dma_chan *chan);
 };
 
+static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
+{
+       size_t mask;
+
+       if (!align)
+               return true;
+       mask = (1 << align) - 1;
+       if (mask & (off1 | off2 | len))
+               return false;
+       return true;
+}
+
+static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
+                                      size_t off2, size_t len)
+{
+       return dmaengine_check_align(dev->copy_align, off1, off2, len);
+}
+
+static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
+                                     size_t off2, size_t len)
+{
+       return dmaengine_check_align(dev->xor_align, off1, off2, len);
+}
+
+static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
+                                    size_t off2, size_t len)
+{
+       return dmaengine_check_align(dev->pq_align, off1, off2, len);
+}
+
+static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
+                                      size_t off2, size_t len)
+{
+       return dmaengine_check_align(dev->fill_align, off1, off2, len);
+}
+
+static inline void
+dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
+{
+       dma->max_pq = maxpq;
+       if (has_pq_continue)
+               dma->max_pq |= DMA_HAS_PQ_CONTINUE;
+}
+
+static inline bool dmaf_continue(enum dma_ctrl_flags flags)
+{
+       return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
+}
+
+static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
+{
+       enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
+
+       return (flags & mask) == mask;
+}
+
+static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
+{
+       return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
+}
+
+static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+{
+       return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
+}
+
+/* dma_maxpq - reduce maxpq in the face of continued operations
+ * @dma - dma device with PQ capability
+ * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
+ *
+ * When an engine does not support native continuation we need 3 extra
+ * source slots to reuse P and Q with the following coefficients:
+ * 1/ {00} * P : remove P from Q', but use it as a source for P'
+ * 2/ {01} * Q : use Q to continue Q' calculation
+ * 3/ {00} * Q : subtract Q from P' to cancel (2)
+ *
+ * In the case where P is disabled we only need 1 extra source:
+ * 1/ {01} * Q : use Q to continue Q' calculation
+ */
+static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
+{
+       if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
+               return dma_dev_to_maxpq(dma);
+       else if (dmaf_p_disabled_continue(flags))
+               return dma_dev_to_maxpq(dma) - 1;
+       else if (dmaf_continue(flags))
+               return dma_dev_to_maxpq(dma) - 3;
+       BUG();
+}
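A worked example of the rules above, assuming an engine that advertises 8 PQ sources and lacks native continuation (the setup call is illustrative; the results follow directly from the code):

        dma_set_maxpq(dma, 8, 0);
        dma_maxpq(dma, 0);                  /* -> 8, no continuation requested */
        dma_maxpq(dma, DMA_PREP_CONTINUE);  /* -> 5, 3 slots reserved to reuse P and Q */
        dma_maxpq(dma, DMA_PREP_CONTINUE |
                       DMA_PREP_PQ_DISABLE_P); /* -> 7, only Q needs a slot */
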
+
 /* --- public DMA engine API --- */
 
 #ifdef CONFIG_DMA_ENGINE
@@ -299,7 +438,11 @@ static inline void net_dmaengine_put(void)
 #ifdef CONFIG_ASYNC_TX_DMA
 #define async_dmaengine_get()  dmaengine_get()
 #define async_dmaengine_put()  dmaengine_put()
+#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
+#else
 #define async_dma_find_channel(type) dma_find_channel(type)
+#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
 #else
 static inline void async_dmaengine_get(void)
 {
@@ -312,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type)
 {
        return NULL;
 }
-#endif
+#endif /* CONFIG_ASYNC_TX_DMA */
 
 dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
        void *dest, void *src, size_t len);
index 51803528b095fa70380dc6ef201f11704e229f75..2adaa2529f184fda637a6a2aeea55bda41693104 100644 (file)
@@ -595,6 +595,7 @@ struct address_space_operations {
        int (*launder_page) (struct page *);
        int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
                                        unsigned long);
+       int (*error_remove_page)(struct address_space *, struct page *);
 };
 
 /*
@@ -640,7 +641,6 @@ struct block_device {
        struct super_block *    bd_super;
        int                     bd_openers;
        struct mutex            bd_mutex;       /* open/close mutex */
-       struct semaphore        bd_mount_sem;
        struct list_head        bd_inodes;
        void *                  bd_holder;
        int                     bd_holders;
@@ -1315,7 +1315,7 @@ struct super_block {
        unsigned long           s_blocksize;
        unsigned char           s_blocksize_bits;
        unsigned char           s_dirt;
-       unsigned long long      s_maxbytes;     /* Max file size */
+       loff_t                  s_maxbytes;     /* Max file size */
        struct file_system_type *s_type;
        const struct super_operations   *s_op;
        const struct dquot_operations   *dq_op;
@@ -2156,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t);
 extern int inode_needs_sync(struct inode *inode);
 extern void generic_delete_inode(struct inode *inode);
 extern void generic_drop_inode(struct inode *inode);
+extern int generic_detach_inode(struct inode *inode);
 
 extern struct inode *ilookup5_nowait(struct super_block *sb,
                unsigned long hashval, int (*test)(struct inode *, void *),
@@ -2334,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_active_super(struct block_device *bdev);
 extern struct super_block *user_get_super(dev_t);
 extern void drop_super(struct super_block *sb);
 
@@ -2381,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *,
 #define buffer_migrate_page NULL
 #endif
 
-extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_change_ok(const struct inode *, struct iattr *);
+extern int inode_newsize_ok(const struct inode *, loff_t offset);
 extern int __must_check inode_setattr(struct inode *, struct iattr *);
 
 extern void file_update_time(struct file *file);
@@ -2467,7 +2470,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
                          size_t len, loff_t *ppos);
 
 struct ctl_table;
-int proc_nr_files(struct ctl_table *table, int write, struct file *filp,
+int proc_nr_files(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
 
 int __init get_filesystem_list(char *buf);
index 3c0924a18dafd0bac336e1c45b79a080ea834e55..cd3d2abaf30a806f508d0d98950f97739ba26562 100644 (file)
@@ -19,7 +19,7 @@
 extern int ftrace_enabled;
 extern int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
-                    struct file *filp, void __user *buffer, size_t *lenp,
+                    void __user *buffer, size_t *lenp,
                     loff_t *ppos);
 
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
@@ -94,7 +94,7 @@ static inline void ftrace_start(void) { }
 extern int stack_tracer_enabled;
 int
 stack_trace_sysctl(struct ctl_table *table, int write,
-                  struct file *file, void __user *buffer, size_t *lenp,
+                  void __user *buffer, size_t *lenp,
                   loff_t *ppos);
 #endif
 
index 34956c8fdebf8df63ab44c1f84b136406b3d0f34..8ec17997d94fa0aafeaab5cc95824194c9e913f3 100644 (file)
@@ -4,11 +4,6 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 
-struct inode;
-struct mm_struct;
-struct task_struct;
-union ktime;
-
 /* Second argument to futex syscall */
 
 
@@ -129,6 +124,11 @@ struct robust_list_head {
 #define FUTEX_BITSET_MATCH_ANY 0xffffffff
 
 #ifdef __KERNEL__
+struct inode;
+struct mm_struct;
+struct task_struct;
+union ktime;
+
 long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
              u32 __user *uaddr2, u32 val2, u32 val3);
 
index 176e7ee73eff5f8c9a97a77b69def8e50cadc5d3..11ab19ac6b3d98af1b4389b62a085c929254c491 100644 (file)
@@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 }
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
-int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
-int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
-int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
+int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
                        struct page **, struct vm_area_struct **,
diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h
new file mode 100644 (file)
index 0000000..fc5db82
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _ADP5588_H
+#define _ADP5588_H
+
+#define DEV_ID 0x00            /* Device ID */
+#define CFG 0x01               /* Configuration Register1 */
+#define INT_STAT 0x02          /* Interrupt Status Register */
+#define KEY_LCK_EC_STAT 0x03   /* Key Lock and Event Counter Register */
+#define Key_EVENTA 0x04                /* Key Event Register A */
+#define Key_EVENTB 0x05                /* Key Event Register B */
+#define Key_EVENTC 0x06                /* Key Event Register C */
+#define Key_EVENTD 0x07                /* Key Event Register D */
+#define Key_EVENTE 0x08                /* Key Event Register E */
+#define Key_EVENTF 0x09                /* Key Event Register F */
+#define Key_EVENTG 0x0A                /* Key Event Register G */
+#define Key_EVENTH 0x0B                /* Key Event Register H */
+#define Key_EVENTI 0x0C                /* Key Event Register I */
+#define Key_EVENTJ 0x0D                /* Key Event Register J */
+#define KP_LCK_TMR 0x0E                /* Keypad Lock1 to Lock2 Timer */
+#define UNLOCK1 0x0F           /* Unlock Key1 */
+#define UNLOCK2 0x10           /* Unlock Key2 */
+#define GPIO_INT_STAT1 0x11    /* GPIO Interrupt Status */
+#define GPIO_INT_STAT2 0x12    /* GPIO Interrupt Status */
+#define GPIO_INT_STAT3 0x13    /* GPIO Interrupt Status */
+#define GPIO_DAT_STAT1 0x14    /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_STAT2 0x15    /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_STAT3 0x16    /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_OUT1 0x17     /* GPIO DATA OUT */
+#define GPIO_DAT_OUT2 0x18     /* GPIO DATA OUT */
+#define GPIO_DAT_OUT3 0x19     /* GPIO DATA OUT */
+#define GPIO_INT_EN1 0x1A      /* GPIO Interrupt Enable */
+#define GPIO_INT_EN2 0x1B      /* GPIO Interrupt Enable */
+#define GPIO_INT_EN3 0x1C      /* GPIO Interrupt Enable */
+#define KP_GPIO1 0x1D          /* Keypad or GPIO Selection */
+#define KP_GPIO2 0x1E          /* Keypad or GPIO Selection */
+#define KP_GPIO3 0x1F          /* Keypad or GPIO Selection */
+#define GPI_EM1 0x20           /* GPI Event Mode 1 */
+#define GPI_EM2 0x21           /* GPI Event Mode 2 */
+#define GPI_EM3 0x22           /* GPI Event Mode 3 */
+#define GPIO_DIR1 0x23         /* GPIO Data Direction */
+#define GPIO_DIR2 0x24         /* GPIO Data Direction */
+#define GPIO_DIR3 0x25         /* GPIO Data Direction */
+#define GPIO_INT_LVL1 0x26     /* GPIO Edge/Level Detect */
+#define GPIO_INT_LVL2 0x27     /* GPIO Edge/Level Detect */
+#define GPIO_INT_LVL3 0x28     /* GPIO Edge/Level Detect */
+#define Debounce_DIS1 0x29     /* Debounce Disable */
+#define Debounce_DIS2 0x2A     /* Debounce Disable */
+#define Debounce_DIS3 0x2B     /* Debounce Disable */
+#define GPIO_PULL1 0x2C                /* GPIO Pull Disable */
+#define GPIO_PULL2 0x2D                /* GPIO Pull Disable */
+#define GPIO_PULL3 0x2E                /* GPIO Pull Disable */
+#define CMP_CFG_STAT 0x30      /* Comparator Configuration and Status Register */
+#define CMP_CONFG_SENS1 0x31   /* Sensor1 Comparator Configuration Register */
+#define CMP_CONFG_SENS2 0x32   /* Sensor 2 Comparator Configuration Register */
+#define CMP1_LVL2_TRIP 0x33    /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */
+#define CMP1_LVL2_HYS 0x34     /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */
+#define CMP1_LVL3_TRIP 0x35    /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */
+#define CMP1_LVL3_HYS 0x36     /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */
+#define CMP2_LVL2_TRIP 0x37    /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */
+#define CMP2_LVL2_HYS 0x38     /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */
+#define CMP2_LVL3_TRIP 0x39    /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */
+#define CMP2_LVL3_HYS 0x3A     /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */
+#define CMP1_ADC_DAT_R1 0x3B   /* Comparator 1 ADC data Register1 */
+#define CMP1_ADC_DAT_R2 0x3C   /* Comparator 1 ADC data Register2 */
+#define CMP2_ADC_DAT_R1 0x3D   /* Comparator 2 ADC data Register1 */
+#define CMP2_ADC_DAT_R2 0x3E   /* Comparator 2 ADC data Register2 */
+
+#define ADP5588_DEVICE_ID_MASK 0xF
+
+/* Put one of these structures in i2c_board_info platform_data */
+
+#define ADP5588_KEYMAPSIZE     80
+
+struct adp5588_kpad_platform_data {
+       int rows;                       /* Number of rows */
+       int cols;                       /* Number of columns */
+       const unsigned short *keymap;   /* Pointer to keymap */
+       unsigned short keymapsize;      /* Keymap size */
+       unsigned repeat:1;              /* Enable key repeat */
+       unsigned en_keylock:1;          /* Enable Key Lock feature */
+       unsigned short unlock_key1;     /* Unlock Key 1 */
+       unsigned short unlock_key2;     /* Unlock Key 2 */
+};
+
+#endif
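A hedged sketch of the board-file side (the I2C address and keymap contents are placeholders, not mandated by this header):

        static const unsigned short board_keymap[ADP5588_KEYMAPSIZE] = {
                /* filled from the board's key matrix */
        };

        static struct adp5588_kpad_platform_data adp5588_kpad_data = {
                .rows           = 8,
                .cols           = 10,
                .keymap         = board_keymap,
                .keymapsize     = ARRAY_SIZE(board_keymap),
                .repeat         = 1,
        };

        static struct i2c_board_info board_i2c_devs[] __initdata = {
                {
                        I2C_BOARD_INFO("adp5588-keys", 0x34),
                        .platform_data = &adp5588_kpad_data,
                },
        };
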
diff --git a/include/linux/i2c/mcs5000_ts.h b/include/linux/i2c/mcs5000_ts.h
new file mode 100644 (file)
index 0000000..5a117b5
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * mcs5000_ts.h
+ *
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MCS5000_TS_H
+#define __LINUX_MCS5000_TS_H
+
+/* platform data for the MELFAS MCS-5000 touchscreen driver */
+struct mcs5000_ts_platform_data {
+       void (*cfg_pin)(void);
+       int x_size;
+       int y_size;
+};
+
+#endif /* __LINUX_MCS5000_TS_H */
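Board code fills this in along the following lines (a sketch; the pin hook and panel geometry are board-specific placeholders):

        static void board_mcs5000_cfg_pin(void)
        {
                /* board-specific pin-mux / interrupt line setup */
        }

        static struct mcs5000_ts_platform_data mcs5000_ts_data = {
                .cfg_pin        = board_mcs5000_cfg_pin,
                .x_size         = 480,
                .y_size         = 800,
        };
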
index 7907a72403eeddbfa8612ac492598bf811bcf9d4..60c3360ef6adf633416b51773d828623851afca0 100644 (file)
@@ -7,6 +7,7 @@
  * the Free Software Foundation.
  */
 
+#include <linux/types.h>
 
 /*
  * Standard commands.
 #define I8042_CMD_MUX_PFX      0x0090
 #define I8042_CMD_MUX_SEND     0x1090
 
+struct serio;
+
+#if defined(CONFIG_SERIO_I8042) || defined(CONFIG_SERIO_I8042_MODULE)
+
+void i8042_lock_chip(void);
+void i8042_unlock_chip(void);
 int i8042_command(unsigned char *param, int command);
+bool i8042_check_port_owner(const struct serio *);
+
+#else
+
+static inline void i8042_lock_chip(void)
+{
+}
+
+static inline void i8042_unlock_chip(void)
+{
+}
+
+static inline int i8042_command(unsigned char *param, int command)
+{
+       return -ENOSYS;
+}
+
+static inline bool i8042_check_port_owner(const struct serio *serio)
+{
+       return false;
+}
+
+#endif
 
 #endif
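The lock/unlock pair exists so that a serio child driver can issue controller commands without racing the i8042 interrupt handler; the expected call pattern is roughly (the psmouse pointer is illustrative, any serio port can be tested):

        if (i8042_check_port_owner(psmouse->ps2dev.serio)) {
                i8042_lock_chip();
                /* talk to the controller, e.g. via i8042_command() */
                i8042_unlock_chip();
        }

With SERIO_I8042 disabled, the stubs make the whole block compile away.
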
index 8b3bc3e0d1463a21a9bb8b71908d3c15dfd4dfb3..0ccfc30cd40f7696e0b9e863272f82fb8e42e0f0 100644 (file)
@@ -1123,7 +1123,7 @@ struct input_dev {
        struct mutex mutex;
 
        unsigned int users;
-       int going_away;
+       bool going_away;
 
        struct device dev;
 
index 8e9e151f811e9fdb8344ed240ba9db6bd3bba2a0..b78cf8194957644b05908807839b0ed570802310 100644 (file)
@@ -84,7 +84,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
  * struct irqaction - per interrupt action descriptor
  * @handler:   interrupt handler function
  * @flags:     flags (see IRQF_* above)
- * @mask:      no comment as it is useless and about to be removed
  * @name:      name of the device
  * @dev_id:    cookie to identify the device
  * @next:      pointer to the next irqaction for shared interrupts
@@ -97,7 +96,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
 struct irqaction {
        irq_handler_t handler;
        unsigned long flags;
-       cpumask_t mask;
        const char *name;
        void *dev_id;
        struct irqaction *next;
index fcf5fbe6a50c3596be52388d74e210d6b4c38231..79603a6c356fb5dd28f2bddf27bceafbf263c7d2 100644 (file)
@@ -44,6 +44,8 @@ struct ps2dev {
 void ps2_init(struct ps2dev *ps2dev, struct serio *serio);
 int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout);
 void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout);
+void ps2_begin_command(struct ps2dev *ps2dev);
+void ps2_end_command(struct ps2dev *ps2dev);
 int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command);
 int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command);
 int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data);
index 691f59171c6c722e52faaeaed4e086d46f646eb2..5126cceb6ae97c7d401f197497f0a1311e55fe6f 100644 (file)
@@ -57,6 +57,7 @@
 
 #ifdef __ASSEMBLY__
 
+#ifndef LINKER_SCRIPT
 #define ALIGN __ALIGN
 #define ALIGN_STR __ALIGN_STR
 
@@ -66,6 +67,7 @@
   ALIGN; \
   name:
 #endif
+#endif /* LINKER_SCRIPT */
 
 #ifndef WEAK
 #define WEAK(name)        \
index e46a0734ab6e68c9f4df95ce5f9771923af392ea..bf9213b2db8f2da2a443d8788d2d70eb0a003af8 100644 (file)
@@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void)
 
 extern bool mem_cgroup_oom_called(struct task_struct *task);
 void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                               gfp_t gfp_mask, int nid,
+                                               int zid);
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
 {
 }
 
+static inline
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                           gfp_t gfp_mask, int nid, int zid)
+{
+       return 0;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
index b6eae5e3144b9dff9ce70e0186c4e7fbbda55475..df08551cb0ad04aa6a16e03c8ab7b40999995bdf 100644 (file)
@@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page)
 #define VM_FAULT_SIGBUS        0x0002
 #define VM_FAULT_MAJOR 0x0004
 #define VM_FAULT_WRITE 0x0008  /* Special case for get_user_pages */
+#define VM_FAULT_HWPOISON 0x0010       /* Hit poisoned page */
 
 #define VM_FAULT_NOPAGE        0x0100  /* ->fault installed the pte, not return page */
 #define VM_FAULT_LOCKED        0x0200  /* ->fault locked the returned page */
 
-#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS)
+#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
 
 /*
  * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
@@ -791,8 +792,14 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
        unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
 
-extern int vmtruncate(struct inode * inode, loff_t offset);
-extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
+extern int vmtruncate(struct inode *inode, loff_t offset);
+extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
+
+int truncate_inode_page(struct address_space *mapping, struct page *page);
+int generic_error_remove_page(struct address_space *mapping, struct page *page);
+
+int invalidate_inode_page(struct page *page);
 
 #ifdef CONFIG_MMU
 extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -1279,7 +1286,7 @@ int in_gate_area_no_task(unsigned long addr);
 #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
 #endif /* __HAVE_ARCH_GATE_AREA */
 
-int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
+int drop_caches_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
                        unsigned long lru_pages);
@@ -1308,5 +1315,12 @@ void vmemmap_populate_print_last(void);
 extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
                                 size_t size);
 extern void refund_locked_memory(struct mm_struct *mm, size_t size);
+
+extern void memory_failure(unsigned long pfn, int trapno);
+extern int __memory_failure(unsigned long pfn, int trapno, int ref);
+extern int sysctl_memory_failure_early_kill;
+extern int sysctl_memory_failure_recovery;
+extern atomic_long_t mce_bad_pages;
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
index 0042090a4d70cd839a97c6b436ea91f8ddf51d40..21d6aa45206aa985a01c1a1fcfb9faaf7ff83307 100644 (file)
@@ -240,6 +240,8 @@ struct mm_struct {
 
        unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
+       struct linux_binfmt *binfmt;
+
        cpumask_t cpu_vm_mask;
 
        /* Architecture-specific MM context */
@@ -259,11 +261,10 @@ struct mm_struct {
        unsigned long flags; /* Must use atomic bitops to access the bits */
 
        struct core_state *core_state; /* coredumping support */
-
-       /* aio bits */
+#ifdef CONFIG_AIO
        spinlock_t              ioctx_lock;
        struct hlist_head       ioctx_list;
-
+#endif
 #ifdef CONFIG_MM_OWNER
        /*
         * "owner" points to a task that is regarded as the canonical
index 652ef01be5823230e8cf89ea96e14b12acd2076a..6f7561730d88c3b8c816e34e76b140cb9612ce8c 100644 (file)
@@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone)
 
 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
-struct file;
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, 
+int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
-                       struct file *, void __user *, size_t *, loff_t *);
+                       void __user *, size_t *, loff_t *);
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
-                       struct file *, void __user *, size_t *, loff_t *);
+                       void __user *, size_t *, loff_t *);
 
 extern int numa_zonelist_order_handler(struct ctl_table *, int,
-                       struct file *, void __user *, size_t *, loff_t *);
+                       void __user *, size_t *, loff_t *);
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN 16     /* string buffer size */
 
index 1c755b2f937dd19e99f353203ef4c130fb53cf6c..482efc865acf272994b4de7906a8d403a1c1454a 100644 (file)
@@ -128,7 +128,10 @@ extern struct module __this_module;
  */
 #define MODULE_LICENSE(_license) MODULE_INFO(license, _license)
 
-/* Author, ideally of form NAME[, NAME]*[ and NAME] */
+/*
+ * Author(s): use "Name <email>" or just "Name"; for multiple
+ * authors, use multiple MODULE_AUTHOR() statements/lines.
+ */
 #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
   
 /* What your module does. */
@@ -308,10 +311,14 @@ struct module
 #endif
 
 #ifdef CONFIG_KALLSYMS
-       /* We keep the symbol and string tables for kallsyms. */
-       Elf_Sym *symtab;
-       unsigned int num_symtab;
-       char *strtab;
+       /*
+        * We keep the symbol and string tables for kallsyms.
+        * The core_* fields below are temporary, loader-only (they
+        * could really be discarded after module init).
+        */
+       Elf_Sym *symtab, *core_symtab;
+       unsigned int num_symtab, core_num_syms;
+       char *strtab, *core_strtab;
 
        /* Section attributes */
        struct module_sect_attrs *sect_attrs;
index 080f6ba9e73a35063a8b0b6ae8a797b6a9b107da..ab5d3126831f106394632bdfdfc6ad1d531daa8f 100644 (file)
@@ -187,6 +187,7 @@ extern struct sock *netlink_kernel_create(struct net *net,
 extern void netlink_kernel_release(struct sock *sk);
 extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
+extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
 extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_has_listeners(struct sock *sk, unsigned int group);
index 13de789f0a5c1b8b47660dd37ca0c7da73dd26fd..6b202b173955541adbe03ebd8137af6749f1c214 100644 (file)
@@ -51,6 +51,9 @@
  * PG_buddy is set to indicate that the page is free and in the buddy system
  * (see mm/page_alloc.c).
  *
+ * PG_hwpoison indicates that a page got corrupted in hardware and contains
+ * data with incorrect ECC bits that triggered a machine check. Accessing it
+ * is not safe since that may cause another machine check. Don't touch!
  */
 
 /*
@@ -101,6 +104,9 @@ enum pageflags {
 #endif
 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
        PG_uncached,            /* Page has been mapped as uncached */
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+       PG_hwpoison,            /* hardware poisoned page. Don't touch */
 #endif
        __NR_PAGEFLAGS,
 
@@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached)
 PAGEFLAG_FALSE(Uncached)
 #endif
 
+#ifdef CONFIG_MEMORY_FAILURE
+PAGEFLAG(HWPoison, hwpoison)
+TESTSETFLAG(HWPoison, hwpoison)
+#define __PG_HWPOISON (1UL << PG_hwpoison)
+#else
+PAGEFLAG_FALSE(HWPoison)
+#define __PG_HWPOISON 0
+#endif
+
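For reference, the generators above expand to the usual bit helpers when CONFIG_MEMORY_FAILURE is set, roughly:

        PageHWPoison(page);        /* test_bit(PG_hwpoison, &page->flags) */
        SetPageHWPoison(page);     /* set_bit(...) */
        ClearPageHWPoison(page);   /* clear_bit(...) */
        TestSetPageHWPoison(page); /* test_and_set_bit(...): atomically claim the page */

Without the config option, PAGEFLAG_FALSE(HWPoison) makes PageHWPoison() a constant 0 and __PG_HWPOISON drops out of the flag masks.
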
 static inline int PageUptodate(struct page *page)
 {
        int ret = test_bit(PG_uptodate, &(page)->flags);
@@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page)
         1 << PG_private | 1 << PG_private_2 | \
         1 << PG_buddy   | 1 << PG_writeback | 1 << PG_reserved | \
         1 << PG_slab    | 1 << PG_swapcache | 1 << PG_active | \
-        1 << PG_unevictable | __PG_MLOCKED)
+        1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
index ada779f2417846bcee829c5e4f33a42f79a76b73..4b938d4f3ac2eff57736a6b8ced5a32f0c260d6a 100644 (file)
@@ -38,6 +38,7 @@ enum {
        PCG_LOCK,  /* page cgroup is locked */
        PCG_CACHE, /* charged as cache */
        PCG_USED, /* this object is in use. */
+       PCG_ACCT_LRU, /* page has been accounted for */
 };
 
 #define TESTPCGFLAG(uname, lname)                      \
@@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
 static inline void ClearPageCgroup##uname(struct page_cgroup *pc)      \
        { clear_bit(PCG_##lname, &pc->flags);  }
 
+#define TESTCLEARPCGFLAG(uname, lname)                 \
+static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)   \
+       { return test_and_clear_bit(PCG_##lname, &pc->flags);  }
+
 /* Cache flag is set only once (at allocation) */
 TESTPCGFLAG(Cache, CACHE)
+CLEARPCGFLAG(Cache, CACHE)
+SETPCGFLAG(Cache, CACHE)
 
 TESTPCGFLAG(Used, USED)
 CLEARPCGFLAG(Used, USED)
+SETPCGFLAG(Used, USED)
+
+SETPCGFLAG(AcctLRU, ACCT_LRU)
+CLEARPCGFLAG(AcctLRU, ACCT_LRU)
+TESTPCGFLAG(AcctLRU, ACCT_LRU)
+TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
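Each generator emits a one-line inline helper; the new TESTCLEARPCGFLAG, for example, expands to:

        static inline int TestClearPageCgroupAcctLRU(struct page_cgroup *pc)
        {
                return test_and_clear_bit(PCG_ACCT_LRU, &pc->flags);
        }
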
 
 static inline int page_cgroup_nid(struct page_cgroup *pc)
 {
index 7803565aa877a2f3e39d4be29d794989f971ce87..da1fda8623e089fbd8d1a67d40f317216e071a6f 100644 (file)
 #define PCI_DEVICE_ID_INTEL_E7525_MCH  0x359e
 #define PCI_DEVICE_ID_INTEL_IOAT_CNB   0x360b
 #define PCI_DEVICE_ID_INTEL_FBD_CNB    0x360c
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF0  0x3710
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF1  0x3711
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF2  0x3712
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF3  0x3713
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF4  0x3714
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF5  0x3715
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF6  0x3716
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF7  0x3717
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF8  0x3718
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF9  0x3719
 #define PCI_DEVICE_ID_INTEL_ICH10_0    0x3a14
 #define PCI_DEVICE_ID_INTEL_ICH10_1    0x3a16
 #define PCI_DEVICE_ID_INTEL_ICH10_2    0x3a18
index 1ef5a07818315b69a4b7b875b198e6c1b1137d24..e5126cff9b2a4f30e61aca41f5d922998302e621 100644 (file)
@@ -38,6 +38,7 @@
 #define PNPIPE_IFINDEX         2
 
 #define PNADDR_ANY             0
+#define PNADDR_BROADCAST       0xFC
 #define PNPORT_RESOURCE_ROUTING        0
 
 /* Values for PNPIPE_ENCAP option */
index 07bff666e65b695d1062638f8d20f9770d610161..931150566ade8d720f156665b2a5400bbc499f0d 100644 (file)
@@ -88,4 +88,6 @@
 #define PR_TASK_PERF_EVENTS_DISABLE            31
 #define PR_TASK_PERF_EVENTS_ENABLE             32
 
+#define PR_MCE_KILL    33
+
 #endif /* _LINUX_PRCTL_H */
index 953fc055e87567fc4034af88020c114885b78b3c..14a86bc7102b47805843a84c97c8e59ce8b34e02 100644 (file)
@@ -140,7 +140,7 @@ struct rchan_callbacks
         * cause relay_open() to create a single global buffer rather
         * than the default set of per-cpu buffers.
         *
-        * See Documentation/filesystems/relayfs.txt for more info.
+        * See Documentation/filesystems/relay.txt for more info.
         */
        struct dentry *(*create_buf_file)(const char *filename,
                                          struct dentry *parent,
index 511f42fc68166475b063b54a09f596ff74556310..731af71cddc9b1a8f3fb326f08f4d0bec048e08e 100644 (file)
@@ -34,6 +34,10 @@ struct res_counter {
         * the limit that usage cannot exceed
         */
        unsigned long long limit;
+       /*
+        * the limit that usage may exceed (the soft limit)
+        */
+       unsigned long long soft_limit;
        /*
         * the number of unsuccessful attempts to consume the resource
         */
@@ -87,6 +91,7 @@ enum {
        RES_MAX_USAGE,
        RES_LIMIT,
        RES_FAILCNT,
+       RES_SOFT_LIMIT,
 };
 
 /*
@@ -109,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
 int __must_check res_counter_charge_locked(struct res_counter *counter,
                unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
-               unsigned long val, struct res_counter **limit_fail_at);
+               unsigned long val, struct res_counter **limit_fail_at,
+               struct res_counter **soft_limit_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
@@ -122,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter,
  */
 
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+                               bool *was_soft_limit_excess);
 
 static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
 {
@@ -132,6 +139,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
        return false;
 }
 
+static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt)
+{
+       if (cnt->usage < cnt->soft_limit)
+               return true;
+
+       return false;
+}
+
+/**
+ * res_counter_soft_limit_excess - get the difference between usage and soft limit
+ * @cnt: The counter
+ *
+ * Returns 0 if usage is less than or equal to the soft limit;
+ * otherwise, the difference between usage and the soft limit.
+ */
+static inline unsigned long long
+res_counter_soft_limit_excess(struct res_counter *cnt)
+{
+       unsigned long long excess;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cnt->lock, flags);
+       if (cnt->usage <= cnt->soft_limit)
+               excess = 0;
+       else
+               excess = cnt->usage - cnt->soft_limit;
+       spin_unlock_irqrestore(&cnt->lock, flags);
+       return excess;
+}
+
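Reclaim-side code can use the excess to rank over-soft-limit groups; a sketch (record_soft_limit_excess() is hypothetical, named only for illustration):

        unsigned long long excess = res_counter_soft_limit_excess(&mem->res);

        if (excess)
                /* over the soft limit by 'excess' bytes: prefer it for reclaim */
                record_soft_limit_excess(mem, excess);
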
 /*
 * Helper function to detect if the cgroup is within its limit or
  * not. It's currently called from cgroup_rss_prepare()
@@ -147,6 +184,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt)
        return ret;
 }
 
+static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt)
+{
+       bool ret;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cnt->lock, flags);
+       ret = res_counter_soft_limit_check_locked(cnt);
+       spin_unlock_irqrestore(&cnt->lock, flags);
+       return ret;
+}
+
 static inline void res_counter_reset_max(struct res_counter *cnt)
 {
        unsigned long flags;
@@ -180,4 +228,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt,
        return ret;
 }
 
+static inline int
+res_counter_set_soft_limit(struct res_counter *cnt,
+                               unsigned long long soft_limit)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&cnt->lock, flags);
+       cnt->soft_limit = soft_limit;
+       spin_unlock_irqrestore(&cnt->lock, flags);
+       return 0;
+}
+
 #endif
index 477841d29fce238a2888a7c0af0c7d05bc795524..cb0ba7032609d5602a709a54f4b012413b981587 100644 (file)
@@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page)
  */
 int page_referenced(struct page *, int is_locked,
                        struct mem_cgroup *cnt, unsigned long *vm_flags);
-int try_to_unmap(struct page *, int ignore_refs);
+enum ttu_flags {
+       TTU_UNMAP = 0,                  /* unmap mode */
+       TTU_MIGRATION = 1,              /* migration mode */
+       TTU_MUNLOCK = 2,                /* munlock mode */
+       TTU_ACTION_MASK = 0xff,
+
+       TTU_IGNORE_MLOCK = (1 << 8),    /* ignore mlock */
+       TTU_IGNORE_ACCESS = (1 << 9),   /* don't age */
+       TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
+};
+#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
+
+int try_to_unmap(struct page *, enum ttu_flags flags);
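Callers OR modifier bits onto a mode, and rmap recovers the mode with TTU_ACTION(); the memory-failure path, for instance, wants an unconditional unmap along the lines of (a sketch of one plausible call site):

        enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;

        ret = try_to_unmap(page, ttu);  /* TTU_ACTION(ttu) == TTU_UNMAP */
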
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
@@ -108,6 +120,13 @@ int page_mkclean(struct page *);
  */
 int try_to_munlock(struct page *);
 
+/*
+ * Called by memory-failure.c to kill processes.
+ */
+struct anon_vma *page_lock_anon_vma(struct page *page);
+void page_unlock_anon_vma(struct anon_vma *anon_vma);
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
+
 #else  /* !CONFIG_MMU */
 
 #define anon_vma_init()                do {} while (0)
index cbf2a3b46280a78f8d8cdfc408492fb1ac26fad3..75e6e60bf583bb89a7784d4476a32766d10db420 100644 (file)
@@ -309,7 +309,7 @@ extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-                                   struct file *filp, void __user *buffer,
+                                   void __user *buffer,
                                    size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
@@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_warnings;
 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-                                        struct file *filp, void __user *buffer,
+                                        void __user *buffer,
                                         size_t *lenp, loff_t *ppos);
 #endif
 
@@ -1271,7 +1271,6 @@ struct task_struct {
        struct mm_struct *mm, *active_mm;
 
 /* task state */
-       struct linux_binfmt *binfmt;
        int exit_state;
        int exit_code, exit_signal;
        int pdeath_signal;  /*  The signal sent when the parent dies  */
@@ -1735,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p);
 #define PF_EXITPIDONE  0x00000008      /* pi exit done on shut down */
 #define PF_VCPU                0x00000010      /* I'm a virtual CPU */
 #define PF_FORKNOEXEC  0x00000040      /* forked but didn't exec */
+#define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
 #define PF_SUPERPRIV   0x00000100      /* used super-user privileges */
 #define PF_DUMPCORE    0x00000200      /* dumped core */
 #define PF_SIGNALED    0x00000400      /* killed by a signal */
@@ -1754,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p);
 #define PF_SPREAD_PAGE 0x01000000      /* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB 0x02000000      /* Spread some slab caches over cpuset */
 #define PF_THREAD_BOUND        0x04000000      /* Thread bound to specific cpu */
+#define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
 #define PF_MEMPOLICY   0x10000000      /* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER        0x20000000      /* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP        0x40000000      /* Freezer should not count it as freezeable */
@@ -1817,10 +1818,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
        return 0;
 }
 #endif
+
+#ifndef CONFIG_CPUMASK_OFFSTACK
 static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 {
        return set_cpus_allowed_ptr(p, &new_mask);
 }
+#endif
 
 /*
  * Architectures can set this to 1 if they have specified
@@ -1903,7 +1907,7 @@ extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
-               struct file *file, void __user *buffer, size_t *length,
+               void __user *buffer, size_t *length,
                loff_t *ppos);
 #endif
 #ifdef CONFIG_SCHED_DEBUG
@@ -1921,7 +1925,7 @@ extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
 int sched_rt_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 
 extern unsigned int sysctl_sched_compat_yield;
@@ -2056,6 +2060,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv);
 extern int kill_pid(struct pid *pid, int sig, int priv);
 extern int kill_proc_info(int, struct siginfo *, pid_t);
 extern int do_notify_parent(struct task_struct *, int);
+extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int, struct task_struct *);
 extern void force_sig_specific(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
@@ -2333,7 +2338,10 @@ static inline int signal_pending(struct task_struct *p)
        return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
 }
 
-extern int __fatal_signal_pending(struct task_struct *p);
+static inline int __fatal_signal_pending(struct task_struct *p)
+{
+       return unlikely(sigismember(&p->pending.signal, SIGKILL));
+}
 
 static inline int fatal_signal_pending(struct task_struct *p)
 {
index d050b66ab9ef0415b169122590f7335714ad9b2e..239e40d0450bc02380b30c08e8854ecc3d4ba638 100644 (file)
@@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
                return PAGE_ALIGN(mmap_min_addr);
        return hint;
 }
-extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+extern int mmap_min_addr_handler(struct ctl_table *table, int write,
                                 void __user *buffer, size_t *lenp, loff_t *ppos);
 
 #ifdef CONFIG_SECURITY
index 0c6a86b795968621a107c2c8c611b5ef7d387516..8366d8f12e537ac9cfb3e9cc053371bcea174025 100644 (file)
@@ -35,6 +35,44 @@ struct seq_operations {
 
 #define SEQ_SKIP 1
 
+/**
+ * seq_get_buf - get buffer to write arbitrary data to
+ * @m: the seq_file handle
+ * @bufp: the beginning of the buffer is stored here
+ *
+ * Return the number of bytes available in the buffer, or zero if
+ * there's no space.
+ */
+static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
+{
+       BUG_ON(m->count > m->size);
+       if (m->count < m->size)
+               *bufp = m->buf + m->count;
+       else
+               *bufp = NULL;
+
+       return m->size - m->count;
+}
+
+/**
+ * seq_commit - commit data to the buffer
+ * @m: the seq_file handle
+ * @num: the number of bytes to commit
+ *
+ * Commit @num bytes of data written to a buffer previously acquired
+ * by seq_buf_get.  To signal an error condition, or that the data
+ * didn't fit in the available space, pass a negative @num value.
+ */
+static inline void seq_commit(struct seq_file *m, int num)
+{
+       if (num < 0) {
+               m->count = m->size;
+       } else {
+               BUG_ON(m->count + num > m->size);
+               m->count += num;
+       }
+}
+
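The two helpers implement a get/commit protocol for code that formats directly into the seq_file buffer; the intended pattern looks roughly like this (format_into() is a stand-in for any bounded formatter):

        char *buf;
        size_t size = seq_get_buf(m, &buf);
        int res = -1;

        if (size) {
                res = format_into(buf, size);
                if (res > size)
                        res = -1;       /* didn't fit: force retry with a larger buffer */
        }
        seq_commit(m, res);
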
 char *mangle_path(char *s, char *p, char *esc);
 int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
index c7552836bd954a0f0a0235feea5cf9acb5c1a2c8..ab9272cc270c2a4b4f5a8c0d0d479f3eceeeb32d 100644 (file)
@@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig)
 }
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
+extern int do_send_sig_info(int sig, struct siginfo *info,
+                               struct task_struct *p, bool group);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
index 9e3d8af09207c9ec5ecf58dbb6c0034200947158..39c64bae776d86069518e4e00017bc75e6aae53c 100644 (file)
@@ -73,15 +73,6 @@ int smp_call_function(void(*func)(void *info), void *info, int wait);
 void smp_call_function_many(const struct cpumask *mask,
                            void (*func)(void *info), void *info, bool wait);
 
-/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */
-static inline int
-smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
-                      int wait)
-{
-       smp_call_function_many(&mask, func, info, wait);
-       return 0;
-}
-
 void __smp_call_function_single(int cpuid, struct call_single_data *data,
                                int wait);
 
@@ -144,8 +135,6 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
 static inline void smp_send_reschedule(int cpu) { }
 #define num_booting_cpus()                     1
 #define smp_prepare_boot_cpu()                 do {} while (0)
-#define smp_call_function_mask(mask, func, info, wait) \
-                       (up_smp_call_function(func, info))
 #define smp_call_function_many(mask, func, info, wait) \
                        (up_smp_call_function(func, info))
 static inline void init_call_single_data(void)
index 7da466ba4b0d0a91ae0186e88422b64223704155..f5cc0898bc53c166a07f7c9cc9420e8ad2a253f1 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/uio.h>
 #include <asm/byteorder.h>
+#include <asm/unaligned.h>
 #include <linux/scatterlist.h>
 
 /*
@@ -117,14 +118,14 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le
 static inline __be32 *
 xdr_encode_hyper(__be32 *p, __u64 val)
 {
-       *(__be64 *)p = cpu_to_be64(val);
+       put_unaligned_be64(val, p);
        return p + 2;
 }
 
 static inline __be32 *
 xdr_decode_hyper(__be32 *p, __u64 *valp)
 {
-       *valp = be64_to_cpup((__be64 *)p);
+       *valp = get_unaligned_be64(p);
        return p + 2;
 }
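The switch matters because @p inside an XDR stream is only guaranteed 32-bit alignment, while the old *(__be64 *)p access lets the compiler emit a 64-bit load/store that faults on strict-alignment architectures. The byteshift flavor of the helper is equivalent to this sketch:

        /* get_unaligned_be64(p), spelled out */
        *valp = ((u64)be32_to_cpup(p) << 32) | be32_to_cpup(p + 1);
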
 
index 6c990e658f4ed33dc2244b1b8ef5d1649cc22a31..4ec90019c1a4c3997c8da80dd3d36f3b31d09b11 100644 (file)
@@ -34,15 +34,37 @@ static inline int current_is_kswapd(void)
  * the type/offset into the pte as 5/27 as well.
  */
 #define MAX_SWAPFILES_SHIFT    5
-#ifndef CONFIG_MIGRATION
-#define MAX_SWAPFILES          (1 << MAX_SWAPFILES_SHIFT)
+
+/*
+ * Use some of the swap file numbers for other purposes. This
+ * is a convenient way to hook into the VM to trigger special
+ * actions on faults.
+ */
+
+/*
+ * NUMA node memory migration support
+ */
+#ifdef CONFIG_MIGRATION
+#define SWP_MIGRATION_NUM 2
+#define SWP_MIGRATION_READ     (MAX_SWAPFILES + SWP_HWPOISON_NUM)
+#define SWP_MIGRATION_WRITE    (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
 #else
-/* Use last two entries for page migration swap entries */
-#define MAX_SWAPFILES          ((1 << MAX_SWAPFILES_SHIFT)-2)
-#define SWP_MIGRATION_READ     MAX_SWAPFILES
-#define SWP_MIGRATION_WRITE    (MAX_SWAPFILES + 1)
+#define SWP_MIGRATION_NUM 0
 #endif
 
+/*
+ * Handling of hardware poisoned pages with memory corruption.
+ */
+#ifdef CONFIG_MEMORY_FAILURE
+#define SWP_HWPOISON_NUM 1
+#define SWP_HWPOISON           MAX_SWAPFILES
+#else
+#define SWP_HWPOISON_NUM 0
+#endif
+
+#define MAX_SWAPFILES \
+       ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+
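Concretely, with MAX_SWAPFILES_SHIFT = 5 there are 32 type codes; with both CONFIG_MIGRATION and CONFIG_MEMORY_FAILURE enabled the split works out to:

        MAX_SWAPFILES       = 32 - 2 - 1 = 29   /* real swap types 0..28 */
        SWP_HWPOISON        = 29
        SWP_MIGRATION_READ  = 30
        SWP_MIGRATION_WRITE = 31
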
 /*
  * Magic header for a swap area. The first part of the union is
  * what the swap magic looks like for the old (limited to 128MB)
@@ -217,6 +239,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
                                                  gfp_t gfp_mask, bool noswap,
                                                  unsigned int swappiness);
+extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+                                               gfp_t gfp_mask, bool noswap,
+                                               unsigned int swappiness,
+                                               struct zone *zone,
+                                               int nid);
 extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
@@ -240,7 +267,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma);
 extern void scan_mapping_unevictable_pages(struct address_space *);
 
 extern unsigned long scan_unevictable_pages;
-extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
+extern int scan_unevictable_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 extern int scan_unevictable_register_node(struct node *node);
 extern void scan_unevictable_unregister_node(struct node *node);
index 6ec39ab27b4b48aaf31b02a19bf5a164217abcce..cd42e30b7c6eb5e1b04bd69ca393f1c7878b3351 100644 (file)
@@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry)
 
 #endif
 
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+       BUG_ON(!PageLocked(page));
+       return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+       return swp_type(entry) == SWP_HWPOISON;
+}
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+       return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+       return 0;
+}
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
+static inline int non_swap_entry(swp_entry_t entry)
+{
+       return swp_type(entry) >= MAX_SWAPFILES;
+}
+#else
+static inline int non_swap_entry(swp_entry_t entry)
+{
+       return 0;
+}
+#endif
index e76d3b22a46645b503283138b317ced96a0fbbef..1e4743ee6831039d8b6d8e5773d7f5eebafcd33d 100644 (file)
@@ -29,7 +29,6 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 
-struct file;
 struct completion;
 
 #define CTL_MAXNAME 10         /* how many path components do we allow in a
@@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table,
                         void __user *oldval, size_t __user *oldlenp,
                         void __user *newval, size_t newlen);
 
-typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp,
+typedef int proc_handler (struct ctl_table *ctl, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos);
 
-extern int proc_dostring(struct ctl_table *, int, struct file *,
+extern int proc_dostring(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
-extern int proc_dointvec(struct ctl_table *, int, struct file *,
+extern int proc_dointvec(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
-extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_minmax(struct ctl_table *, int,
                                void __user *, size_t *, loff_t *);
-extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_jiffies(struct ctl_table *, int,
                                 void __user *, size_t *, loff_t *);
-extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
-extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_ms_jiffies(struct ctl_table *, int,
                                    void __user *, size_t *, loff_t *);
-extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *,
+extern int proc_doulongvec_minmax(struct ctl_table *, int,
                                  void __user *, size_t *, loff_t *);
 extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
-                                     struct file *, void __user *, size_t *, loff_t *);
+                                     void __user *, size_t *, loff_t *);
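Every proc handler in the tree loses its struct file * argument in this series; a converted handler now reads (sketch, handler name hypothetical):

        static int my_handler(struct ctl_table *table, int write,
                              void __user *buffer, size_t *lenp, loff_t *ppos)
        {
                /* no file pointer: everything comes via table/buffer/lenp/ppos */
                return proc_dointvec(table, write, buffer, lenp, ppos);
        }
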
 
 extern int do_sysctl (int __user *name, int nlen,
                      void __user *oldval, size_t __user *oldlenp,
index 56787c0933456c560bc98e860da4a989e8437f18..fe04e5ef6a592ebddf885b2f313323ec5226e452 100644 (file)
@@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond);
 struct tms;
 extern void do_sys_times(struct tms *);
 
+/*
+ * Similar to the struct tm in userspace <time.h>, but it needs to be here so
+ * that the kernel source is self-contained.
+ */
+struct tm {
+       /*
+        * the number of seconds after the minute, normally in the range
+        * 0 to 59, but can be up to 60 to allow for leap seconds
+        */
+       int tm_sec;
+       /* the number of minutes after the hour, in the range 0 to 59 */
+       int tm_min;
+       /* the number of hours past midnight, in the range 0 to 23 */
+       int tm_hour;
+       /* the day of the month, in the range 1 to 31 */
+       int tm_mday;
+       /* the number of months since January, in the range 0 to 11 */
+       int tm_mon;
+       /* the number of years since 1900 */
+       long tm_year;
+       /* the number of days since Sunday, in the range 0 to 6 */
+       int tm_wday;
+       /* the number of days since January 1, in the range 0 to 365 */
+       int tm_yday;
+};
+
+void time_to_tm(time_t totalsecs, int offset, struct tm *result);
+
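Callers hand in seconds plus a UTC offset in seconds and get the broken-down time back; a minimal sketch:

        struct tm tm;

        time_to_tm(get_seconds(), 0, &tm);      /* offset 0 = UTC */
        pr_info("%04ld-%02d-%02d\n",
                tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
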
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:                pointer to the timespec variable to be converted
index 809b26c07090b11ea8966b4efd756e03aa7ef4cf..fc0bf3edeb6705814af2b1ba8428fc0a39e78e0b 100644 (file)
@@ -211,12 +211,6 @@ int arch_update_cpu_topology(void);
 #ifndef topology_core_id
 #define topology_core_id(cpu)                  ((void)(cpu), 0)
 #endif
-#ifndef topology_thread_siblings
-#define topology_thread_siblings(cpu)          cpumask_of_cpu(cpu)
-#endif
-#ifndef topology_core_siblings
-#define topology_core_siblings(cpu)            cpumask_of_cpu(cpu)
-#endif
 #ifndef topology_thread_cpumask
 #define topology_thread_cpumask(cpu)           cpumask_of(cpu)
 #endif
index 17ba82efa4830543bace0639129b7a0f6e2cd3bb..1eb44a924e5643066677f7d3edef90d6ef9cce0d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Tracing hooks
  *
- * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task,
 
 /**
  * tracehook_notify_jctl - report about job control stop/continue
- * @notify:            nonzero if this is the last thread in the group to stop
+ * @notify:            zero, %CLD_STOPPED or %CLD_CONTINUED
  * @why:               %CLD_STOPPED or %CLD_CONTINUED
  *
  * This is called when we might call do_notify_parent_cldstop().
- * It's called when about to stop for job control; we are already in
- * %TASK_STOPPED state, about to call schedule().  It's also called when
- * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made.
  *
- * Return nonzero to generate a %SIGCHLD with @why, which is
- * normal if @notify is nonzero.
+ * @notify is zero if we would not ordinarily send a %SIGCHLD,
+ * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD.
  *
- * Called with no locks held.
+ * @why is %CLD_STOPPED when about to stop for job control;
+ * we are already in %TASK_STOPPED state, about to call schedule().
+ * It might also be that we have just exited (check %PF_EXITING),
+ * but need to report that a group-wide stop is complete.
+ *
+ * @why is %CLD_CONTINUED when waking up after job control stop and
+ * ready to make a delayed @notify report.
+ *
+ * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal.
+ *
+ * Called with the siglock held.
  */
 static inline int tracehook_notify_jctl(int notify, int why)
 {
-       return notify || (current->ptrace & PT_PTRACED);
+       return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
+}
+
+/**
+ * tracehook_finish_jctl - report about return from job control stop
+ *
+ * This is called by do_signal_stop() after wakeup.
+ */
+static inline void tracehook_finish_jctl(void)
+{
 }
 
 #define DEATH_REAP                     -1
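Unpacking the GNU ?: in the new tracehook_notify_jctl(): a caller-supplied @notify wins, a ptraced task otherwise reports @why, and everyone else generates no signal. It is equivalent to:

        if (notify)
                return notify;          /* caller already picked the si_code */
        if (current->ptrace & PT_PTRACED)
                return why;             /* ptraced tasks always report */
        return 0;                       /* no SIGCHLD */
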
index 63a3f7a8058032e1069d96e73ce5749b76a8e8da..660a9de96f81dba7ca358b8c4e952dc32df1e05f 100644 (file)
@@ -4,7 +4,7 @@
 /*
  * Kernel Tracepoint API.
  *
- * See Documentation/tracepoint.txt.
+ * See Documentation/trace/tracepoints.txt.
  *
  * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
  *
index 46dd12c5709e60d8b95ae0e151952d16ea02dc94..9356b24223ac3505343deaee610509a681ff3e1c 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H
 #define _LINUX_UNALIGNED_BE_BYTESHIFT_H
 
-#include <linux/kernel.h>
+#include <linux/types.h>
 
 static inline u16 __get_unaligned_be16(const u8 *p)
 {
index 59777e951baf31fda11df5c0d977f745aa64388c..be376fb79b6454a50908f5d5b7cfad66fc083b00 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H
 #define _LINUX_UNALIGNED_LE_BYTESHIFT_H
 
-#include <linux/kernel.h>
+#include <linux/types.h>
 
 static inline u16 __get_unaligned_le16(const u8 *p)
 {
index bb69e256cd16d25b5ead7ddc64b2d581a84069a9..f8147305205980c1b0e6f6cf2b0545625e0efa79 100644 (file)
@@ -89,6 +89,7 @@ struct driver_info {
 #define FLAG_FRAMING_AX 0x0040         /* AX88772/178 packets */
 #define FLAG_WLAN      0x0080          /* use "wlan%d" names */
 #define FLAG_AVOID_UNLINK_URBS 0x0100  /* don't unlink urbs at usbnet_stop() */
+#define FLAG_SEND_ZLP  0x0200          /* hw requires ZLPs to be sent */
 
 
        /* init device ... can sleep, or cause probe() failure */
index 3656b300de3a740e39620172e4eb0e0997b06aea..69f39974c041b964c4a32bb0dead5942f191dae2 100644 (file)
@@ -36,7 +36,6 @@ struct new_utsname {
 #include <linux/kref.h>
 #include <linux/nsproxy.h>
 #include <linux/err.h>
-#include <asm/atomic.h>
 
 struct uts_namespace {
        struct kref kref;
index 923f9040ea203568649e18faa245c1adf83f2dd8..2dfaa293ae8c971303092235da547b5afb438319 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * vgaarb.c
+ * The VGA arbiter manages VGA space routing and VGA resource decode to
+ * allow multiple VGA devices to be used in a system in a safe way.
  *
  * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
  * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com>
index 75cf58666ff9f4a57c71b2d9512dbacbefc41911..66ebddcff6641f95498aa3c2e7af07cbffaa3a29 100644 (file)
@@ -110,21 +110,20 @@ extern int laptop_mode;
 extern unsigned long determine_dirtyable_memory(void);
 
 extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 extern int dirty_ratio_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 extern int dirty_bytes_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 
 struct ctl_table;
-struct file;
-int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
+int dirty_writeback_centisecs_handler(struct ctl_table *, int,
                                      void __user *, size_t *, loff_t *);
 
 void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
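
The prototypes above drop the struct file * argument from the dirty
writeback sysctl handlers. A minimal sketch of a handler in the new
form, assuming a hypothetical example_ratio_handler(); as the ipc and
mqueue hunks below show, proc_dointvec() now takes the same argument
list minus filp:

	static int example_ratio_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		int ret = proc_dointvec(table, write, buffer, lenp, ppos);

		if (write && !ret) {
			/* react to the updated value here */
		}
		return ret;
	}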
index b77c1478c99fcce80dbaf2f5bd877269e96a086e..a7fb54808a236a2feaf48895823450a976fd6a19 100644 (file)
@@ -38,6 +38,8 @@
  * @P9_DEBUG_SLABS: memory management tracing
  * @P9_DEBUG_FCALL: verbose dump of protocol messages
  * @P9_DEBUG_FID: fid allocation/deallocation tracking
+ * @P9_DEBUG_PKT: packet marshalling/unmarshalling
+ * @P9_DEBUG_FSC: FS-cache tracing
  *
  * These flags are passed at mount time to turn on various levels of
  * verbosity and tracing which will be output to the system logs.
@@ -54,6 +56,7 @@ enum p9_debug_flags {
        P9_DEBUG_FCALL =        (1<<8),
        P9_DEBUG_FID =          (1<<9),
        P9_DEBUG_PKT =          (1<<10),
+       P9_DEBUG_FSC =          (1<<11),
 };
 
 #ifdef CONFIG_NET_9P_DEBUG
index 72c36926c26d80363bec2bdc3094d91ecb109e9e..5b26a0bd178ecc031d357786f519bfdacde0f8eb 100644 (file)
@@ -399,7 +399,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
  * fed into the routing cache should use these handlers.
  */
 int ipv4_doint_and_flush(ctl_table *ctl, int write,
-                        struct file* filp, void __user *buffer,
+                        void __user *buffer,
                         size_t *lenp, loff_t *ppos);
 int ipv4_doint_and_flush_strategy(ctl_table *table,
                                  void __user *oldval, size_t __user *oldlenp,
index 5d3036fa1511695879a18d3adc02f57feef49392..76e3ea6e2fe5248b6cb44a2bb9d68ded3b7f4d28 100644 (file)
@@ -12,7 +12,6 @@ struct ip_tunnel
        struct ip_tunnel        *next;
        struct net_device       *dev;
 
-       int                     recursion;      /* Depth of hard_start_xmit recursion */
        int                     err_count;      /* Number of arrived ICMP errors */
        unsigned long           err_time;       /* Time when the last ICMP error arrived */
 
index 1459ed3e2697b495bb37226c8190b7f81d428c3c..f76f22d057216bb3e28b68a3e68fd4c002e50b30 100644 (file)
@@ -55,7 +55,6 @@ enum {
 #include <net/neighbour.h>
 
 struct ctl_table;
-struct file;
 struct inet6_dev;
 struct net_device;
 struct net_proto_family;
@@ -139,7 +138,6 @@ extern int                  igmp6_event_report(struct sk_buff *skb);
 #ifdef CONFIG_SYSCTL
 extern int                     ndisc_ifinfo_sysctl_change(struct ctl_table *ctl,
                                                           int write,
-                                                          struct file * filp,
                                                           void __user *buffer,
                                                           size_t *lenp,
                                                           loff_t *ppos);
index 0aa6579504cc93757ea0ecd284fb7b6782029ff8..c7bac39d6c61b7da35e5ee1e9a8c9e33a2033807 100644 (file)
@@ -1006,14 +1006,6 @@ config SLUB_DEBUG
          SLUB sysfs support. /sys/slab will not exist and there will be
          no support for cache validation etc.
 
-config STRIP_ASM_SYMS
-       bool "Strip assembler-generated symbols during link"
-       default n
-       help
-         Strip internal assembler-generated symbols during a link (symbols
-         that look like '.Lxxx') so they don't pollute the output of
-         get_wchan() and suchlike.
-
 config COMPAT_BRK
        bool "Disable heap randomization"
        default y
index 6107223124e4579a663dde4cb26e91aab68191c1..7449819a4805b14dd3c68d7300cc54ce5d4b8044 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/delay.h>
-#include <linux/utsname.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/smp_lock.h>
@@ -360,11 +359,6 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
 
 #else
 
-#if NR_CPUS > BITS_PER_LONG
-cpumask_t cpu_mask_all __read_mostly = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_mask_all);
-#endif
-
 /* Setup number of possible processor ids */
 int nr_cpu_ids __read_mostly = NR_CPUS;
 EXPORT_SYMBOL(nr_cpu_ids);
index 40eab7314aeb6c38fcef65ea58cea52421e646ff..7d3704750efc1fd7ef9010acc20fcf2b76e14a28 100644 (file)
@@ -27,18 +27,18 @@ static void *get_ipc(ctl_table *table)
 }
 
 #ifdef CONFIG_PROC_SYSCTL
-static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
+static int proc_ipc_dointvec(ctl_table *table, int write,
        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
        memcpy(&ipc_table, table, sizeof(ipc_table));
        ipc_table.data = get_ipc(table);
 
-       return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+       return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
 }
 
 static int proc_ipc_callback_dointvec(ctl_table *table, int write,
-       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
        size_t lenp_bef = *lenp;
@@ -47,7 +47,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
        memcpy(&ipc_table, table, sizeof(ipc_table));
        ipc_table.data = get_ipc(table);
 
-       rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
 
        if (write && !rc && lenp_bef == *lenp)
                /*
@@ -61,13 +61,13 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
 }
 
 static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
-       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
        memcpy(&ipc_table, table, sizeof(ipc_table));
        ipc_table.data = get_ipc(table);
 
-       return proc_doulongvec_minmax(&ipc_table, write, filp, buffer,
+       return proc_doulongvec_minmax(&ipc_table, write, buffer,
                                        lenp, ppos);
 }
 
@@ -95,7 +95,7 @@ static void ipc_auto_callback(int val)
 }
 
 static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
-       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
        size_t lenp_bef = *lenp;
@@ -106,7 +106,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
        ipc_table.data = get_ipc(table);
        oldval = *((int *)(ipc_table.data));
 
-       rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
 
        if (write && !rc && lenp_bef == *lenp) {
                int newval = *((int *)(ipc_table.data));
index 24ae46dfe45daaf2ff0a1cb8511c9e4a2ef4e29e..8a058711fc103ad9df3a19c4b59eecc7dfcff686 100644 (file)
@@ -31,24 +31,24 @@ static void *get_mq(ctl_table *table)
        return which;
 }
 
-static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
+static int proc_mq_dointvec(ctl_table *table, int write,
        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table mq_table;
        memcpy(&mq_table, table, sizeof(mq_table));
        mq_table.data = get_mq(table);
 
-       return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos);
+       return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
 }
 
 static int proc_mq_dointvec_minmax(ctl_table *table, int write,
-       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table mq_table;
        memcpy(&mq_table, table, sizeof(mq_table));
        mq_table.data = get_mq(table);
 
-       return proc_dointvec_minmax(&mq_table, write, filp, buffer,
+       return proc_dointvec_minmax(&mq_table, write, buffer,
                                        lenp, ppos);
 }
 #else
index 187c89b4783d4a43e8ad13d0a98dbed8ae6df15d..b8d4cd8ac0b9d5d93e303833e70ed55d450582ee 100644 (file)
@@ -58,7 +58,6 @@ obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
-obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
index defc2e6f1e3bc9742606d7efc0a0af4e79095b14..5feed232be9d4b55f72f5b38a010e4e6619c7a19 100644 (file)
@@ -855,18 +855,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                break;
        }
        case AUDIT_SIGNAL_INFO:
-               err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
-               if (err)
-                       return err;
+               len = 0;
+               if (audit_sig_sid) {
+                       err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
+                       if (err)
+                               return err;
+               }
                sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
                if (!sig_data) {
-                       security_release_secctx(ctx, len);
+                       if (audit_sig_sid)
+                               security_release_secctx(ctx, len);
                        return -ENOMEM;
                }
                sig_data->uid = audit_sig_uid;
                sig_data->pid = audit_sig_pid;
-               memcpy(sig_data->ctx, ctx, len);
-               security_release_secctx(ctx, len);
+               if (audit_sig_sid) {
+                       memcpy(sig_data->ctx, ctx, len);
+                       security_release_secctx(ctx, len);
+               }
                audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
                                0, 0, sig_data, sizeof(*sig_data) + len);
                kfree(sig_data);
index 0e96dbc60ea9b45896a0a451287b406f2592b48c..cc7e87936cbc57f7a2eebebbd212fe297c530154 100644 (file)
@@ -45,8 +45,8 @@
 
 struct audit_watch {
        atomic_t                count;  /* reference count */
-       char                    *path;  /* insertion path */
        dev_t                   dev;    /* associated superblock device */
+       char                    *path;  /* insertion path */
        unsigned long           ino;    /* associated inode number */
        struct audit_parent     *parent; /* associated parent */
        struct list_head        wlist;  /* entry in parent->watches list */
index 68d3c6a0ecd635bbeba49c5de2d3e89b50315897..267e484f019817b87187c74bfa7aa12275411b0c 100644 (file)
@@ -168,12 +168,12 @@ struct audit_context {
        int                 in_syscall; /* 1 if task is in a syscall */
        enum audit_state    state, current_state;
        unsigned int        serial;     /* serial number for record */
-       struct timespec     ctime;      /* time of syscall entry */
        int                 major;      /* syscall number */
+       struct timespec     ctime;      /* time of syscall entry */
        unsigned long       argv[4];    /* syscall arguments */
-       int                 return_valid; /* return code is valid */
        long                return_code;/* syscall return code */
        u64                 prio;
+       int                 return_valid; /* return code is valid */
        int                 name_count;
        struct audit_names  names[AUDIT_NAMES];
        char *              filterkey;  /* key for rule that triggered record */
@@ -198,8 +198,8 @@ struct audit_context {
        char                target_comm[TASK_COMM_LEN];
 
        struct audit_tree_refs *trees, *first_trees;
-       int tree_count;
        struct list_head killed_trees;
+       int tree_count;
 
        int type;
        union {
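
The field swaps in these two audit hunks carry no explanation in the
patch, so the rationale here is an assumption: pairing int-sized members
next to each other keeps the surrounding pointer/long/u64 members
naturally aligned without padding holes on 64-bit. Schematically:

	/* hypothetical 64-bit layout illustration */
	struct packed_example {
		u64	wide;	/* one 8-byte slot */
		int	a;	/* a and b together */
		int	b;	/* fill the next 8-byte slot */
	};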
index cd83d9933b6b85cd05bf6db17f424056d2d504fc..7ccba4bc5e3b815a9ac9a9c5a0e40d8e70b5c780 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -48,6 +49,8 @@
 #include <linux/namei.h>
 #include <linux/smp_lock.h>
 #include <linux/pid_namespace.h>
+#include <linux/idr.h>
+#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 
 #include <asm/atomic.h>
 
@@ -60,6 +63,8 @@ static struct cgroup_subsys *subsys[] = {
 #include <linux/cgroup_subsys.h>
 };
 
+#define MAX_CGROUP_ROOT_NAMELEN 64
+
 /*
  * A cgroupfs_root represents the root of a cgroup hierarchy,
  * and may be associated with a superblock to form an active
@@ -74,6 +79,9 @@ struct cgroupfs_root {
         */
        unsigned long subsys_bits;
 
+       /* Unique id for this hierarchy. */
+       int hierarchy_id;
+
        /* The bitmask of subsystems currently attached to this hierarchy */
        unsigned long actual_subsys_bits;
 
@@ -94,6 +102,9 @@ struct cgroupfs_root {
 
        /* The path to use for release notifications. */
        char release_agent_path[PATH_MAX];
+
+       /* The name for this hierarchy - may be empty */
+       char name[MAX_CGROUP_ROOT_NAMELEN];
 };
 
 /*
@@ -141,6 +152,10 @@ struct css_id {
 static LIST_HEAD(roots);
 static int root_count;
 
+static DEFINE_IDA(hierarchy_ida);
+static int next_hierarchy_id;
+static DEFINE_SPINLOCK(hierarchy_id_lock);
+
 /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
 #define dummytop (&rootnode.top_cgroup)
 
@@ -201,6 +216,7 @@ struct cg_cgroup_link {
         * cgroup, anchored on cgroup->css_sets
         */
        struct list_head cgrp_link_list;
+       struct cgroup *cgrp;
        /*
         * List running through cg_cgroup_links pointing at a
         * single css_set object, anchored on css_set->cg_links
@@ -227,8 +243,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
 static DEFINE_RWLOCK(css_set_lock);
 static int css_set_count;
 
-/* hash table for cgroup groups. This improves the performance to
- * find an existing css_set */
+/*
+ * hash table for cgroup groups. This improves the performance to find
+ * an existing css_set. This hash doesn't (currently) take into
+ * account cgroups in empty hierarchies.
+ */
 #define CSS_SET_HASH_BITS      7
 #define CSS_SET_TABLE_SIZE     (1 << CSS_SET_HASH_BITS)
 static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
@@ -248,48 +267,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
        return &css_set_table[index];
 }
 
+static void free_css_set_rcu(struct rcu_head *obj)
+{
+       struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+       kfree(cg);
+}
+
 /* We don't maintain the lists running through each css_set to its
  * task until after the first call to cgroup_iter_start(). This
  * reduces the fork()/exit() overhead for people who have cgroups
  * compiled into their kernel but not actually in use */
 static int use_task_css_set_links __read_mostly;
 
-/* When we create or destroy a css_set, the operation simply
- * takes/releases a reference count on all the cgroups referenced
- * by subsystems in this css_set. This can end up multiple-counting
- * some cgroups, but that's OK - the ref-count is just a
- * busy/not-busy indicator; ensuring that we only count each cgroup
- * once would require taking a global lock to ensure that no
- * subsystems moved between hierarchies while we were doing so.
- *
- * Possible TODO: decide at boot time based on the number of
- * registered subsystems and the number of CPUs or NUMA nodes whether
- * it's better for performance to ref-count every subsystem, or to
- * take a global lock and only add one ref count to each hierarchy.
- */
-
-/*
- * unlink a css_set from the list and free it
- */
-static void unlink_css_set(struct css_set *cg)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
        struct cg_cgroup_link *link;
        struct cg_cgroup_link *saved_link;
-
-       hlist_del(&cg->hlist);
-       css_set_count--;
-
-       list_for_each_entry_safe(link, saved_link, &cg->cg_links,
-                                cg_link_list) {
-               list_del(&link->cg_link_list);
-               list_del(&link->cgrp_link_list);
-               kfree(link);
-       }
-}
-
-static void __put_css_set(struct css_set *cg, int taskexit)
-{
-       int i;
        /*
         * Ensure that the refcount doesn't hit zero while any readers
         * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -302,21 +295,28 @@ static void __put_css_set(struct css_set *cg, int taskexit)
                write_unlock(&css_set_lock);
                return;
        }
-       unlink_css_set(cg);
-       write_unlock(&css_set_lock);
 
-       rcu_read_lock();
-       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-               struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
+       /* This css_set is dead. unlink it and release cgroup refcounts */
+       hlist_del(&cg->hlist);
+       css_set_count--;
+
+       list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+                                cg_link_list) {
+               struct cgroup *cgrp = link->cgrp;
+               list_del(&link->cg_link_list);
+               list_del(&link->cgrp_link_list);
                if (atomic_dec_and_test(&cgrp->count) &&
                    notify_on_release(cgrp)) {
                        if (taskexit)
                                set_bit(CGRP_RELEASABLE, &cgrp->flags);
                        check_for_release(cgrp);
                }
+
+               kfree(link);
        }
-       rcu_read_unlock();
-       kfree(cg);
+
+       write_unlock(&css_set_lock);
+       call_rcu(&cg->rcu_head, free_css_set_rcu);
 }
 
 /*
@@ -337,6 +337,78 @@ static inline void put_css_set_taskexit(struct css_set *cg)
        __put_css_set(cg, 1);
 }
 
+/*
+ * compare_css_sets - helper function for find_existing_css_set().
+ * @cg: candidate css_set being tested
+ * @old_cg: existing css_set for a task
+ * @new_cgrp: cgroup that's being entered by the task
+ * @template: desired set of css pointers in css_set (pre-calculated)
+ *
+ * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * which "new_cgrp" belongs to, for which it should match "new_cgrp".
+ */
+static bool compare_css_sets(struct css_set *cg,
+                            struct css_set *old_cg,
+                            struct cgroup *new_cgrp,
+                            struct cgroup_subsys_state *template[])
+{
+       struct list_head *l1, *l2;
+
+       if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+               /* Not all subsystems matched */
+               return false;
+       }
+
+       /*
+        * Compare cgroup pointers in order to distinguish between
+        * different cgroups in hierarchies with no subsystems. We
+        * could get by with just this check alone (and skip the
+        * memcmp above) but on most setups the memcmp check will
+        * avoid the need for this more expensive check on almost all
+        * candidates.
+        */
+
+       l1 = &cg->cg_links;
+       l2 = &old_cg->cg_links;
+       while (1) {
+               struct cg_cgroup_link *cgl1, *cgl2;
+               struct cgroup *cg1, *cg2;
+
+               l1 = l1->next;
+               l2 = l2->next;
+               /* See if we reached the end - both lists are equal length. */
+               if (l1 == &cg->cg_links) {
+                       BUG_ON(l2 != &old_cg->cg_links);
+                       break;
+               } else {
+                       BUG_ON(l2 == &old_cg->cg_links);
+               }
+               /* Locate the cgroups associated with these links. */
+               cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
+               cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
+               cg1 = cgl1->cgrp;
+               cg2 = cgl2->cgrp;
+               /* Hierarchies should be linked in the same order. */
+               BUG_ON(cg1->root != cg2->root);
+
+               /*
+                * If this hierarchy is the hierarchy of the cgroup
+                * that's changing, then we need to check that this
+                * css_set points to the new cgroup; if it's any other
+                * hierarchy, then this css_set should point to the
+                * same cgroup as the old css_set.
+                */
+               if (cg1->root == new_cgrp->root) {
+                       if (cg1 != new_cgrp)
+                               return false;
+               } else {
+                       if (cg1 != cg2)
+                               return false;
+               }
+       }
+       return true;
+}
+
 /*
  * find_existing_css_set() is a helper for
  * find_css_set(), and checks to see whether an existing
@@ -378,10 +450,11 @@ static struct css_set *find_existing_css_set(
 
        hhead = css_set_hash(template);
        hlist_for_each_entry(cg, node, hhead, hlist) {
-               if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
-                       /* All subsystems matched */
-                       return cg;
-               }
+               if (!compare_css_sets(cg, oldcg, cgrp, template))
+                       continue;
+
+               /* This css_set matches what we need */
+               return cg;
        }
 
        /* No existing cgroup group matched */
@@ -435,8 +508,14 @@ static void link_css_set(struct list_head *tmp_cg_links,
        link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
                                cgrp_link_list);
        link->cg = cg;
+       link->cgrp = cgrp;
+       atomic_inc(&cgrp->count);
        list_move(&link->cgrp_link_list, &cgrp->css_sets);
-       list_add(&link->cg_link_list, &cg->cg_links);
+       /*
+        * Always add links to the tail of the list so that the list
+        * is sorted by order of hierarchy creation
+        */
+       list_add_tail(&link->cg_link_list, &cg->cg_links);
 }
 
 /*
@@ -451,11 +530,11 @@ static struct css_set *find_css_set(
 {
        struct css_set *res;
        struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
-       int i;
 
        struct list_head tmp_cg_links;
 
        struct hlist_head *hhead;
+       struct cg_cgroup_link *link;
 
        /* First see if we already have a cgroup group that matches
         * the desired set */
@@ -489,20 +568,12 @@ static struct css_set *find_css_set(
 
        write_lock(&css_set_lock);
        /* Add reference counts and links from the new css_set. */
-       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-               struct cgroup *cgrp = res->subsys[i]->cgroup;
-               struct cgroup_subsys *ss = subsys[i];
-               atomic_inc(&cgrp->count);
-               /*
-                * We want to add a link once per cgroup, so we
-                * only do it for the first subsystem in each
-                * hierarchy
-                */
-               if (ss->root->subsys_list.next == &ss->sibling)
-                       link_css_set(&tmp_cg_links, res, cgrp);
+       list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
+               struct cgroup *c = link->cgrp;
+               if (c->root == cgrp->root)
+                       c = cgrp;
+               link_css_set(&tmp_cg_links, res, c);
        }
-       if (list_empty(&rootnode.subsys_list))
-               link_css_set(&tmp_cg_links, res, dummytop);
 
        BUG_ON(!list_empty(&tmp_cg_links));
 
@@ -517,6 +588,41 @@ static struct css_set *find_css_set(
        return res;
 }
 
+/*
+ * Return the cgroup for "task" from the given hierarchy. Must be
+ * called with cgroup_mutex held.
+ */
+static struct cgroup *task_cgroup_from_root(struct task_struct *task,
+                                           struct cgroupfs_root *root)
+{
+       struct css_set *css;
+       struct cgroup *res = NULL;
+
+       BUG_ON(!mutex_is_locked(&cgroup_mutex));
+       read_lock(&css_set_lock);
+       /*
+        * No need to lock the task - since we hold cgroup_mutex the
+        * task can't change groups, so the only thing that can happen
+        * is that it exits and its css is set back to init_css_set.
+        */
+       css = task->cgroups;
+       if (css == &init_css_set) {
+               res = &root->top_cgroup;
+       } else {
+               struct cg_cgroup_link *link;
+               list_for_each_entry(link, &css->cg_links, cg_link_list) {
+                       struct cgroup *c = link->cgrp;
+                       if (c->root == root) {
+                               res = c;
+                               break;
+                       }
+               }
+       }
+       read_unlock(&css_set_lock);
+       BUG_ON(!res);
+       return res;
+}
+
 /*
  * There is one global cgroup mutex. We also require taking
  * task_lock() when dereferencing a task's cgroup subsys pointers.
@@ -677,6 +783,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
                 */
                deactivate_super(cgrp->root->sb);
 
+               /*
+                * if we're getting rid of the cgroup, refcount should ensure
+                * that there are no pidlists left.
+                */
+               BUG_ON(!list_empty(&cgrp->pidlists));
+
                call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
        }
        iput(inode);
@@ -841,6 +953,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_puts(seq, ",noprefix");
        if (strlen(root->release_agent_path))
                seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+       if (strlen(root->name))
+               seq_printf(seq, ",name=%s", root->name);
        mutex_unlock(&cgroup_mutex);
        return 0;
 }
@@ -849,6 +963,12 @@ struct cgroup_sb_opts {
        unsigned long subsys_bits;
        unsigned long flags;
        char *release_agent;
+       char *name;
+       /* User explicitly requested empty subsystem */
+       bool none;
+
+       struct cgroupfs_root *new_root;
+
 };
 
 /* Convert a hierarchy specifier into a bitmask of subsystems and
@@ -863,9 +983,7 @@ static int parse_cgroupfs_options(char *data,
        mask = ~(1UL << cpuset_subsys_id);
 #endif
 
-       opts->subsys_bits = 0;
-       opts->flags = 0;
-       opts->release_agent = NULL;
+       memset(opts, 0, sizeof(*opts));
 
        while ((token = strsep(&o, ",")) != NULL) {
                if (!*token)
@@ -879,17 +997,42 @@ static int parse_cgroupfs_options(char *data,
                                if (!ss->disabled)
                                        opts->subsys_bits |= 1ul << i;
                        }
+               } else if (!strcmp(token, "none")) {
+                       /* Explicitly have no subsystems */
+                       opts->none = true;
                } else if (!strcmp(token, "noprefix")) {
                        set_bit(ROOT_NOPREFIX, &opts->flags);
                } else if (!strncmp(token, "release_agent=", 14)) {
                        /* Specifying two release agents is forbidden */
                        if (opts->release_agent)
                                return -EINVAL;
-                       opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
+                       opts->release_agent =
+                               kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
                        if (!opts->release_agent)
                                return -ENOMEM;
-                       strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
-                       opts->release_agent[PATH_MAX - 1] = 0;
+               } else if (!strncmp(token, "name=", 5)) {
+                       int i;
+                       const char *name = token + 5;
+                       /* Can't specify an empty name */
+                       if (!strlen(name))
+                               return -EINVAL;
+                       /* Must match [\w.-]+ */
+                       for (i = 0; i < strlen(name); i++) {
+                               char c = name[i];
+                               if (isalnum(c))
+                                       continue;
+                               if ((c == '.') || (c == '-') || (c == '_'))
+                                       continue;
+                               return -EINVAL;
+                       }
+                       /* Specifying two names is forbidden */
+                       if (opts->name)
+                               return -EINVAL;
+                       opts->name = kstrndup(name,
+                                             MAX_CGROUP_ROOT_NAMELEN,
+                                             GFP_KERNEL);
+                       if (!opts->name)
+                               return -ENOMEM;
                } else {
                        struct cgroup_subsys *ss;
                        int i;
@@ -906,6 +1049,8 @@ static int parse_cgroupfs_options(char *data,
                }
        }
 
+       /* Consistency checks */
+
        /*
         * Option noprefix was introduced just for backward compatibility
         * with the old cpuset, so we allow noprefix only if mounting just
@@ -915,8 +1060,16 @@ static int parse_cgroupfs_options(char *data,
            (opts->subsys_bits & mask))
                return -EINVAL;
 
-       /* We can't have an empty hierarchy */
-       if (!opts->subsys_bits)
+
+       /* Can't specify "none" and some subsystems */
+       if (opts->subsys_bits && opts->none)
+               return -EINVAL;
+
+       /*
+        * We either have to specify by name or by subsystems. (So all
+        * empty hierarchies must have a name).
+        */
+       if (!opts->subsys_bits && !opts->name)
                return -EINVAL;
 
        return 0;
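
Taken together, the new "none" and "name=" options permit named,
subsystem-less hierarchies. Hypothetical mount invocations matching the
parsing rules above (mount points and names invented):

	# a named hierarchy with no subsystems bound
	mount -t cgroup -o none,name=tracker none /container-tracker

	# rejected: "none" combined with an explicit subsystem
	mount -t cgroup -o none,cpuset none /mnt	# returns -EINVAL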
@@ -944,6 +1097,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
                goto out_unlock;
        }
 
+       /* Don't allow name to change at remount */
+       if (opts.name && strcmp(opts.name, root->name)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
        ret = rebind_subsystems(root, opts.subsys_bits);
        if (ret)
                goto out_unlock;
@@ -955,6 +1114,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
                strcpy(root->release_agent_path, opts.release_agent);
  out_unlock:
        kfree(opts.release_agent);
+       kfree(opts.name);
        mutex_unlock(&cgroup_mutex);
        mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
        unlock_kernel();
@@ -974,9 +1134,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
        INIT_LIST_HEAD(&cgrp->children);
        INIT_LIST_HEAD(&cgrp->css_sets);
        INIT_LIST_HEAD(&cgrp->release_list);
-       INIT_LIST_HEAD(&cgrp->pids_list);
-       init_rwsem(&cgrp->pids_mutex);
+       INIT_LIST_HEAD(&cgrp->pidlists);
+       mutex_init(&cgrp->pidlist_mutex);
 }
+
 static void init_cgroup_root(struct cgroupfs_root *root)
 {
        struct cgroup *cgrp = &root->top_cgroup;
@@ -988,33 +1149,106 @@ static void init_cgroup_root(struct cgroupfs_root *root)
        init_cgroup_housekeeping(cgrp);
 }
 
+static bool init_root_id(struct cgroupfs_root *root)
+{
+       int ret = 0;
+
+       do {
+               if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
+                       return false;
+               spin_lock(&hierarchy_id_lock);
+               /* Try to allocate the next unused ID */
+               ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
+                                       &root->hierarchy_id);
+               if (ret == -ENOSPC)
+                       /* Try again starting from 0 */
+                       ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
+               if (!ret) {
+                       next_hierarchy_id = root->hierarchy_id + 1;
+               } else if (ret != -EAGAIN) {
+                       /* Can only get here if the 31-bit IDR is full ... */
+                       BUG_ON(ret);
+               }
+               spin_unlock(&hierarchy_id_lock);
+       } while (ret);
+       return true;
+}
+
 static int cgroup_test_super(struct super_block *sb, void *data)
 {
-       struct cgroupfs_root *new = data;
+       struct cgroup_sb_opts *opts = data;
        struct cgroupfs_root *root = sb->s_fs_info;
 
-       /* First check subsystems */
-       if (new->subsys_bits != root->subsys_bits)
-           return 0;
+       /* If we asked for a name then it must match */
+       if (opts->name && strcmp(opts->name, root->name))
+               return 0;
 
-       /* Next check flags */
-       if (new->flags != root->flags)
+       /*
+        * If we asked for subsystems (or explicitly for no
+        * subsystems) then they must match
+        */
+       if ((opts->subsys_bits || opts->none)
+           && (opts->subsys_bits != root->subsys_bits))
                return 0;
 
        return 1;
 }
 
+static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
+{
+       struct cgroupfs_root *root;
+
+       if (!opts->subsys_bits && !opts->none)
+               return NULL;
+
+       root = kzalloc(sizeof(*root), GFP_KERNEL);
+       if (!root)
+               return ERR_PTR(-ENOMEM);
+
+       if (!init_root_id(root)) {
+               kfree(root);
+               return ERR_PTR(-ENOMEM);
+       }
+       init_cgroup_root(root);
+
+       root->subsys_bits = opts->subsys_bits;
+       root->flags = opts->flags;
+       if (opts->release_agent)
+               strcpy(root->release_agent_path, opts->release_agent);
+       if (opts->name)
+               strcpy(root->name, opts->name);
+       return root;
+}
+
+static void cgroup_drop_root(struct cgroupfs_root *root)
+{
+       if (!root)
+               return;
+
+       BUG_ON(!root->hierarchy_id);
+       spin_lock(&hierarchy_id_lock);
+       ida_remove(&hierarchy_ida, root->hierarchy_id);
+       spin_unlock(&hierarchy_id_lock);
+       kfree(root);
+}
+
 static int cgroup_set_super(struct super_block *sb, void *data)
 {
        int ret;
-       struct cgroupfs_root *root = data;
+       struct cgroup_sb_opts *opts = data;
+
+       /* If we don't have a new root, we can't set up a new sb */
+       if (!opts->new_root)
+               return -EINVAL;
+
+       BUG_ON(!opts->subsys_bits && !opts->none);
 
        ret = set_anon_super(sb, NULL);
        if (ret)
                return ret;
 
-       sb->s_fs_info = root;
-       root->sb = sb;
+       sb->s_fs_info = opts->new_root;
+       opts->new_root->sb = sb;
 
        sb->s_blocksize = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -1051,48 +1285,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                         void *data, struct vfsmount *mnt)
 {
        struct cgroup_sb_opts opts;
+       struct cgroupfs_root *root;
        int ret = 0;
        struct super_block *sb;
-       struct cgroupfs_root *root;
-       struct list_head tmp_cg_links;
+       struct cgroupfs_root *new_root;
 
        /* First find the desired set of subsystems */
        ret = parse_cgroupfs_options(data, &opts);
-       if (ret) {
-               kfree(opts.release_agent);
-               return ret;
-       }
-
-       root = kzalloc(sizeof(*root), GFP_KERNEL);
-       if (!root) {
-               kfree(opts.release_agent);
-               return -ENOMEM;
-       }
+       if (ret)
+               goto out_err;
 
-       init_cgroup_root(root);
-       root->subsys_bits = opts.subsys_bits;
-       root->flags = opts.flags;
-       if (opts.release_agent) {
-               strcpy(root->release_agent_path, opts.release_agent);
-               kfree(opts.release_agent);
+       /*
+        * Allocate a new cgroup root. We may not need it if we're
+        * reusing an existing hierarchy.
+        */
+       new_root = cgroup_root_from_opts(&opts);
+       if (IS_ERR(new_root)) {
+               ret = PTR_ERR(new_root);
+               goto out_err;
        }
+       opts.new_root = new_root;
 
-       sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
-
+       /* Locate an existing or new sb for this hierarchy */
+       sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
        if (IS_ERR(sb)) {
-               kfree(root);
-               return PTR_ERR(sb);
+               ret = PTR_ERR(sb);
+               cgroup_drop_root(opts.new_root);
+               goto out_err;
        }
 
-       if (sb->s_fs_info != root) {
-               /* Reusing an existing superblock */
-               BUG_ON(sb->s_root == NULL);
-               kfree(root);
-               root = NULL;
-       } else {
-               /* New superblock */
+       root = sb->s_fs_info;
+       BUG_ON(!root);
+       if (root == opts.new_root) {
+               /* We used the new root structure, so this is a new hierarchy */
+               struct list_head tmp_cg_links;
                struct cgroup *root_cgrp = &root->top_cgroup;
                struct inode *inode;
+               struct cgroupfs_root *existing_root;
                int i;
 
                BUG_ON(sb->s_root != NULL);
@@ -1105,6 +1334,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                mutex_lock(&inode->i_mutex);
                mutex_lock(&cgroup_mutex);
 
+               if (strlen(root->name)) {
+                       /* Check for name clashes with existing mounts */
+                       for_each_active_root(existing_root) {
+                               if (!strcmp(existing_root->name, root->name)) {
+                                       ret = -EBUSY;
+                                       mutex_unlock(&cgroup_mutex);
+                                       mutex_unlock(&inode->i_mutex);
+                                       goto drop_new_super;
+                               }
+                       }
+               }
+
                /*
                 * We're accessing css_set_count without locking
                 * css_set_lock here, but that's OK - it can only be
@@ -1123,7 +1364,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                if (ret == -EBUSY) {
                        mutex_unlock(&cgroup_mutex);
                        mutex_unlock(&inode->i_mutex);
-                       goto free_cg_links;
+                       free_cg_links(&tmp_cg_links);
+                       goto drop_new_super;
                }
 
                /* EBUSY should be the only error here */
@@ -1155,17 +1397,27 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                BUG_ON(root->number_of_cgroups != 1);
 
                cgroup_populate_dir(root_cgrp);
-               mutex_unlock(&inode->i_mutex);
                mutex_unlock(&cgroup_mutex);
+               mutex_unlock(&inode->i_mutex);
+       } else {
+               /*
+                * We re-used an existing hierarchy - the new root (if
+                * any) is not needed
+                */
+               cgroup_drop_root(opts.new_root);
        }
 
        simple_set_mnt(mnt, sb);
+       kfree(opts.release_agent);
+       kfree(opts.name);
        return 0;
 
- free_cg_links:
-       free_cg_links(&tmp_cg_links);
  drop_new_super:
        deactivate_locked_super(sb);
+ out_err:
+       kfree(opts.release_agent);
+       kfree(opts.name);
+
        return ret;
 }
 
@@ -1211,7 +1463,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
        mutex_unlock(&cgroup_mutex);
 
        kill_litter_super(sb);
-       kfree(root);
+       cgroup_drop_root(root);
 }
 
 static struct file_system_type cgroup_fs_type = {
@@ -1276,27 +1528,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
        return 0;
 }
 
-/*
- * Return the first subsystem attached to a cgroup's hierarchy, and
- * its subsystem id.
- */
-
-static void get_first_subsys(const struct cgroup *cgrp,
-                       struct cgroup_subsys_state **css, int *subsys_id)
-{
-       const struct cgroupfs_root *root = cgrp->root;
-       const struct cgroup_subsys *test_ss;
-       BUG_ON(list_empty(&root->subsys_list));
-       test_ss = list_entry(root->subsys_list.next,
-                            struct cgroup_subsys, sibling);
-       if (css) {
-               *css = cgrp->subsys[test_ss->subsys_id];
-               BUG_ON(!*css);
-       }
-       if (subsys_id)
-               *subsys_id = test_ss->subsys_id;
-}
-
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
@@ -1313,18 +1544,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
        struct css_set *cg;
        struct css_set *newcg;
        struct cgroupfs_root *root = cgrp->root;
-       int subsys_id;
-
-       get_first_subsys(cgrp, NULL, &subsys_id);
 
        /* Nothing to do if the task is already in that cgroup */
-       oldcgrp = task_cgroup(tsk, subsys_id);
+       oldcgrp = task_cgroup_from_root(tsk, root);
        if (cgrp == oldcgrp)
                return 0;
 
        for_each_subsys(root, ss) {
                if (ss->can_attach) {
-                       retval = ss->can_attach(ss, cgrp, tsk);
+                       retval = ss->can_attach(ss, cgrp, tsk, false);
                        if (retval)
                                return retval;
                }
@@ -1362,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 
        for_each_subsys(root, ss) {
                if (ss->attach)
-                       ss->attach(ss, cgrp, oldcgrp, tsk);
+                       ss->attach(ss, cgrp, oldcgrp, tsk, false);
        }
        set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
        synchronize_rcu();
@@ -1423,15 +1651,6 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
        return ret;
 }
 
-/* The various types of files and directories in a cgroup file system */
-enum cgroup_filetype {
-       FILE_ROOT,
-       FILE_DIR,
-       FILE_TASKLIST,
-       FILE_NOTIFY_ON_RELEASE,
-       FILE_RELEASE_AGENT,
-};
-
 /**
  * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
  * @cgrp: the cgroup to be checked for liveness
@@ -1876,7 +2095,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
  * the start of a css_set
  */
 static void cgroup_advance_iter(struct cgroup *cgrp,
-                                         struct cgroup_iter *it)
+                               struct cgroup_iter *it)
 {
        struct list_head *l = it->cg_link;
        struct cg_cgroup_link *link;
@@ -2129,7 +2348,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 }
 
 /*
- * Stuff for reading the 'tasks' file.
+ * Stuff for reading the 'tasks'/'procs' files.
  *
  * Reading this file can return large amounts of data if a cgroup has
  * *lots* of attached tasks. So it may need several calls to read(),
@@ -2139,27 +2358,196 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  */
 
 /*
- * Load into 'pidarray' up to 'npids' of the tasks using cgroup
- * 'cgrp'.  Return actual number of pids loaded.  No need to
- * task_lock(p) when reading out p->cgroup, since we're in an RCU
- * read section, so the css_set can't go away, and is
- * immutable after creation.
+ * The following three functions "fix" the issue where there are more pids
+ * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
+ * TODO: replace with a kernel-wide solution to this problem
+ */
+#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
+static void *pidlist_allocate(int count)
+{
+       if (PIDLIST_TOO_LARGE(count))
+               return vmalloc(count * sizeof(pid_t));
+       else
+               return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
+}
+static void pidlist_free(void *p)
+{
+       if (is_vmalloc_addr(p))
+               vfree(p);
+       else
+               kfree(p);
+}
+static void *pidlist_resize(void *p, int newcount)
+{
+       void *newlist;
+       /* note: if new alloc fails, old p will still be valid either way */
+       if (is_vmalloc_addr(p)) {
+               newlist = vmalloc(newcount * sizeof(pid_t));
+               if (!newlist)
+                       return NULL;
+               memcpy(newlist, p, newcount * sizeof(pid_t));
+               vfree(p);
+       } else {
+               newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
+       }
+       return newlist;
+}
+
+/*
+ * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
+ * If the new stripped list is sufficiently smaller and there's enough memory
+ * to allocate a new buffer, it will let go of the unneeded memory. Returns the
+ * number of unique elements.
+ */
+/* is the size difference enough that we should re-allocate the array? */
+#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
+static int pidlist_uniq(pid_t **p, int length)
+{
+       int src, dest = 1;
+       pid_t *list = *p;
+       pid_t *newlist;
+
+       /*
+        * we presume the 0th element is unique, so src starts at 1. trivial
+        * edge cases first; no work needs to be done for either
+        */
+       if (length == 0 || length == 1)
+               return length;
+       /* src and dest walk down the list; dest counts unique elements */
+       for (src = 1; src < length; src++) {
+               /* find next unique element */
+               while (list[src] == list[src-1]) {
+                       src++;
+                       if (src == length)
+                               goto after;
+               }
+               /* dest always points to where the next unique element goes */
+               list[dest] = list[src];
+               dest++;
+       }
+after:
+       /*
+        * if the length difference is large enough, we want to allocate a
+        * smaller buffer to save memory. if this fails due to out of memory,
+        * we'll just stay with what we've got.
+        */
+       if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
+               newlist = pidlist_resize(list, dest);
+               if (newlist)
+                       *p = newlist;
+       }
+       return dest;
+}
+
+static int cmppid(const void *a, const void *b)
+{
+       return *(pid_t *)a - *(pid_t *)b;
+}
+
+/*
+ * find the appropriate pidlist for our purpose (given procs vs tasks)
+ * returns with the lock on that pidlist already held, and takes care
+ * of the use count, or returns NULL with no locks held if we're out of
+ * memory.
  */
-static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
+static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
+                                                 enum cgroup_filetype type)
 {
-       int n = 0, pid;
+       struct cgroup_pidlist *l;
+       /* don't need task_nsproxy() if we're looking at ourself */
+       struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);
+       /*
+        * We can't drop the pidlist_mutex before taking the l->mutex in case
+        * the last ref-holder is trying to remove l from the list at the same
+        * time. Holding the pidlist_mutex precludes somebody taking whichever
+        * list we find out from under us - compare release_pid_array().
+        */
+       mutex_lock(&cgrp->pidlist_mutex);
+       list_for_each_entry(l, &cgrp->pidlists, links) {
+               if (l->key.type == type && l->key.ns == ns) {
+                       /* found a matching list - drop the extra refcount */
+                       put_pid_ns(ns);
+                       /* make sure l doesn't vanish out from under us */
+                       down_write(&l->mutex);
+                       mutex_unlock(&cgrp->pidlist_mutex);
+                       l->use_count++;
+                       return l;
+               }
+       }
+       /* entry not found; create a new one */
+       l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
+       if (!l) {
+               mutex_unlock(&cgrp->pidlist_mutex);
+               put_pid_ns(ns);
+               return l;
+       }
+       init_rwsem(&l->mutex);
+       down_write(&l->mutex);
+       l->key.type = type;
+       l->key.ns = ns;
+       l->use_count = 0; /* don't increment here */
+       l->list = NULL;
+       l->owner = cgrp;
+       list_add(&l->links, &cgrp->pidlists);
+       mutex_unlock(&cgrp->pidlist_mutex);
+       return l;
+}
+
+/*
+ * Load a cgroup's pidarray with either procs' tgids or tasks' pids
+ */
+static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
+                             struct cgroup_pidlist **lp)
+{
+       pid_t *array;
+       int length;
+       int pid, n = 0; /* used for populating the array */
        struct cgroup_iter it;
        struct task_struct *tsk;
+       struct cgroup_pidlist *l;
+
+       /*
+        * If cgroup gets more users after we read count, we won't have
+        * enough space - tough.  This race is indistinguishable to the
+        * caller from the case that the additional cgroup users didn't
+        * show up until sometime later on.
+        */
+       length = cgroup_task_count(cgrp);
+       array = pidlist_allocate(length);
+       if (!array)
+               return -ENOMEM;
+       /* now, populate the array */
        cgroup_iter_start(cgrp, &it);
        while ((tsk = cgroup_iter_next(cgrp, &it))) {
-               if (unlikely(n == npids))
+               if (unlikely(n == length))
                        break;
-               pid = task_pid_vnr(tsk);
-               if (pid > 0)
-                       pidarray[n++] = pid;
+               /* get tgid or pid for procs or tasks file respectively */
+               if (type == CGROUP_FILE_PROCS)
+                       pid = task_tgid_vnr(tsk);
+               else
+                       pid = task_pid_vnr(tsk);
+               if (pid > 0) /* make sure to only use valid results */
+                       array[n++] = pid;
        }
        cgroup_iter_end(cgrp, &it);
-       return n;
+       length = n;
+       /* now sort & (if procs) strip out duplicates */
+       sort(array, length, sizeof(pid_t), cmppid, NULL);
+       if (type == CGROUP_FILE_PROCS)
+               length = pidlist_uniq(&array, length);
+       l = cgroup_pidlist_find(cgrp, type);
+       if (!l) {
+               pidlist_free(array);
+               return -ENOMEM;
+       }
+       /* store array, freeing old if necessary - lock already held */
+       pidlist_free(l->list);
+       l->list = array;
+       l->length = length;
+       l->use_count++;
+       up_write(&l->mutex);
+       *lp = l;
+       return 0;
 }
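
A quick arithmetic check on the kmalloc/vmalloc threshold introduced
above, assuming 4 KiB pages and a 32-bit pid_t:

	/* PIDLIST_TOO_LARGE(c):  c * sizeof(pid_t) > PAGE_SIZE * 2   */
	/* i.e. c * 4 > 8192, so kmalloc() serves up to 2048 pids and */
	/* vmalloc() takes over beyond that.                          */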
 
 /**
@@ -2216,37 +2604,14 @@ err:
        return ret;
 }
 
-/*
- * Cache pids for all threads in the same pid namespace that are
- * opening the same "tasks" file.
- */
-struct cgroup_pids {
-       /* The node in cgrp->pids_list */
-       struct list_head list;
-       /* The cgroup those pids belong to */
-       struct cgroup *cgrp;
-       /* The namepsace those pids belong to */
-       struct pid_namespace *ns;
-       /* Array of process ids in the cgroup */
-       pid_t *tasks_pids;
-       /* How many files are using the this tasks_pids array */
-       int use_count;
-       /* Length of the current tasks_pids array */
-       int length;
-};
-
-static int cmppid(const void *a, const void *b)
-{
-       return *(pid_t *)a - *(pid_t *)b;
-}
 
 /*
- * seq_file methods for the "tasks" file. The seq_file position is the
+ * seq_file methods for the tasks/procs files. The seq_file position is the
  * next pid to display; the seq_file iterator is a pointer to the pid
- * in the cgroup->tasks_pids array.
+ * in the cgroup->l->list array.
  */
 
-static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
+static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
 {
        /*
         * Initially we receive a position value that corresponds to
@@ -2254,48 +2619,45 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
         * after a seek to the start). Use a binary-search to find the
         * next pid to display, if any
         */
-       struct cgroup_pids *cp = s->private;
-       struct cgroup *cgrp = cp->cgrp;
+       struct cgroup_pidlist *l = s->private;
        int index = 0, pid = *pos;
        int *iter;
 
-       down_read(&cgrp->pids_mutex);
+       down_read(&l->mutex);
        if (pid) {
-               int end = cp->length;
+               int end = l->length;
 
                while (index < end) {
                        int mid = (index + end) / 2;
-                       if (cp->tasks_pids[mid] == pid) {
+                       if (l->list[mid] == pid) {
                                index = mid;
                                break;
-                       } else if (cp->tasks_pids[mid] <= pid)
+                       } else if (l->list[mid] <= pid)
                                index = mid + 1;
                        else
                                end = mid;
                }
        }
        /* If we're off the end of the array, we're done */
-       if (index >= cp->length)
+       if (index >= l->length)
                return NULL;
        /* Update the abstract position to be the actual pid that we found */
-       iter = cp->tasks_pids + index;
+       iter = l->list + index;
        *pos = *iter;
        return iter;
 }
 
-static void cgroup_tasks_stop(struct seq_file *s, void *v)
+static void cgroup_pidlist_stop(struct seq_file *s, void *v)
 {
-       struct cgroup_pids *cp = s->private;
-       struct cgroup *cgrp = cp->cgrp;
-       up_read(&cgrp->pids_mutex);
+       struct cgroup_pidlist *l = s->private;
+       up_read(&l->mutex);
 }
 
-static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
 {
-       struct cgroup_pids *cp = s->private;
-       int *p = v;
-       int *end = cp->tasks_pids + cp->length;
-
+       struct cgroup_pidlist *l = s->private;
+       pid_t *p = v;
+       pid_t *end = l->list + l->length;
        /*
         * Advance to the next pid in the array. If this goes off the
         * end, we're done
@@ -2309,124 +2671,107 @@ static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
        }
 }
 
-static int cgroup_tasks_show(struct seq_file *s, void *v)
+static int cgroup_pidlist_show(struct seq_file *s, void *v)
 {
        return seq_printf(s, "%d\n", *(int *)v);
 }
 
-static const struct seq_operations cgroup_tasks_seq_operations = {
-       .start = cgroup_tasks_start,
-       .stop = cgroup_tasks_stop,
-       .next = cgroup_tasks_next,
-       .show = cgroup_tasks_show,
+/*
+ * seq_operations functions for iterating on pidlists through seq_file -
+ * independent of whether it's tasks or procs
+ */
+static const struct seq_operations cgroup_pidlist_seq_operations = {
+       .start = cgroup_pidlist_start,
+       .stop = cgroup_pidlist_stop,
+       .next = cgroup_pidlist_next,
+       .show = cgroup_pidlist_show,
 };
 
-static void release_cgroup_pid_array(struct cgroup_pids *cp)
+static void cgroup_release_pid_array(struct cgroup_pidlist *l)
 {
-       struct cgroup *cgrp = cp->cgrp;
-
-       down_write(&cgrp->pids_mutex);
-       BUG_ON(!cp->use_count);
-       if (!--cp->use_count) {
-               list_del(&cp->list);
-               put_pid_ns(cp->ns);
-               kfree(cp->tasks_pids);
-               kfree(cp);
+       /*
+        * the case where we're the last user of this particular pidlist will
+        * have us remove it from the cgroup's list, which entails taking the
+        * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
+        * pidlist_mutex, we have to take pidlist_mutex first.
+        */
+       mutex_lock(&l->owner->pidlist_mutex);
+       down_write(&l->mutex);
+       BUG_ON(!l->use_count);
+       if (!--l->use_count) {
+               /* we're the last user if refcount is 0; remove and free */
+               list_del(&l->links);
+               mutex_unlock(&l->owner->pidlist_mutex);
+               pidlist_free(l->list);
+               put_pid_ns(l->key.ns);
+               up_write(&l->mutex);
+               kfree(l);
+               return;
        }
-       up_write(&cgrp->pids_mutex);
+       mutex_unlock(&l->owner->pidlist_mutex);
+       up_write(&l->mutex);
 }
 
-static int cgroup_tasks_release(struct inode *inode, struct file *file)
+static int cgroup_pidlist_release(struct inode *inode, struct file *file)
 {
-       struct seq_file *seq;
-       struct cgroup_pids *cp;
-
+       struct cgroup_pidlist *l;
        if (!(file->f_mode & FMODE_READ))
                return 0;
-
-       seq = file->private_data;
-       cp = seq->private;
-
-       release_cgroup_pid_array(cp);
+       /*
+        * the seq_file will only be initialized if the file was opened for
+        * reading; hence we check if it's not null only in that case.
+        */
+       l = ((struct seq_file *)file->private_data)->private;
+       cgroup_release_pid_array(l);
        return seq_release(inode, file);
 }
 
-static struct file_operations cgroup_tasks_operations = {
+static const struct file_operations cgroup_pidlist_operations = {
        .read = seq_read,
        .llseek = seq_lseek,
        .write = cgroup_file_write,
-       .release = cgroup_tasks_release,
+       .release = cgroup_pidlist_release,
 };
 
 /*
- * Handle an open on 'tasks' file.  Prepare an array containing the
- * process id's of tasks currently attached to the cgroup being opened.
+ * The following functions handle opens on a file that displays a pidlist
+ * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
+ * in the cgroup.
  */
-
-static int cgroup_tasks_open(struct inode *unused, struct file *file)
+/* helper function for the two below it */
+static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
 {
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-       struct pid_namespace *ns = current->nsproxy->pid_ns;
-       struct cgroup_pids *cp;
-       pid_t *pidarray;
-       int npids;
+       struct cgroup_pidlist *l;
        int retval;
 
        /* Nothing to do for write-only files */
        if (!(file->f_mode & FMODE_READ))
                return 0;
 
-       /*
-        * If cgroup gets more users after we read count, we won't have
-        * enough space - tough.  This race is indistinguishable to the
-        * caller from the case that the additional cgroup users didn't
-        * show up until sometime later on.
-        */
-       npids = cgroup_task_count(cgrp);
-       pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-       if (!pidarray)
-               return -ENOMEM;
-       npids = pid_array_load(pidarray, npids, cgrp);
-       sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-       /*
-        * Store the array in the cgroup, freeing the old
-        * array if necessary
-        */
-       down_write(&cgrp->pids_mutex);
-
-       list_for_each_entry(cp, &cgrp->pids_list, list) {
-               if (ns == cp->ns)
-                       goto found;
-       }
-
-       cp = kzalloc(sizeof(*cp), GFP_KERNEL);
-       if (!cp) {
-               up_write(&cgrp->pids_mutex);
-               kfree(pidarray);
-               return -ENOMEM;
-       }
-       cp->cgrp = cgrp;
-       cp->ns = ns;
-       get_pid_ns(ns);
-       list_add(&cp->list, &cgrp->pids_list);
-found:
-       kfree(cp->tasks_pids);
-       cp->tasks_pids = pidarray;
-       cp->length = npids;
-       cp->use_count++;
-       up_write(&cgrp->pids_mutex);
-
-       file->f_op = &cgroup_tasks_operations;
+       /* have the array populated */
+       retval = pidlist_array_load(cgrp, type, &l);
+       if (retval)
+               return retval;
+       /* configure file information */
+       file->f_op = &cgroup_pidlist_operations;
 
-       retval = seq_open(file, &cgroup_tasks_seq_operations);
+       retval = seq_open(file, &cgroup_pidlist_seq_operations);
        if (retval) {
-               release_cgroup_pid_array(cp);
+               cgroup_release_pid_array(l);
                return retval;
        }
-       ((struct seq_file *)file->private_data)->private = cp;
+       ((struct seq_file *)file->private_data)->private = l;
        return 0;
 }
+static int cgroup_tasks_open(struct inode *unused, struct file *file)
+{
+       return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
+}
+static int cgroup_procs_open(struct inode *unused, struct file *file)
+{
+       return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
+}
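
What these open handlers ultimately serve is an ordinary line-oriented file,
one pid per line, as produced by cgroup_pidlist_show(). A userspace sketch;
the mount point /cgroup/demo is an assumption:

    #include <stdio.h>

    int main(void)
    {
            char line[64];
            FILE *f = fopen("/cgroup/demo/tasks", "r"); /* hypothetical path */

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);    /* each line is one pid */
            fclose(f);
            return 0;
    }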
 
 static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
                                            struct cftype *cft)
@@ -2449,21 +2794,27 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
 /*
  * for the common functions, 'private' gives the type of file
  */
+/* for hysterical raisins, we can't put this on the older files */
+#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
 static struct cftype files[] = {
        {
                .name = "tasks",
                .open = cgroup_tasks_open,
                .write_u64 = cgroup_tasks_write,
-               .release = cgroup_tasks_release,
-               .private = FILE_TASKLIST,
+               .release = cgroup_pidlist_release,
                .mode = S_IRUGO | S_IWUSR,
        },
-
+       {
+               .name = CGROUP_FILE_GENERIC_PREFIX "procs",
+               .open = cgroup_procs_open,
+               /* .write_u64 = cgroup_procs_write, TODO */
+               .release = cgroup_pidlist_release,
+               .mode = S_IRUGO,
+       },
        {
                .name = "notify_on_release",
                .read_u64 = cgroup_read_notify_on_release,
                .write_u64 = cgroup_write_notify_on_release,
-               .private = FILE_NOTIFY_ON_RELEASE,
        },
 };
 
@@ -2472,7 +2823,6 @@ static struct cftype cft_release_agent = {
        .read_seq_string = cgroup_release_agent_show,
        .write_string = cgroup_release_agent_write,
        .max_write_len = PATH_MAX,
-       .private = FILE_RELEASE_AGENT,
 };
 
 static int cgroup_populate_dir(struct cgroup *cgrp)
@@ -2879,6 +3229,7 @@ int __init cgroup_init_early(void)
        init_task.cgroups = &init_css_set;
 
        init_css_set_link.cg = &init_css_set;
+       init_css_set_link.cgrp = dummytop;
        list_add(&init_css_set_link.cgrp_link_list,
                 &rootnode.top_cgroup.css_sets);
        list_add(&init_css_set_link.cg_link_list,
@@ -2933,7 +3284,7 @@ int __init cgroup_init(void)
        /* Add init_css_set to the hash table */
        hhead = css_set_hash(init_css_set.subsys);
        hlist_add_head(&init_css_set.hlist, hhead);
-
+       BUG_ON(!init_root_id(&rootnode));
        err = register_filesystem(&cgroup_fs_type);
        if (err < 0)
                goto out;
@@ -2986,15 +3337,16 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
        for_each_active_root(root) {
                struct cgroup_subsys *ss;
                struct cgroup *cgrp;
-               int subsys_id;
                int count = 0;
 
-               seq_printf(m, "%lu:", root->subsys_bits);
+               seq_printf(m, "%d:", root->hierarchy_id);
                for_each_subsys(root, ss)
                        seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+               if (strlen(root->name))
+                       seq_printf(m, "%sname=%s", count ? "," : "",
+                                  root->name);
                seq_putc(m, ':');
-               get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
-               cgrp = task_cgroup(tsk, subsys_id);
+               cgrp = task_cgroup_from_root(tsk, root);
                retval = cgroup_path(cgrp, buf, PAGE_SIZE);
                if (retval < 0)
                        goto out_unlock;
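
With this change each record in /proc/<pid>/cgroup reads
hierarchy-id:controllers[,name=...]:path. A userspace sketch that splits one
such record; the input line is invented and empty fields are not handled:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char buf[] = "2:cpuset,name=test:/mygroup"; /* invented record */
            char *id = strtok(buf, ":");
            char *controllers = strtok(NULL, ":");
            char *path = strtok(NULL, ":");

            printf("hierarchy %s, controllers %s, path %s\n",
                   id, controllers, path);
            return 0;
    }
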
@@ -3033,8 +3385,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
        mutex_lock(&cgroup_mutex);
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                struct cgroup_subsys *ss = subsys[i];
-               seq_printf(m, "%s\t%lu\t%d\t%d\n",
-                          ss->name, ss->root->subsys_bits,
+               seq_printf(m, "%s\t%d\t%d\t%d\n",
+                          ss->name, ss->root->hierarchy_id,
                           ss->root->number_of_cgroups, !ss->disabled);
        }
        mutex_unlock(&cgroup_mutex);
@@ -3320,13 +3672,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
 {
        int ret;
        struct cgroup *target;
-       int subsys_id;
 
        if (cgrp == dummytop)
                return 1;
 
-       get_first_subsys(cgrp, NULL, &subsys_id);
-       target = task_cgroup(task, subsys_id);
+       target = task_cgroup_from_root(task, cgrp->root);
        while (cgrp != target && cgrp != cgrp->top_cgroup)
                cgrp = cgrp->parent;
        ret = (cgrp == target);
@@ -3693,3 +4043,154 @@ css_get_next(struct cgroup_subsys *ss, int id,
        return ret;
 }
 
+#ifdef CONFIG_CGROUP_DEBUG
+static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
+                                                  struct cgroup *cont)
+{
+       struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
+
+       if (!css)
+               return ERR_PTR(-ENOMEM);
+
+       return css;
+}
+
+static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       kfree(cont->subsys[debug_subsys_id]);
+}
+
+static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
+{
+       return atomic_read(&cont->count);
+}
+
+static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
+{
+       return cgroup_task_count(cont);
+}
+
+static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
+{
+       return (u64)(unsigned long)current->cgroups;
+}
+
+static u64 current_css_set_refcount_read(struct cgroup *cont,
+                                          struct cftype *cft)
+{
+       u64 count;
+
+       rcu_read_lock();
+       count = atomic_read(&current->cgroups->refcount);
+       rcu_read_unlock();
+       return count;
+}
+
+static int current_css_set_cg_links_read(struct cgroup *cont,
+                                        struct cftype *cft,
+                                        struct seq_file *seq)
+{
+       struct cg_cgroup_link *link;
+       struct css_set *cg;
+
+       read_lock(&css_set_lock);
+       rcu_read_lock();
+       cg = rcu_dereference(current->cgroups);
+       list_for_each_entry(link, &cg->cg_links, cg_link_list) {
+               struct cgroup *c = link->cgrp;
+               const char *name;
+
+               if (c->dentry)
+                       name = c->dentry->d_name.name;
+               else
+                       name = "?";
+               seq_printf(seq, "Root %d group %s\n",
+                          c->root->hierarchy_id, name);
+       }
+       rcu_read_unlock();
+       read_unlock(&css_set_lock);
+       return 0;
+}
+
+#define MAX_TASKS_SHOWN_PER_CSS 25
+static int cgroup_css_links_read(struct cgroup *cont,
+                                struct cftype *cft,
+                                struct seq_file *seq)
+{
+       struct cg_cgroup_link *link;
+
+       read_lock(&css_set_lock);
+       list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
+               struct css_set *cg = link->cg;
+               struct task_struct *task;
+               int count = 0;
+               seq_printf(seq, "css_set %p\n", cg);
+               list_for_each_entry(task, &cg->tasks, cg_list) {
+                       if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
+                               seq_puts(seq, "  ...\n");
+                               break;
+                       } else {
+                               seq_printf(seq, "  task %d\n",
+                                          task_pid_vnr(task));
+                       }
+               }
+       }
+       read_unlock(&css_set_lock);
+       return 0;
+}
+
+static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
+{
+       return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+}
+
+static struct cftype debug_files[] =  {
+       {
+               .name = "cgroup_refcount",
+               .read_u64 = cgroup_refcount_read,
+       },
+       {
+               .name = "taskcount",
+               .read_u64 = debug_taskcount_read,
+       },
+
+       {
+               .name = "current_css_set",
+               .read_u64 = current_css_set_read,
+       },
+
+       {
+               .name = "current_css_set_refcount",
+               .read_u64 = current_css_set_refcount_read,
+       },
+
+       {
+               .name = "current_css_set_cg_links",
+               .read_seq_string = current_css_set_cg_links_read,
+       },
+
+       {
+               .name = "cgroup_css_links",
+               .read_seq_string = cgroup_css_links_read,
+       },
+
+       {
+               .name = "releasable",
+               .read_u64 = releasable_read,
+       },
+};
+
+static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       return cgroup_add_files(cont, ss, debug_files,
+                               ARRAY_SIZE(debug_files));
+}
+
+struct cgroup_subsys debug_subsys = {
+       .name = "debug",
+       .create = debug_create,
+       .destroy = debug_destroy,
+       .populate = debug_populate,
+       .subsys_id = debug_subsys_id,
+};
+#endif /* CONFIG_CGROUP_DEBUG */
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
deleted file mode 100644 (file)
index 0c92d79..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * kernel/cgroup_debug.c - Example cgroup subsystem that
- * exposes debug info
- *
- * Copyright (C) Google Inc, 2007
- *
- * Developed by Paul Menage (menage@google.com)
- *
- */
-
-#include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/rcupdate.h>
-
-#include <asm/atomic.h>
-
-static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
-                                                  struct cgroup *cont)
-{
-       struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
-
-       if (!css)
-               return ERR_PTR(-ENOMEM);
-
-       return css;
-}
-
-static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-       kfree(cont->subsys[debug_subsys_id]);
-}
-
-static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
-{
-       return atomic_read(&cont->count);
-}
-
-static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
-{
-       u64 count;
-
-       count = cgroup_task_count(cont);
-       return count;
-}
-
-static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
-{
-       return (u64)(long)current->cgroups;
-}
-
-static u64 current_css_set_refcount_read(struct cgroup *cont,
-                                          struct cftype *cft)
-{
-       u64 count;
-
-       rcu_read_lock();
-       count = atomic_read(&current->cgroups->refcount);
-       rcu_read_unlock();
-       return count;
-}
-
-static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
-{
-       return test_bit(CGRP_RELEASABLE, &cgrp->flags);
-}
-
-static struct cftype files[] =  {
-       {
-               .name = "cgroup_refcount",
-               .read_u64 = cgroup_refcount_read,
-       },
-       {
-               .name = "taskcount",
-               .read_u64 = taskcount_read,
-       },
-
-       {
-               .name = "current_css_set",
-               .read_u64 = current_css_set_read,
-       },
-
-       {
-               .name = "current_css_set_refcount",
-               .read_u64 = current_css_set_refcount_read,
-       },
-
-       {
-               .name = "releasable",
-               .read_u64 = releasable_read,
-       },
-};
-
-static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-       return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
-}
-
-struct cgroup_subsys debug_subsys = {
-       .name = "debug",
-       .create = debug_create,
-       .destroy = debug_destroy,
-       .populate = debug_populate,
-       .subsys_id = debug_subsys_id,
-};
index fb249e2bcada880a19105488c7fdde33e9483126..59e9ef6aab4002e1d99170f50156e733e8f46343 100644 (file)
@@ -159,7 +159,7 @@ static bool is_task_frozen_enough(struct task_struct *task)
  */
 static int freezer_can_attach(struct cgroup_subsys *ss,
                              struct cgroup *new_cgroup,
-                             struct task_struct *task)
+                             struct task_struct *task, bool threadgroup)
 {
        struct freezer *freezer;
 
@@ -177,6 +177,19 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
        if (freezer->state == CGROUP_FROZEN)
                return -EBUSY;
 
+       if (threadgroup) {
+               struct task_struct *c;
+
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+                       if (is_task_frozen_enough(c)) {
+                               rcu_read_unlock();
+                               return -EBUSY;
+                       }
+               }
+               rcu_read_unlock();
+       }
+
        return 0;
 }
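
The same rcu-protected walk over tsk->thread_group recurs in the ns, cpuset
and cpu subsystems below. A hedged sketch of the shared shape; the helper and
its callback are invented names, not kernel API:

    #include <linux/sched.h>
    #include <linux/rcupdate.h>

    static int for_each_sibling_check(struct task_struct *tsk,
                                      int (*check_one)(struct task_struct *))
    {
            struct task_struct *c;
            int ret = 0;

            rcu_read_lock();
            list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
                    ret = check_one(c);     /* a per-task permission test */
                    if (ret)
                            break;          /* first failure wins */
            }
            rcu_read_unlock();
            return ret;
    }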
 
index 7e75a41bd50855caf3a33109bed11a52fa967f80..b5cb469d25456b03292d27e22ece4508ddba1ca2 100644 (file)
@@ -1324,9 +1324,10 @@ static int fmeter_getrate(struct fmeter *fmp)
 static cpumask_var_t cpus_attach;
 
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys *ss,
-                            struct cgroup *cont, struct task_struct *tsk)
+static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+                            struct task_struct *tsk, bool threadgroup)
 {
+       int ret;
        struct cpuset *cs = cgroup_cs(cont);
 
        if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1343,18 +1344,51 @@ static int cpuset_can_attach(struct cgroup_subsys *ss,
        if (tsk->flags & PF_THREAD_BOUND)
                return -EINVAL;
 
-       return security_task_setscheduler(tsk, 0, NULL);
+       ret = security_task_setscheduler(tsk, 0, NULL);
+       if (ret)
+               return ret;
+       if (threadgroup) {
+               struct task_struct *c;
+
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       ret = security_task_setscheduler(c, 0, NULL);
+                       if (ret) {
+                               rcu_read_unlock();
+                               return ret;
+                       }
+               }
+               rcu_read_unlock();
+       }
+       return 0;
+}
+
+static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
+                              struct cpuset *cs)
+{
+       int err;
+       /*
+        * can_attach beforehand should guarantee that this doesn't fail.
+        * TODO: have a better way to handle failure here
+        */
+       err = set_cpus_allowed_ptr(tsk, cpus_attach);
+       WARN_ON_ONCE(err);
+
+       task_lock(tsk);
+       cpuset_change_task_nodemask(tsk, to);
+       task_unlock(tsk);
+       cpuset_update_task_spread_flag(cs, tsk);
 }
 
-static void cpuset_attach(struct cgroup_subsys *ss,
-                         struct cgroup *cont, struct cgroup *oldcont,
-                         struct task_struct *tsk)
+static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+                         struct cgroup *oldcont, struct task_struct *tsk,
+                         bool threadgroup)
 {
        nodemask_t from, to;
        struct mm_struct *mm;
        struct cpuset *cs = cgroup_cs(cont);
        struct cpuset *oldcs = cgroup_cs(oldcont);
-       int err;
 
        if (cs == &top_cpuset) {
                cpumask_copy(cpus_attach, cpu_possible_mask);
@@ -1363,15 +1397,19 @@ static void cpuset_attach(struct cgroup_subsys *ss,
                guarantee_online_cpus(cs, cpus_attach);
                guarantee_online_mems(cs, &to);
        }
-       err = set_cpus_allowed_ptr(tsk, cpus_attach);
-       if (err)
-               return;
 
-       task_lock(tsk);
-       cpuset_change_task_nodemask(tsk, &to);
-       task_unlock(tsk);
-       cpuset_update_task_spread_flag(cs, tsk);
+       /* do the per-task migration, repeated for each thread if threadgroup */
+       cpuset_attach_task(tsk, &to, cs);
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       cpuset_attach_task(c, &to, cs);
+               }
+               rcu_read_unlock();
+       }
 
+       /* change mm; only needs to be done once even if threadgroup */
        from = oldcs->mems_allowed;
        to = cs->mems_allowed;
        mm = get_task_mm(tsk);
index d7f7a01082eb507ff78605a18e1f9faa79b20e61..dd76cfe5f5b038dbd4fe804a5bfbd6663dd634ce 100644 (file)
@@ -782,6 +782,25 @@ EXPORT_SYMBOL(set_create_files_as);
 
 #ifdef CONFIG_DEBUG_CREDENTIALS
 
+bool creds_are_invalid(const struct cred *cred)
+{
+       if (cred->magic != CRED_MAGIC)
+               return true;
+       if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
+               return true;
+#ifdef CONFIG_SECURITY_SELINUX
+       if (selinux_is_enabled()) {
+               if ((unsigned long) cred->security < PAGE_SIZE)
+                       return true;
+               if ((*(u32 *)cred->security & 0xffffff00) ==
+                   (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
+                       return true;
+       }
+#endif
+       return false;
+}
+EXPORT_SYMBOL(creds_are_invalid);
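
A hypothetical caller sketch, not from this patch, of the kind of debug-only
assertion creds_are_invalid() is meant to back under CONFIG_DEBUG_CREDENTIALS:

    #include <linux/cred.h>
    #include <linux/kernel.h>

    static inline void demo_assert_creds_valid(const struct cred *cred)
    {
            if (creds_are_invalid(cred))    /* the helper exported above */
                    panic("CRED: invalid credentials at %p", cred);
    }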
+
 /*
  * dump invalid credentials
  */
index 60d6fdcc926509da90ebe38b4e59c275290e8d9f..5859f598c951bde881a9e7ce1c29d19fd66b3463 100644 (file)
@@ -976,8 +976,6 @@ NORET_TYPE void do_exit(long code)
                disassociate_ctty(1);
 
        module_put(task_thread_info(tsk)->exec_domain->module);
-       if (tsk->binfmt)
-               module_put(tsk->binfmt->module);
 
        proc_exit_connector(tsk);
 
@@ -1097,28 +1095,28 @@ struct wait_opts {
        int __user              *wo_stat;
        struct rusage __user    *wo_rusage;
 
+       wait_queue_t            child_wait;
        int                     notask_error;
 };
 
-static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
-       struct pid *pid = NULL;
-       if (type == PIDTYPE_PID)
-               pid = task->pids[type].pid;
-       else if (type < PIDTYPE_MAX)
-               pid = task->group_leader->pids[type].pid;
-       return pid;
+       if (type != PIDTYPE_PID)
+               task = task->group_leader;
+       return task->pids[type].pid;
 }
 
-static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
-       int err;
-
-       if (wo->wo_type < PIDTYPE_MAX) {
-               if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
-                       return 0;
-       }
+       return  wo->wo_type == PIDTYPE_MAX ||
+               task_pid_type(p, wo->wo_type) == wo->wo_pid;
+}
 
+static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+{
+       if (!eligible_pid(wo, p))
+               return 0;
        /* Wait for all children (clone and not) if __WALL is set;
         * otherwise, wait for clone children *only* if __WCLONE is
         * set; otherwise, wait for non-clone children *only*.  (Note:
@@ -1128,10 +1126,6 @@ static int eligible_child(struct wait_opts *wo, struct task_struct *p)
            && !(wo->wo_flags & __WALL))
                return 0;
 
-       err = security_task_wait(p);
-       if (err)
-               return err;
-
        return 1;
 }
 
@@ -1144,18 +1138,20 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
 
        put_task_struct(p);
        infop = wo->wo_info;
-       if (!retval)
-               retval = put_user(SIGCHLD, &infop->si_signo);
-       if (!retval)
-               retval = put_user(0, &infop->si_errno);
-       if (!retval)
-               retval = put_user((short)why, &infop->si_code);
-       if (!retval)
-               retval = put_user(pid, &infop->si_pid);
-       if (!retval)
-               retval = put_user(uid, &infop->si_uid);
-       if (!retval)
-               retval = put_user(status, &infop->si_status);
+       if (infop) {
+               if (!retval)
+                       retval = put_user(SIGCHLD, &infop->si_signo);
+               if (!retval)
+                       retval = put_user(0, &infop->si_errno);
+               if (!retval)
+                       retval = put_user((short)why, &infop->si_code);
+               if (!retval)
+                       retval = put_user(pid, &infop->si_pid);
+               if (!retval)
+                       retval = put_user(uid, &infop->si_uid);
+               if (!retval)
+                       retval = put_user(status, &infop->si_status);
+       }
        if (!retval)
                retval = pid;
        return retval;
@@ -1485,13 +1481,14 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
  * then ->notask_error is 0 if @p is an eligible child,
  * or another error from security_task_wait(), or still -ECHILD.
  */
-static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
-                               int ptrace, struct task_struct *p)
+static int wait_consider_task(struct wait_opts *wo, int ptrace,
+                               struct task_struct *p)
 {
        int ret = eligible_child(wo, p);
        if (!ret)
                return ret;
 
+       ret = security_task_wait(p);
        if (unlikely(ret < 0)) {
                /*
                 * If we have not yet seen any eligible child,
@@ -1553,7 +1550,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
                 * Do not consider detached threads.
                 */
                if (!task_detached(p)) {
-                       int ret = wait_consider_task(wo, tsk, 0, p);
+                       int ret = wait_consider_task(wo, 0, p);
                        if (ret)
                                return ret;
                }
@@ -1567,7 +1564,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
        struct task_struct *p;
 
        list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
-               int ret = wait_consider_task(wo, tsk, 1, p);
+               int ret = wait_consider_task(wo, 1, p);
                if (ret)
                        return ret;
        }
@@ -1575,15 +1572,38 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
        return 0;
 }
 
+static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+                               int sync, void *key)
+{
+       struct wait_opts *wo = container_of(wait, struct wait_opts,
+                                               child_wait);
+       struct task_struct *p = key;
+
+       if (!eligible_pid(wo, p))
+               return 0;
+
+       if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
+               return 0;
+
+       return default_wake_function(wait, mode, sync, key);
+}
+
+void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
+{
+       __wake_up_sync_key(&parent->signal->wait_chldexit,
+                               TASK_INTERRUPTIBLE, 1, p);
+}
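
child_wait_callback() above is an instance of a filtering waitqueue entry:
the waker passes a key, and the callback may decline the wakeup by returning
0 so the sleeper never runs. A minimal hedged sketch; demo_filter() is an
invented name, and wait_queue_t is the type of this era:

    #include <linux/wait.h>

    static int demo_filter(wait_queue_t *wait, unsigned mode, int sync,
                           void *key)
    {
            if (key != wait->private)       /* not our event: stay asleep */
                    return 0;
            return default_wake_function(wait, mode, sync, key);
    }

    /*
     * At sleep time the waiter installs the callback in place of the
     * default one:
     *
     *      init_waitqueue_func_entry(&wq_entry, demo_filter);
     *      wq_entry.private = me;
     *      add_wait_queue(&wait_head, &wq_entry);
     */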
+
 static long do_wait(struct wait_opts *wo)
 {
-       DECLARE_WAITQUEUE(wait, current);
        struct task_struct *tsk;
        int retval;
 
        trace_sched_process_wait(wo->wo_pid);
 
-       add_wait_queue(&current->signal->wait_chldexit,&wait);
+       init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
+       wo->child_wait.private = current;
+       add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 repeat:
        /*
         * If there is nothing that can match our criteria, just get out.
@@ -1624,32 +1644,7 @@ notask:
        }
 end:
        __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&current->signal->wait_chldexit,&wait);
-       if (wo->wo_info) {
-               struct siginfo __user *infop = wo->wo_info;
-
-               if (retval > 0)
-                       retval = 0;
-               else {
-                       /*
-                        * For a WNOHANG return, clear out all the fields
-                        * we would set so the user can easily tell the
-                        * difference.
-                        */
-                       if (!retval)
-                               retval = put_user(0, &infop->si_signo);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_errno);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_code);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_pid);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_uid);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_status);
-               }
-       }
+       remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
        return retval;
 }
 
@@ -1694,6 +1689,29 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
        wo.wo_stat      = NULL;
        wo.wo_rusage    = ru;
        ret = do_wait(&wo);
+
+       if (ret > 0) {
+               ret = 0;
+       } else if (infop) {
+               /*
+                * For a WNOHANG return, clear out all the fields
+                * we would set so the user can easily tell the
+                * difference.
+                */
+               if (!ret)
+                       ret = put_user(0, &infop->si_signo);
+               if (!ret)
+                       ret = put_user(0, &infop->si_errno);
+               if (!ret)
+                       ret = put_user(0, &infop->si_code);
+               if (!ret)
+                       ret = put_user(0, &infop->si_pid);
+               if (!ret)
+                       ret = put_user(0, &infop->si_uid);
+               if (!ret)
+                       ret = put_user(0, &infop->si_status);
+       }
+
        put_pid(pid);
 
        /* avoid REGPARM breakage on x86: */
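
The zeroing above is what lets userspace tell "no state change yet" apart
from a real report when polling with WNOHANG. A sketch, assuming the caller
has at least one live child (with none, waitid() fails with ECHILD instead):

    #include <sys/wait.h>
    #include <signal.h>
    #include <stdio.h>

    int main(void)
    {
            siginfo_t info = { 0 };

            if (waitid(P_ALL, 0, &info, WEXITED | WNOHANG) == 0 &&
                info.si_pid == 0)
                    printf("children exist, but none has exited yet\n");
            return 0;
    }
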
index 51ad0b0b72664ad327c042c33272e90630ec932b..266c6af6ef1b089a1c64ee96428bd153db39217c 100644 (file)
@@ -434,6 +434,14 @@ __setup("coredump_filter=", coredump_filter_setup);
 
 #include <linux/init_task.h>
 
+static void mm_init_aio(struct mm_struct *mm)
+{
+#ifdef CONFIG_AIO
+       spin_lock_init(&mm->ioctx_lock);
+       INIT_HLIST_HEAD(&mm->ioctx_list);
+#endif
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
        atomic_set(&mm->mm_users, 1);
@@ -447,10 +455,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
        set_mm_counter(mm, file_rss, 0);
        set_mm_counter(mm, anon_rss, 0);
        spin_lock_init(&mm->page_table_lock);
-       spin_lock_init(&mm->ioctx_lock);
-       INIT_HLIST_HEAD(&mm->ioctx_list);
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
+       mm_init_aio(mm);
        mm_init_owner(mm, p);
 
        if (likely(!mm_alloc_pgd(mm))) {
@@ -511,6 +518,8 @@ void mmput(struct mm_struct *mm)
                        spin_unlock(&mmlist_lock);
                }
                put_swap_token(mm);
+               if (mm->binfmt)
+                       module_put(mm->binfmt->module);
                mmdrop(mm);
        }
 }
@@ -636,9 +645,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
        mm->hiwater_rss = get_mm_rss(mm);
        mm->hiwater_vm = mm->total_vm;
 
+       if (mm->binfmt && !try_module_get(mm->binfmt->module))
+               goto free_pt;
+
        return mm;
 
 free_pt:
+       /* don't put binfmt in mmput; we haven't taken the module reference yet */
+       mm->binfmt = NULL;
        mmput(mm);
 
 fail_nomem:
@@ -979,6 +993,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
                return ERR_PTR(-EINVAL);
 
+       /*
+        * Siblings of global init remain as zombies on exit since they are
+        * not reaped by their parent (swapper). To solve this and to avoid
+        * multi-rooted process trees, prevent global and container-inits
+        * from creating siblings.
+        */
+       if ((clone_flags & CLONE_PARENT) &&
+                               current->signal->flags & SIGNAL_UNKILLABLE)
+               return ERR_PTR(-EINVAL);
+
        retval = security_task_create(clone_flags);
        if (retval)
                goto fork_out;
@@ -1020,9 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (!try_module_get(task_thread_info(p)->exec_domain->module))
                goto bad_fork_cleanup_count;
 
-       if (p->binfmt && !try_module_get(p->binfmt->module))
-               goto bad_fork_cleanup_put_domain;
-
        p->did_exec = 0;
        delayacct_tsk_init(p);  /* Must remain after dup_task_struct() */
        copy_flags(clone_flags, p);
@@ -1310,9 +1331,6 @@ bad_fork_cleanup_cgroup:
 #endif
        cgroup_exit(p, cgroup_callbacks_done);
        delayacct_tsk_free(p);
-       if (p->binfmt)
-               module_put(p->binfmt->module);
-bad_fork_cleanup_put_domain:
        module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
index 654efd09f6a97cb6497d18be2d303364539ac7a9..70a298d6da712629f37e8a2e52b8edf4e19a8f36 100644 (file)
@@ -34,7 +34,7 @@ config GCOV_KERNEL
 config GCOV_PROFILE_ALL
        bool "Profile entire Kernel"
        depends on GCOV_KERNEL
-       depends on S390 || X86 || (PPC && EXPERIMENTAL)
+       depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
        default n
        ---help---
        This options activates profiling for the entire kernel.
index 022a4927b78539f5e123558d13e6c012d0188ead..d4e84174740018f30bdf8aa02d62e8799676a938 100644 (file)
@@ -171,12 +171,12 @@ static unsigned long timeout_jiffies(unsigned long timeout)
  * Process updating of timeout sysctl
  */
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-                                 struct file *filp, void __user *buffer,
+                                 void __user *buffer,
                                  size_t *lenp, loff_t *ppos)
 {
        int ret;
 
-       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 
        if (ret || !write)
                goto out;
index 689d20f39305be5d72908dbdf85bdb187051bdcb..9fcb53a11f872e958c95bcf37f8558e9bb148d77 100644 (file)
@@ -143,7 +143,6 @@ struct subprocess_info {
 static int ____call_usermodehelper(void *data)
 {
        struct subprocess_info *sub_info = data;
-       enum umh_wait wait = sub_info->wait;
        int retval;
 
        BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
@@ -185,14 +184,10 @@ static int ____call_usermodehelper(void *data)
         */
        set_user_nice(current, 0);
 
-       if (wait == UMH_WAIT_EXEC)
-               complete(sub_info->complete);
-
        retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
 
        /* Exec failed? */
-       if (wait != UMH_WAIT_EXEC)
-               sub_info->retval = retval;
+       sub_info->retval = retval;
        do_exit(0);
 }
 
@@ -271,14 +266,16 @@ static void __call_usermodehelper(struct work_struct *work)
 
        switch (wait) {
        case UMH_NO_WAIT:
-       case UMH_WAIT_EXEC:
                break;
 
        case UMH_WAIT_PROC:
                if (pid > 0)
                        break;
                sub_info->retval = pid;
-               break;
+               /* FALLTHROUGH */
+
+       case UMH_WAIT_EXEC:
+               complete(sub_info->complete);
        }
 }
 
index e6bc4b28aa6250aec9f281aa69509221f06d59b5..5a29397ca4b6f1633e91b8b94cfdac14f2071ff9 100644 (file)
@@ -1797,6 +1797,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
        }
 }
 
+static void free_modinfo(struct module *mod)
+{
+       struct module_attribute *attr;
+       int i;
+
+       for (i = 0; (attr = modinfo_attrs[i]); i++) {
+               if (attr->free)
+                       attr->free(mod);
+       }
+}
+
 #ifdef CONFIG_KALLSYMS
 
 /* lookup symbol in given range of kernel_symbols */
@@ -1862,13 +1873,93 @@ static char elf_type(const Elf_Sym *sym,
        return '?';
 }
 
+static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
+                           unsigned int shnum)
+{
+       const Elf_Shdr *sec;
+
+       if (src->st_shndx == SHN_UNDEF
+           || src->st_shndx >= shnum
+           || !src->st_name)
+               return false;
+
+       sec = sechdrs + src->st_shndx;
+       if (!(sec->sh_flags & SHF_ALLOC)
+#ifndef CONFIG_KALLSYMS_ALL
+           || !(sec->sh_flags & SHF_EXECINSTR)
+#endif
+           || (sec->sh_entsize & INIT_OFFSET_MASK))
+               return false;
+
+       return true;
+}
+
+static unsigned long layout_symtab(struct module *mod,
+                                  Elf_Shdr *sechdrs,
+                                  unsigned int symindex,
+                                  unsigned int strindex,
+                                  const Elf_Ehdr *hdr,
+                                  const char *secstrings,
+                                  unsigned long *pstroffs,
+                                  unsigned long *strmap)
+{
+       unsigned long symoffs;
+       Elf_Shdr *symsect = sechdrs + symindex;
+       Elf_Shdr *strsect = sechdrs + strindex;
+       const Elf_Sym *src;
+       const char *strtab;
+       unsigned int i, nsrc, ndst;
+
+       /* Put symbol section at end of init part of module. */
+       symsect->sh_flags |= SHF_ALLOC;
+       symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
+                                        symindex) | INIT_OFFSET_MASK;
+       DEBUGP("\t%s\n", secstrings + symsect->sh_name);
+
+       src = (void *)hdr + symsect->sh_offset;
+       nsrc = symsect->sh_size / sizeof(*src);
+       strtab = (void *)hdr + strsect->sh_offset;
+       for (ndst = i = 1; i < nsrc; ++i, ++src)
+               if (is_core_symbol(src, sechdrs, hdr->e_shnum)) {
+                       unsigned int j = src->st_name;
+
+                       while (!__test_and_set_bit(j, strmap) && strtab[j])
+                               ++j;
+                       ++ndst;
+               }
+
+       /* Append room for core symbols at end of core part. */
+       symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
+       mod->core_size = symoffs + ndst * sizeof(Elf_Sym);
+
+       /* Put string table section at end of init part of module. */
+       strsect->sh_flags |= SHF_ALLOC;
+       strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
+                                        strindex) | INIT_OFFSET_MASK;
+       DEBUGP("\t%s\n", secstrings + strsect->sh_name);
+
+       /* Append room for core symbols' strings at end of core part. */
+       *pstroffs = mod->core_size;
+       __set_bit(0, strmap);
+       mod->core_size += bitmap_weight(strmap, strsect->sh_size);
+
+       return symoffs;
+}
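
The strmap bitmap is the heart of this layout trick: every byte of each
wanted name, including its NUL, gets marked, and a name's offset in the
compacted table is then simply the number of marked bytes before its old
offset, which is what bitmap_weight(strmap, st_name) computes in
add_kallsyms() below. A standalone userspace sketch with invented data,
using a byte array in place of the bitmap:

    #include <stdio.h>

    static char strtab[] = "\0alpha\0beta\0gamma";  /* old string table */
    static unsigned char mark[sizeof(strtab)];

    static unsigned int weight_before(unsigned int off)
    {
            unsigned int i, w = 0;

            for (i = 0; i < off; i++)
                    w += mark[i];
            return w;
    }

    int main(void)
    {
            unsigned int keep[] = { 1, 12 };        /* "alpha", "gamma" */
            unsigned int i, j;

            mark[0] = 1;                            /* keep the leading NUL */
            for (i = 0; i < 2; i++)
                    for (j = keep[i]; ; j++) {      /* name plus its NUL */
                            mark[j] = 1;
                            if (!strtab[j])
                                    break;
                    }
            /* prints "old 1 -> new 1" and "old 12 -> new 7" */
            for (i = 0; i < 2; i++)
                    printf("old %u -> new %u\n",
                           keep[i], weight_before(keep[i]));
            return 0;
    }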
+
 static void add_kallsyms(struct module *mod,
                         Elf_Shdr *sechdrs,
+                        unsigned int shnum,
                         unsigned int symindex,
                         unsigned int strindex,
-                        const char *secstrings)
+                        unsigned long symoffs,
+                        unsigned long stroffs,
+                        const char *secstrings,
+                        unsigned long *strmap)
 {
-       unsigned int i;
+       unsigned int i, ndst;
+       const Elf_Sym *src;
+       Elf_Sym *dst;
+       char *s;
 
        mod->symtab = (void *)sechdrs[symindex].sh_addr;
        mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
@@ -1878,13 +1969,44 @@ static void add_kallsyms(struct module *mod,
        for (i = 0; i < mod->num_symtab; i++)
                mod->symtab[i].st_info
                        = elf_type(&mod->symtab[i], sechdrs, secstrings, mod);
+
+       mod->core_symtab = dst = mod->module_core + symoffs;
+       src = mod->symtab;
+       *dst = *src;
+       for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
+               if (!is_core_symbol(src, sechdrs, shnum))
+                       continue;
+               dst[ndst] = *src;
+               dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name);
+               ++ndst;
+       }
+       mod->core_num_syms = ndst;
+
+       mod->core_strtab = s = mod->module_core + stroffs;
+       for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i)
+               if (test_bit(i, strmap))
+                       *++s = mod->strtab[i];
 }
 #else
+static inline unsigned long layout_symtab(struct module *mod,
+                                         Elf_Shdr *sechdrs,
+                                         unsigned int symindex,
+                                         unsigned int strindex,
+                                         const Elf_Ehdr *hdr,
+                                         const char *secstrings,
+                                         unsigned long *pstroffs,
+                                         unsigned long *strmap)
+{
+       return 0;
+}
 static inline void add_kallsyms(struct module *mod,
                                Elf_Shdr *sechdrs,
+                               unsigned int shnum,
                                unsigned int symindex,
                                unsigned int strindex,
-                               const char *secstrings)
+                               unsigned long symoffs,
+                               unsigned long stroffs,
+                               const char *secstrings,
+                               const unsigned long *strmap)
 {
 }
 #endif /* CONFIG_KALLSYMS */
@@ -1959,6 +2081,9 @@ static noinline struct module *load_module(void __user *umod,
        struct module *mod;
        long err = 0;
        void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+#ifdef CONFIG_KALLSYMS
+       unsigned long symoffs, stroffs, *strmap;
+#endif
        mm_segment_t old_fs;
 
        DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2040,11 +2165,6 @@ static noinline struct module *load_module(void __user *umod,
        /* Don't keep modinfo and version sections. */
        sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
        sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
-#ifdef CONFIG_KALLSYMS
-       /* Keep symbol and string tables for decoding later. */
-       sechdrs[symindex].sh_flags |= SHF_ALLOC;
-       sechdrs[strindex].sh_flags |= SHF_ALLOC;
-#endif
 
        /* Check module struct version now, before we try to use module. */
        if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2080,6 +2200,13 @@ static noinline struct module *load_module(void __user *umod,
                goto free_hdr;
        }
 
+       strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size)
+                        * sizeof(long), GFP_KERNEL);
+       if (!strmap) {
+               err = -ENOMEM;
+               goto free_mod;
+       }
+
        if (find_module(mod->name)) {
                err = -EEXIST;
                goto free_mod;
@@ -2109,6 +2236,8 @@ static noinline struct module *load_module(void __user *umod,
           this is done generically; there doesn't appear to be any
           special cases for the architectures. */
        layout_sections(mod, hdr, sechdrs, secstrings);
+       symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr,
+                               secstrings, &stroffs, strmap);
 
        /* Do the allocs. */
        ptr = module_alloc_update_bounds(mod->core_size);
@@ -2313,7 +2442,10 @@ static noinline struct module *load_module(void __user *umod,
        percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
                       sechdrs[pcpuindex].sh_size);
 
-       add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+       add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
+                    symoffs, stroffs, secstrings, strmap);
+       kfree(strmap);
+       strmap = NULL;
 
        if (!mod->taints) {
                struct _ddebug *debug;
@@ -2385,13 +2517,14 @@ static noinline struct module *load_module(void __user *umod,
        synchronize_sched();
        module_arch_cleanup(mod);
  cleanup:
+       free_modinfo(mod);
        kobject_del(&mod->mkobj.kobj);
        kobject_put(&mod->mkobj.kobj);
  free_unload:
        module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
- free_init:
        percpu_modfree(mod->refptr);
+ free_init:
 #endif
        module_free(mod, mod->module_init);
  free_core:
@@ -2402,6 +2535,7 @@ static noinline struct module *load_module(void __user *umod,
                percpu_modfree(percpu);
  free_mod:
        kfree(args);
+       kfree(strmap);
  free_hdr:
        vfree(hdr);
        return ERR_PTR(err);
@@ -2491,6 +2625,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
        /* Drop initial reference. */
        module_put(mod);
        trim_init_extable(mod);
+#ifdef CONFIG_KALLSYMS
+       mod->num_symtab = mod->core_num_syms;
+       mod->symtab = mod->core_symtab;
+       mod->strtab = mod->core_strtab;
+#endif
        module_free(mod, mod->module_init);
        mod->module_init = NULL;
        mod->init_size = 0;
index 5aa854f9e5ae0cae90d37d7594d265cdbab6fc01..2a5dfec8efe0504fc974a9500e934c51b78a5207 100644 (file)
@@ -42,8 +42,8 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
  *       (hence either you are in the same cgroup as task, or in an
  *        ancestor cgroup thereof)
  */
-static int ns_can_attach(struct cgroup_subsys *ss,
-               struct cgroup *new_cgroup, struct task_struct *task)
+static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
+                        struct task_struct *task, bool threadgroup)
 {
        if (current != task) {
                if (!capable(CAP_SYS_ADMIN))
@@ -56,6 +56,18 @@ static int ns_can_attach(struct cgroup_subsys *ss,
        if (!cgroup_is_descendant(new_cgroup, task))
                return -EPERM;
 
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+                       if (!cgroup_is_descendant(new_cgroup, c)) {
+                               rcu_read_unlock();
+                               return -EPERM;
+                       }
+               }
+               rcu_read_unlock();
+       }
+
        return 0;
 }
 
index 7f6912ced2bab2c957da0ef3edc6e70625560837..9da58eabdcb246dd3b156d7799ca49bf2799a304 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/ctype.h>
 
 #if 0
 #define DEBUGP printk
@@ -87,7 +88,7 @@ static char *next_arg(char *args, char **param, char **val)
        }
 
        for (i = 0; args[i]; i++) {
-               if (args[i] == ' ' && !in_quote)
+               if (isspace(args[i]) && !in_quote)
                        break;
                if (equals == 0) {
                        if (args[i] == '=')
@@ -121,7 +122,7 @@ static char *next_arg(char *args, char **param, char **val)
                next = args + i;
 
        /* Chew up trailing spaces. */
-       while (*next == ' ')
+       while (isspace(*next))
                next++;
        return next;
 }
@@ -138,7 +139,7 @@ int parse_args(const char *name,
        DEBUGP("Parsing ARGS: %s\n", args);
 
        /* Chew leading spaces */
-       while (*args == ' ')
+       while (isspace(*args))
                args++;
 
        while (*args) {
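
The practical effect of switching from ' ' to isspace() is that tabs and
newlines in the argument string now separate parameters too. A userspace
sketch over an invented argument string (it assumes no runs of consecutive
whitespace):

    #include <ctype.h>
    #include <stdio.h>

    int main(void)
    {
            const char *args = "foo=1\tbar=2\nbaz"; /* invented cmdline */
            const char *p;
            int params = 1;

            for (p = args; *p; p++)
                    if (isspace((unsigned char)*p))
                            params++;
            printf("%d parameters\n", params);      /* prints 3 */
            return 0;
    }
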
index 821722ae58a732760f62a00912554e3779fc2d04..86b3796b0436726e5b0208021cc990bfdc7044e2 100644 (file)
@@ -118,7 +118,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old
 {
        if (!(flags & CLONE_NEWPID))
                return get_pid_ns(old_ns);
-       if (flags & CLONE_THREAD)
+       if (flags & (CLONE_THREAD|CLONE_PARENT))
                return ERR_PTR(-EINVAL);
        return create_pid_namespace(old_ns);
 }
index 8ba052c86d487e07e71d40c81cd8c783e3d931a9..b101cdc4df3f644d1ffb170861bc9c971a9adb72 100644 (file)
@@ -13,7 +13,6 @@
 
 #include <linux/module.h>
 #include <linux/file.h>
-#include <linux/utsname.h>
 #include <linux/delay.h>
 #include <linux/bitops.h>
 #include <linux/genhd.h>
index 307c285af59e89141412181d547f0fca7d4c7073..23bd09cd042ea9e6987763bb4261a331de24e4a7 100644 (file)
@@ -266,9 +266,10 @@ static int ignoring_children(struct sighand_struct *sigh)
  * or self-reaping.  Do notification now if it would have happened earlier.
  * If it should reap itself, return true.
  *
- * If it's our own child, there is no notification to do.
- * But if our normal children self-reap, then this child
- * was prevented by ptrace and we must reap it now.
+ * If it's our own child, there is no notification to do. But if our normal
+ * children self-reap, then this child was prevented by ptrace and we must
+ * reap it now; in that case we must also wake up sub-threads sleeping in
+ * do_wait().
  */
 static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
 {
@@ -278,8 +279,10 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
                if (!task_detached(p) && thread_group_empty(p)) {
                        if (!same_thread_group(p->real_parent, tracer))
                                do_notify_parent(p, p->exit_signal);
-                       else if (ignoring_children(tracer->sighand))
+                       else if (ignoring_children(tracer->sighand)) {
+                               __wake_up_parent(p, tracer);
                                p->exit_signal = -1;
+                       }
                }
                if (task_detached(p)) {
                        /* Mark it as in the process of being reaped. */
index e1338f074314d7e85fca035ac4ab55f20704b8cd..88faec23e83301e5b64490318aba2ac722dfb00c 100644 (file)
@@ -19,6 +19,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
        spin_lock_init(&counter->lock);
        counter->limit = RESOURCE_MAX;
+       counter->soft_limit = RESOURCE_MAX;
        counter->parent = parent;
 }
 
@@ -36,17 +37,27 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
 }
 
 int res_counter_charge(struct res_counter *counter, unsigned long val,
-                       struct res_counter **limit_fail_at)
+                       struct res_counter **limit_fail_at,
+                       struct res_counter **soft_limit_fail_at)
 {
        int ret;
        unsigned long flags;
        struct res_counter *c, *u;
 
        *limit_fail_at = NULL;
+       if (soft_limit_fail_at)
+               *soft_limit_fail_at = NULL;
        local_irq_save(flags);
        for (c = counter; c != NULL; c = c->parent) {
                spin_lock(&c->lock);
                ret = res_counter_charge_locked(c, val);
+               /*
+                * With soft limits, we return the highest ancestor
+                * that exceeds its soft limit
+                */
+               if (soft_limit_fail_at &&
+                       !res_counter_soft_limit_check_locked(c))
+                       *soft_limit_fail_at = c;
                spin_unlock(&c->lock);
                if (ret < 0) {
                        *limit_fail_at = c;
@@ -74,7 +85,8 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
        counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+                               bool *was_soft_limit_excess)
 {
        unsigned long flags;
        struct res_counter *c;
@@ -82,6 +94,9 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
        local_irq_save(flags);
        for (c = counter; c != NULL; c = c->parent) {
                spin_lock(&c->lock);
+               if (was_soft_limit_excess)
+                       *was_soft_limit_excess =
+                               !res_counter_soft_limit_check_locked(c);
                res_counter_uncharge_locked(c, val);
                spin_unlock(&c->lock);
        }
@@ -101,6 +116,8 @@ res_counter_member(struct res_counter *counter, int member)
                return &counter->limit;
        case RES_FAILCNT:
                return &counter->failcnt;
+       case RES_SOFT_LIMIT:
+               return &counter->soft_limit;
        };
 
        BUG();
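
A hypothetical caller sketch, not from this patch, showing how the widened
prototypes separate the two signals: a hard-limit failure returns an error
with nothing left charged, while a soft-limit excess succeeds and is merely
reported through the extra out-parameter:

    #include <linux/kernel.h>
    #include <linux/mm.h>
    #include <linux/res_counter.h>

    static int demo_charge_page(struct res_counter *cnt)
    {
            struct res_counter *fail_at, *soft_fail_at;
            int ret;

            ret = res_counter_charge(cnt, PAGE_SIZE, &fail_at, &soft_fail_at);
            if (ret)                /* hard limit hit at *fail_at */
                    return ret;
            if (soft_fail_at)       /* charged, but an ancestor is over */
                    pr_debug("soft limit exceeded at %p\n", soft_fail_at);
            return 0;
    }
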
index 2f76e06bea583d28e95d0466f35eb41d569b4986..ee61f454a98b9005ed7055b6f3e4bba5f2036ebe 100644 (file)
@@ -10312,7 +10312,7 @@ static int sched_rt_global_constraints(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 int sched_rt_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int ret;
@@ -10323,7 +10323,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
        old_period = sysctl_sched_rt_period;
        old_runtime = sysctl_sched_rt_runtime;
 
-       ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if (!ret && write) {
                ret = sched_rt_global_constraints();
@@ -10377,8 +10377,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 
 static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                     struct task_struct *tsk)
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
        if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
@@ -10388,15 +10387,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
        if (tsk->sched_class != &fair_sched_class)
                return -EINVAL;
 #endif
+       return 0;
+}
 
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+                     struct task_struct *tsk, bool threadgroup)
+{
+       int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+       if (retval)
+               return retval;
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       retval = cpu_cgroup_can_attach_task(cgrp, c);
+                       if (retval) {
+                               rcu_read_unlock();
+                               return retval;
+                       }
+               }
+               rcu_read_unlock();
+       }
        return 0;
 }
 
 static void
 cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                       struct cgroup *old_cont, struct task_struct *tsk)
+                 struct cgroup *old_cont, struct task_struct *tsk,
+                 bool threadgroup)
 {
        sched_move_task(tsk);
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       sched_move_task(c);
+               }
+               rcu_read_unlock();
+       }
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
index ecc637a0d591b74ced8c17eea315631296b0b205..4e777b47eedac1f5f779c002091e2dc8b696abe1 100644 (file)
@@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 
 #ifdef CONFIG_SCHED_DEBUG
 int sched_nr_latency_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
-       int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
        if (ret || !write)
                return ret;
index 64c5deeaca5d9a70f64f391bc73fde46e300bff1..6705320784fd2b07a518c4d2bb47520d0b342aa8 100644 (file)
@@ -705,7 +705,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
 
                if (why) {
                        /*
-                        * The first thread which returns from finish_stop()
+                        * The first thread which returns from do_signal_stop()
                         * will take ->siglock, notice SIGNAL_CLD_MASK, and
                         * notify its parent. See get_signal_to_deliver().
                         */
@@ -971,6 +971,20 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
        return send_signal(sig, info, t, 0);
 }
 
+int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+                       bool group)
+{
+       unsigned long flags;
+       int ret = -ESRCH;
+
+       if (lock_task_sighand(p, &flags)) {
+               ret = send_signal(sig, info, p, group);
+               unlock_task_sighand(p, &flags);
+       }
+
+       return ret;
+}
+
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
@@ -1036,12 +1050,6 @@ void zap_other_threads(struct task_struct *p)
        }
 }
 
-int __fatal_signal_pending(struct task_struct *tsk)
-{
-       return sigismember(&tsk->pending.signal, SIGKILL);
-}
-EXPORT_SYMBOL(__fatal_signal_pending);
-
 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
 {
        struct sighand_struct *sighand;
@@ -1068,18 +1076,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long
  */
 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
-       unsigned long flags;
-       int ret;
+       int ret = check_kill_permission(sig, info, p);
 
-       ret = check_kill_permission(sig, info, p);
-
-       if (!ret && sig) {
-               ret = -ESRCH;
-               if (lock_task_sighand(p, &flags)) {
-                       ret = __group_send_sig_info(sig, info, p);
-                       unlock_task_sighand(p, &flags);
-               }
-       }
+       if (!ret && sig)
+               ret = do_send_sig_info(sig, info, p, true);
 
        return ret;
 }
@@ -1224,15 +1224,9 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
  * These are for backward compatibility with the rest of the kernel source.
  */
 
-/*
- * The caller must ensure the task can't exit.
- */
 int
 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
-       int ret;
-       unsigned long flags;
-
        /*
         * Make sure legacy kernel users don't send in bad values
         * (normal paths check this in check_kill_permission).
@@ -1240,10 +1234,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
        if (!valid_signal(sig))
                return -EINVAL;
 
-       spin_lock_irqsave(&p->sighand->siglock, flags);
-       ret = specific_send_sig_info(sig, info, p);
-       spin_unlock_irqrestore(&p->sighand->siglock, flags);
-       return ret;
+       return do_send_sig_info(sig, info, p, false);
 }
 
 #define __si_special(priv) \
@@ -1382,15 +1373,6 @@ ret:
        return ret;
 }
 
-/*
- * Wake up any threads in the parent blocked in wait* syscalls.
- */
-static inline void __wake_up_parent(struct task_struct *p,
-                                   struct task_struct *parent)
-{
-       wake_up_interruptible_sync(&parent->signal->wait_chldexit);
-}
-
 /*
  * Let a parent know about the death of a child.
  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
@@ -1673,29 +1655,6 @@ void ptrace_notify(int exit_code)
        spin_unlock_irq(&current->sighand->siglock);
 }
 
-static void
-finish_stop(int stop_count)
-{
-       /*
-        * If there are no other threads in the group, or if there is
-        * a group stop in progress and we are the last to stop,
-        * report to the parent.  When ptraced, every thread reports itself.
-        */
-       if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(current, CLD_STOPPED);
-               read_unlock(&tasklist_lock);
-       }
-
-       do {
-               schedule();
-       } while (try_to_freeze());
-       /*
-        * Now we don't run again until continued.
-        */
-       current->exit_code = 0;
-}
-
 /*
  * This performs the stopping for SIGSTOP and other stop signals.
  * We have to stop all threads in the thread group.
@@ -1705,15 +1664,9 @@ finish_stop(int stop_count)
 static int do_signal_stop(int signr)
 {
        struct signal_struct *sig = current->signal;
-       int stop_count;
+       int notify;
 
-       if (sig->group_stop_count > 0) {
-               /*
-                * There is a group stop in progress.  We don't need to
-                * start another one.
-                */
-               stop_count = --sig->group_stop_count;
-       } else {
+       if (!sig->group_stop_count) {
                struct task_struct *t;
 
                if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
@@ -1725,7 +1678,7 @@ static int do_signal_stop(int signr)
                 */
                sig->group_exit_code = signr;
 
-               stop_count = 0;
+               sig->group_stop_count = 1;
                for (t = next_thread(current); t != current; t = next_thread(t))
                        /*
                         * Setting state to TASK_STOPPED for a group
@@ -1734,19 +1687,44 @@ static int do_signal_stop(int signr)
                         */
                        if (!(t->flags & PF_EXITING) &&
                            !task_is_stopped_or_traced(t)) {
-                               stop_count++;
+                               sig->group_stop_count++;
                                signal_wake_up(t, 0);
                        }
-               sig->group_stop_count = stop_count;
        }
+       /*
+        * If there are no other threads in the group, or if there is
+        * a group stop in progress and we are the last to stop, report
+        * to the parent.  When ptraced, every thread reports itself.
+        */
+       notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
+       notify = tracehook_notify_jctl(notify, CLD_STOPPED);
+       /*
+        * tracehook_notify_jctl() can drop and reacquire siglock, so
+        * we keep ->group_stop_count != 0 before the call. If SIGCONT
+        * or SIGKILL arrives in between, ->group_stop_count becomes 0.
+        */
+       if (sig->group_stop_count) {
+               if (!--sig->group_stop_count)
+                       sig->flags = SIGNAL_STOP_STOPPED;
+               current->exit_code = sig->group_exit_code;
+               __set_current_state(TASK_STOPPED);
+       }
+       spin_unlock_irq(&current->sighand->siglock);
 
-       if (stop_count == 0)
-               sig->flags = SIGNAL_STOP_STOPPED;
-       current->exit_code = sig->group_exit_code;
-       __set_current_state(TASK_STOPPED);
+       if (notify) {
+               read_lock(&tasklist_lock);
+               do_notify_parent_cldstop(current, notify);
+               read_unlock(&tasklist_lock);
+       }
+
+       /* Now we don't run again until woken by SIGCONT or SIGKILL */
+       do {
+               schedule();
+       } while (try_to_freeze());
+
+       tracehook_finish_jctl();
+       current->exit_code = 0;
 
-       spin_unlock_irq(&current->sighand->siglock);
-       finish_stop(stop_count);
        return 1;
 }
 
@@ -1815,14 +1793,15 @@ relock:
                int why = (signal->flags & SIGNAL_STOP_CONTINUED)
                                ? CLD_CONTINUED : CLD_STOPPED;
                signal->flags &= ~SIGNAL_CLD_MASK;
-               spin_unlock_irq(&sighand->siglock);
 
-               if (unlikely(!tracehook_notify_jctl(1, why)))
-                       goto relock;
+               why = tracehook_notify_jctl(why, CLD_CONTINUED);
+               spin_unlock_irq(&sighand->siglock);
 
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(current->group_leader, why);
-               read_unlock(&tasklist_lock);
+               if (why) {
+                       read_lock(&tasklist_lock);
+                       do_notify_parent_cldstop(current->group_leader, why);
+                       read_unlock(&tasklist_lock);
+               }
                goto relock;
        }
 
@@ -1987,14 +1966,14 @@ void exit_signals(struct task_struct *tsk)
        if (unlikely(tsk->signal->group_stop_count) &&
                        !--tsk->signal->group_stop_count) {
                tsk->signal->flags = SIGNAL_STOP_STOPPED;
-               group_stop = 1;
+               group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
        }
 out:
        spin_unlock_irq(&tsk->sighand->siglock);
 
-       if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
+       if (unlikely(group_stop)) {
                read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(tsk, CLD_STOPPED);
+               do_notify_parent_cldstop(tsk, group_stop);
                read_unlock(&tasklist_lock);
        }
 }
@@ -2290,7 +2269,6 @@ static int
 do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 {
        struct task_struct *p;
-       unsigned long flags;
        int error = -ESRCH;
 
        rcu_read_lock();
@@ -2300,14 +2278,16 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
                /*
                 * The null signal is a permissions and process existence
                 * probe.  No signal is actually delivered.
-                *
-                * If lock_task_sighand() fails we pretend the task dies
-                * after receiving the signal. The window is tiny, and the
-                * signal is private anyway.
                 */
-               if (!error && sig && lock_task_sighand(p, &flags)) {
-                       error = specific_send_sig_info(sig, info, p);
-                       unlock_task_sighand(p, &flags);
+               if (!error && sig) {
+                       error = do_send_sig_info(sig, info, p, false);
+                       /*
+                        * If lock_task_sighand() failed we pretend the task
+                        * dies after receiving the signal. The window is tiny,
+                        * and the signal is private anyway.
+                        */
+                       if (unlikely(error == -ESRCH))
+                               error = 0;
                }
        }
        rcu_read_unlock();
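The signal.c refactor funnels both group-wide and thread-private delivery through the new do_send_sig_info(); the bool group argument selects the pending set, and -ESRCH reports that lock_task_sighand() lost a race with exit. A hedged kernel-context usage sketch, mirroring how do_send_specific() above treats that race:

static int send_private_sig(int sig, struct siginfo *info,
                            struct task_struct *p)
{
        int err = do_send_sig_info(sig, info, p, false);

        /* -ESRCH: the task exited first; the signal is private anyway */
        if (unlikely(err == -ESRCH))
                err = 0;
        return err;
}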
index 09d7519557d35181fab35591a17516efe5aba723..0d31135efbf4cab0b98babcaa05355eaf5130dfa 100644 (file)
@@ -26,10 +26,10 @@ static void slow_work_cull_timeout(unsigned long);
 static void slow_work_oom_timeout(unsigned long);
 
 #ifdef CONFIG_SYSCTL
-static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *,
+static int slow_work_min_threads_sysctl(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 
-static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *,
+static int slow_work_max_threads_sysctl(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 #endif
 
@@ -493,10 +493,10 @@ static void slow_work_oom_timeout(unsigned long data)
  * Handle adjustment of the minimum number of threads
  */
 static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
-                                       struct file *filp, void __user *buffer,
+                                       void __user *buffer,
                                        size_t *lenp, loff_t *ppos)
 {
-       int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        int n;
 
        if (ret == 0) {
@@ -521,10 +521,10 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
  * Handle adjustment of the maximum number of threads
  */
 static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
-                                       struct file *filp, void __user *buffer,
+                                       void __user *buffer,
                                        size_t *lenp, loff_t *ppos)
 {
-       int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        int n;
 
        if (ret == 0) {
index fd47a256a24e4e95005203ebe117a4e47fd9b44c..c9d1c7835c2fa150e154ae95d35e16364b8e385a 100644 (file)
@@ -347,13 +347,6 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
        generic_exec_single(cpu, data, wait);
 }
 
-/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
-
-#ifndef arch_send_call_function_ipi_mask
-# define arch_send_call_function_ipi_mask(maskp) \
-        arch_send_call_function_ipi(*(maskp))
-#endif
-
 /**
  * smp_call_function_many(): Run a function on a set of other CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
index 88796c330838dd66c24b8628f8b56ce27d3a3d23..81324d12eb35a5db7fae8a0ae3d18c76ca38ce67 100644 (file)
@@ -90,11 +90,11 @@ void touch_all_softlockup_watchdogs(void)
 EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
 
 int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-                            struct file *filp, void __user *buffer,
+                            void __user *buffer,
                             size_t *lenp, loff_t *ppos)
 {
        touch_all_softlockup_watchdogs();
-       return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 }
 
 /*
index ebcb15611728c510ef59312e3a0cd398757e0b8f..255475d163e0cdb62602306a28134d67005e87c0 100644 (file)
@@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                                current->timer_slack_ns = arg2;
                        error = 0;
                        break;
+               case PR_MCE_KILL:
+                       if (arg4 | arg5)
+                               return -EINVAL;
+                       switch (arg2) {
+                       case 0:
+                               if (arg3 != 0)
+                                       return -EINVAL;
+                               current->flags &= ~PF_MCE_PROCESS;
+                               break;
+                       case 1:
+                               current->flags |= PF_MCE_PROCESS;
+                               if (arg3 != 0)
+                                       current->flags |= PF_MCE_EARLY;
+                               else
+                                       current->flags &= ~PF_MCE_EARLY;
+                               break;
+                       default:
+                               return -EINVAL;
+                       }
+                       error = 0;
+                       break;
+
                default:
                        error = -EINVAL;
                        break;
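From userspace the new prctl option toggles the per-process memory-failure policy: arg2 selects clear (0) or set (1), and with arg2 == 1 a non-zero arg3 requests early kill. A hedged usage sketch; PR_MCE_KILL's value of 33 matches this series but is an assumption here:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33  /* assumed to match this series */
#endif

int main(void)
{
        /* arg2 == 1: set the policy; arg3 != 0: kill early on corruption */
        if (prctl(PR_MCE_KILL, 1, 1, 0, 0))
                perror("prctl(PR_MCE_KILL)");
        return 0;
}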
index 515bc230ac2aa61533306d6655197b0abf3ecc2e..e06d0b8d195191fa818d166c362ba652e4802d38 100644 (file)
@@ -49,6 +49,7 @@ cond_syscall(sys_sendmsg);
 cond_syscall(compat_sys_sendmsg);
 cond_syscall(sys_recvmsg);
 cond_syscall(compat_sys_recvmsg);
+cond_syscall(compat_sys_recvfrom);
 cond_syscall(sys_socketcall);
 cond_syscall(sys_futex);
 cond_syscall(compat_sys_futex);
index 0dfaa47d7cb6d0ec2ebde15695cc7e4a99fffcea..0d949c517412ee16822a5ca7d6e7c79218543741 100644 (file)
@@ -26,7 +26,6 @@
 #include <linux/proc_fs.h>
 #include <linux/security.h>
 #include <linux/ctype.h>
-#include <linux/utsname.h>
 #include <linux/kmemcheck.h>
 #include <linux/smp_lock.h>
 #include <linux/fs.h>
@@ -77,6 +76,7 @@ extern int max_threads;
 extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
+extern unsigned int core_pipe_limit;
 extern int pid_max;
 extern int min_free_kbytes;
 extern int pid_max_min, pid_max_max;
@@ -163,9 +163,9 @@ extern int max_lock_depth;
 #endif
 
 #ifdef CONFIG_PROC_SYSCTL
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
@@ -424,6 +424,14 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dostring,
                .strategy       = &sysctl_string,
        },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "core_pipe_limit",
+               .data           = &core_pipe_limit,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
 #ifdef CONFIG_PROC_SYSCTL
        {
                .procname       = "tainted",
@@ -1390,6 +1398,31 @@ static struct ctl_table vm_table[] = {
                .mode           = 0644,
                .proc_handler   = &scan_unevictable_handler,
        },
+#ifdef CONFIG_MEMORY_FAILURE
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "memory_failure_early_kill",
+               .data           = &sysctl_memory_failure_early_kill,
+               .maxlen         = sizeof(sysctl_memory_failure_early_kill),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "memory_failure_recovery",
+               .data           = &sysctl_memory_failure_recovery,
+               .maxlen         = sizeof(sysctl_memory_failure_recovery),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+#endif
+
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
@@ -2218,7 +2251,7 @@ void sysctl_head_put(struct ctl_table_header *head)
 #ifdef CONFIG_PROC_SYSCTL
 
 static int _proc_do_string(void* data, int maxlen, int write,
-                          struct file *filp, void __user *buffer,
+                          void __user *buffer,
                           size_t *lenp, loff_t *ppos)
 {
        size_t len;
@@ -2279,7 +2312,6 @@ static int _proc_do_string(void* data, int maxlen, int write,
  * proc_dostring - read a string sysctl
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2293,10 +2325,10 @@ static int _proc_do_string(void* data, int maxlen, int write,
  *
  * Returns 0 on success.
  */
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       return _proc_do_string(table->data, table->maxlen, write, filp,
+       return _proc_do_string(table->data, table->maxlen, write,
                               buffer, lenp, ppos);
 }
 
@@ -2321,7 +2353,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
 }
 
 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
-                 int write, struct file *filp, void __user *buffer,
+                 int write, void __user *buffer,
                  size_t *lenp, loff_t *ppos,
                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
@@ -2428,13 +2460,13 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 #undef TMPBUFLEN
 }
 
-static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+static int do_proc_dointvec(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos,
                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
                  void *data)
 {
-       return __do_proc_dointvec(table->data, table, write, filp,
+       return __do_proc_dointvec(table->data, table, write,
                        buffer, lenp, ppos, conv, data);
 }
 
@@ -2442,7 +2474,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2452,10 +2483,10 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil
  *
  * Returns 0 on success.
  */
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+    return do_proc_dointvec(table,write,buffer,lenp,ppos,
                            NULL,NULL);
 }
 
@@ -2463,7 +2494,7 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
  * Taint values can only be increased
  * This means we can safely use a temporary.
  */
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table t;
@@ -2475,7 +2506,7 @@ static int proc_taint(struct ctl_table *table, int write, struct file *filp,
 
        t = *table;
        t.data = &tmptaint;
-       err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
+       err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
        if (err < 0)
                return err;
 
@@ -2527,7 +2558,6 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
  * proc_dointvec_minmax - read a vector of integers with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2540,19 +2570,18 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct do_proc_dointvec_minmax_conv_param param = {
                .min = (int *) table->extra1,
                .max = (int *) table->extra2,
        };
-       return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+       return do_proc_dointvec(table, write, buffer, lenp, ppos,
                                do_proc_dointvec_minmax_conv, &param);
 }
 
 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
-                                    struct file *filp,
                                     void __user *buffer,
                                     size_t *lenp, loff_t *ppos,
                                     unsigned long convmul,
@@ -2657,21 +2686,19 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
 }
 
 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
-                                    struct file *filp,
                                     void __user *buffer,
                                     size_t *lenp, loff_t *ppos,
                                     unsigned long convmul,
                                     unsigned long convdiv)
 {
        return __do_proc_doulongvec_minmax(table->data, table, write,
-                       filp, buffer, lenp, ppos, convmul, convdiv);
+                       buffer, lenp, ppos, convmul, convdiv);
 }
 
 /**
  * proc_doulongvec_minmax - read a vector of long integers with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2684,17 +2711,16 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
  *
  * Returns 0 on success.
  */
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
+    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
 }
 
 /**
  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2709,11 +2735,10 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp
  * Returns 0 on success.
  */
 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-                                     struct file *filp,
                                      void __user *buffer,
                                      size_t *lenp, loff_t *ppos)
 {
-    return do_proc_doulongvec_minmax(table, write, filp, buffer,
+    return do_proc_doulongvec_minmax(table, write, buffer,
                                     lenp, ppos, HZ, 1000l);
 }
 
@@ -2789,7 +2814,6 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
  * proc_dointvec_jiffies - read a vector of integers as seconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2801,10 +2825,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+    return do_proc_dointvec(table,write,buffer,lenp,ppos,
                            do_proc_dointvec_jiffies_conv,NULL);
 }
 
@@ -2812,7 +2836,6 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: pointer to the file position
@@ -2824,10 +2847,10 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
                                 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+    return do_proc_dointvec(table,write,buffer,lenp,ppos,
                            do_proc_dointvec_userhz_jiffies_conv,NULL);
 }
 
@@ -2835,7 +2858,6 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file
  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2848,14 +2870,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file
  *
  * Returns 0 on success.
  */
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+       return do_proc_dointvec(table, write, buffer, lenp, ppos,
                                do_proc_dointvec_ms_jiffies_conv, NULL);
 }
 
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct pid *new_pid;
@@ -2864,7 +2886,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
 
        tmp = pid_vnr(cad_pid);
 
-       r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
+       r = __do_proc_dointvec(&tmp, table, write, buffer,
                               lenp, ppos, NULL, NULL);
        if (r || !write)
                return r;
@@ -2879,50 +2901,49 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
 
 #else /* CONFIG_PROC_FS */
 
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-                                     struct file *filp,
                                      void __user *buffer,
                                      size_t *lenp, loff_t *ppos)
 {
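The knobs added above follow the CTL_UNNUMBERED convention: no binary sysctl number, only a procname plus one of the generic proc handlers. An illustrative entry of the same shape (my_limit is hypothetical; registration through register_sysctl_table() on a parent directory table is assumed and not shown):

static unsigned int my_limit;

static struct ctl_table my_table[] = {
        {
                .ctl_name       = CTL_UNNUMBERED,
                .procname       = "my_limit",
                .data           = &my_limit,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        { .ctl_name = 0 }       /* terminator */
};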
index 0b0a6366c9d482b8cabbbdc3e290087e2622c0bf..ee266620b06ca336a9246489ff9d01d1dd7cd91d 100644 (file)
@@ -1,4 +1,4 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)                += clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)              += tick-common.o
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c
new file mode 100644 (file)
index 0000000..86628e7
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+ * This file is part of the GNU C Library.
+ * Contributed by Paul Eggert (eggert@twinsun.com).
+ *
+ * The GNU C Library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * The GNU C Library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Converts the calendar time to broken-down time representation
+ * Based on code from glibc-2.6
+ *
+ * 2009-7-14:
+ *   Moved from glibc-2.6 to kernel by Zhaolei <zhaolei@cn.fujitsu.com>
+ */
+
+#include <linux/time.h>
+#include <linux/module.h>
+
+/*
+ * Nonzero if YEAR is a leap year (every 4 years,
+ * except every 100th isn't, and every 400th is).
+ */
+static int __isleap(long year)
+{
+       return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0);
+}
+
+/* Floor division for long type (rounds toward negative infinity) */
+static long math_div(long a, long b)
+{
+       return a / b - (a % b < 0);
+}
+
+/* How many leap years between y1 and y2; y1 must be less than or equal to y2 */
+static long leaps_between(long y1, long y2)
+{
+       long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
+               + math_div(y1 - 1, 400);
+       long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
+               + math_div(y2 - 1, 400);
+       return leaps2 - leaps1;
+}
+
+/* How many days come before each month (0-12). */
+static const unsigned short __mon_yday[2][13] = {
+       /* Normal years. */
+       {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
+       /* Leap years. */
+       {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
+};
+
+#define SECS_PER_HOUR  (60 * 60)
+#define SECS_PER_DAY   (SECS_PER_HOUR * 24)
+
+/**
+ * time_to_tm - converts the calendar time to local broken-down time
+ *
+ * @totalsecs: the number of seconds elapsed since 00:00:00 on January 1, 1970,
+ *             Coordinated Universal Time (UTC).
+ * @offset:    offset in seconds to add to totalsecs.
+ * @result:    pointer to the struct tm variable that receives the broken-down time
+ */
+void time_to_tm(time_t totalsecs, int offset, struct tm *result)
+{
+       long days, rem, y;
+       const unsigned short *ip;
+
+       days = totalsecs / SECS_PER_DAY;
+       rem = totalsecs % SECS_PER_DAY;
+       rem += offset;
+       while (rem < 0) {
+               rem += SECS_PER_DAY;
+               --days;
+       }
+       while (rem >= SECS_PER_DAY) {
+               rem -= SECS_PER_DAY;
+               ++days;
+       }
+
+       result->tm_hour = rem / SECS_PER_HOUR;
+       rem %= SECS_PER_HOUR;
+       result->tm_min = rem / 60;
+       result->tm_sec = rem % 60;
+
+       /* January 1, 1970 was a Thursday. */
+       result->tm_wday = (4 + days) % 7;
+       if (result->tm_wday < 0)
+               result->tm_wday += 7;
+
+       y = 1970;
+
+       while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
+               /* Guess a corrected year, assuming 365 days per year. */
+               long yg = y + math_div(days, 365);
+
+               /* Adjust DAYS and Y to match the guessed year. */
+               days -= (yg - y) * 365 + leaps_between(y, yg);
+               y = yg;
+       }
+
+       result->tm_year = y - 1900;
+
+       result->tm_yday = days;
+
+       ip = __mon_yday[__isleap(y)];
+       for (y = 11; days < ip[y]; y--)
+               continue;
+       days -= ip[y];
+
+       result->tm_mon = y;
+       result->tm_mday = days + 1;
+}
+EXPORT_SYMBOL(time_to_tm);
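A hedged usage sketch for the helper just added: break the current wall-clock seconds down in UTC (offset 0). current_kernel_time() and the pr_info() format are assumptions of the sketch, not part of the patch:

#include <linux/time.h>
#include <linux/kernel.h>

static void example_print_utc(void)
{
        struct timespec ts = current_kernel_time();
        struct tm tm;

        time_to_tm(ts.tv_sec, 0, &tm); /* offset 0 == UTC */
        pr_info("%04ld-%02d-%02d %02d:%02d:%02d UTC\n",
                tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
                tm.tm_hour, tm.tm_min, tm.tm_sec);
}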
index 23df7771c937acfab46e4f695950a69f35fbe44b..a142579765bf6635ef8f426ac2e3f99b95daaaf0 100644 (file)
@@ -3015,7 +3015,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
 
 int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
-                    struct file *file, void __user *buffer, size_t *lenp,
+                    void __user *buffer, size_t *lenp,
                     loff_t *ppos)
 {
        int ret;
@@ -3025,7 +3025,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
        mutex_lock(&ftrace_lock);
 
-       ret  = proc_dointvec(table, write, file, buffer, lenp, ppos);
+       ret  = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
                goto out;
index 6c0f6a8a22ebd5eafce5adf55423d4302734cd18..411af37f4be4755b2bb1670d7cef18d16fc39c94 100644 (file)
@@ -1984,11 +1984,9 @@ __tracing_open(struct inode *inode, struct file *file)
        if (current_trace)
                *iter->trace = *current_trace;
 
-       if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
                goto fail;
 
-       cpumask_clear(iter->started);
-
        if (current_trace && current_trace->print_max)
                iter->tr = &max_tr;
        else
@@ -4389,7 +4387,7 @@ __init static int tracer_alloc_buffers(void)
        if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
                goto out_free_buffer_mask;
 
-       if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
                goto out_free_tracing_cpumask;
 
        /* To save memory, keep the ring buffer size to its minimum */
@@ -4400,7 +4398,6 @@ __init static int tracer_alloc_buffers(void)
 
        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
        cpumask_copy(tracing_cpumask, cpu_all_mask);
-       cpumask_clear(tracing_reader_cpumask);
 
        /* TODO: make the number of buffers hot pluggable with CPUS */
        global_trace.buffer = ring_buffer_alloc(ring_buf_size,
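The trace.c hunks are a pure simplification: zalloc_cpumask_var() returns an already-zeroed mask, so the separate cpumask_clear() disappears. The substitution in isolation:

/* before: allocate, then clear */
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
        return -ENOMEM;
cpumask_clear(mask);

/* after: one call, mask comes back zeroed */
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
        return -ENOMEM;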
index 0f6facb050a11631d182cc1fd85b236fbf8befa8..8504ac71e4e8f9831edd7cc0d3186f98fdcad6c1 100644 (file)
@@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = {
 
 int
 stack_trace_sysctl(struct ctl_table *table, int write,
-                  struct file *file, void __user *buffer, size_t *lenp,
+                  void __user *buffer, size_t *lenp,
                   loff_t *ppos)
 {
        int ret;
 
        mutex_lock(&stack_sysctl_mutex);
 
-       ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if (ret || !write ||
            (last_stack_tracer_enabled == !!stack_tracer_enabled))
index 0314501688b9745f7601793fe17c0a14d7e1e147..419209893d87b528488203ea258a39021fd70ad3 100644 (file)
@@ -4,7 +4,6 @@
  */
 
 #include <linux/mm.h>
-#include <linux/utsname.h>
 #include <linux/mman.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
index 92359cc747a7c52cb85e362ab3b9178e4756ee2d..69eae358a726d8a8848d7f3e141c22762493c7b4 100644 (file)
@@ -42,14 +42,14 @@ static void put_uts(ctl_table *table, int write, void *which)
  *     Special case of dostring for the UTS structure. This has locks
  *     to observe. Should this be in kernel/sys.c ????
  */
-static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+static int proc_do_uts_string(ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table uts_table;
        int r;
        memcpy(&uts_table, table, sizeof(uts_table));
        uts_table.data = get_uts(table, write);
-       r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos);
+       r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
        put_uts(table, write, uts_table.data);
        return r;
 }
index d57b12f59c8c3f0686eca5cbd416e5378177da8b..891155817bc6e05aa52abc88e2fe7f93a9c41baa 100644 (file)
@@ -50,6 +50,14 @@ config MAGIC_SYSRQ
          keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
          unless you really know what this hack does.
 
+config STRIP_ASM_SYMS
+       bool "Strip assembler-generated symbols during link"
+       default n
+       help
+         Strip internal assembler-generated symbols during a link (symbols
+         that look like '.Lxxx') so they don't pollute the output of
+         get_wchan() and suchlike.
+
 config UNUSED_SYMBOLS
        bool "Enable unused/obsolete exported symbols"
        default y if X86
index 68dfce59c1b80be510544c0effc3f87669527b8f..fc686c7a0a0da688dd9dee52156dc7f6c0c380ab 100644 (file)
 
 #define GZIP_IOBUF_SIZE (16*1024)
 
+static int nofill(void *buffer, unsigned int len)
+{
+       return -1;
+}
+
 /* Included from initramfs et al code */
 STATIC int INIT gunzip(unsigned char *buf, int len,
                       int(*fill)(void*, unsigned int),
@@ -76,6 +81,9 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
                goto gunzip_nomem4;
        }
 
+       if (!fill)
+               fill = nofill;
+
        if (len == 0)
                len = fill(zbuf, GZIP_IOBUF_SIZE);
 
index 0b954e04bd3015bb08332b2a30a13e1ff6414fea..ca82fde81c8fc48a39a22495fbe9d489142d04aa 100644 (file)
@@ -82,6 +82,11 @@ struct rc {
 #define RC_MODEL_TOTAL_BITS 11
 
 
+static int nofill(void *buffer, unsigned int len)
+{
+       return -1;
+}
+
 /* Called twice: once at startup and once in rc_normalize() */
 static void INIT rc_read(struct rc *rc)
 {
@@ -97,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc,
                                       int (*fill)(void*, unsigned int),
                                       char *buffer, int buffer_size)
 {
-       rc->fill = fill;
+       if (fill)
+               rc->fill = fill;
+       else
+               rc->fill = nofill;
        rc->buffer = (uint8_t *)buffer;
        rc->buffer_size = buffer_size;
        rc->buffer_end = rc->buffer + rc->buffer_size;
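Both decompressor hunks apply the same defaulting idiom: install the sentinel nofill() once when the caller passes a NULL fill callback, so no later call site needs to branch on it. Reduced to a single expression:

rc->fill = fill ? fill : nofill;        /* equivalent to the if/else above */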
index 73a14b8c6d1f860796ebe7982ff0a7ca1d6c2081..b91839e9e892a08eb7726dc7b668d289c16f09ad 100644 (file)
@@ -671,7 +671,7 @@ static char *ip4_string(char *p, const u8 *addr, bool leading_zeros)
        return p;
 }
 
-static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
+static char *ip6_compressed_string(char *p, const char *addr)
 {
        int i;
        int j;
@@ -683,7 +683,12 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
        u8 hi;
        u8 lo;
        bool needcolon = false;
-       bool useIPv4 = ipv6_addr_v4mapped(addr) || ipv6_addr_is_isatap(addr);
+       bool useIPv4;
+       struct in6_addr in6;
+
+       memcpy(&in6, addr, sizeof(struct in6_addr));
+
+       useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
 
        memset(zerolength, 0, sizeof(zerolength));
 
@@ -695,7 +700,7 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
        /* find position of longest 0 run */
        for (i = 0; i < range; i++) {
                for (j = i; j < range; j++) {
-                       if (addr->s6_addr16[j] != 0)
+                       if (in6.s6_addr16[j] != 0)
                                break;
                        zerolength[i]++;
                }
@@ -722,7 +727,7 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
                        needcolon = false;
                }
                /* hex u16 without leading 0s */
-               word = ntohs(addr->s6_addr16[i]);
+               word = ntohs(in6.s6_addr16[i]);
                hi = word >> 8;
                lo = word & 0xff;
                if (hi) {
@@ -741,19 +746,19 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
        if (useIPv4) {
                if (needcolon)
                        *p++ = ':';
-               p = ip4_string(p, &addr->s6_addr[12], false);
+               p = ip4_string(p, &in6.s6_addr[12], false);
        }
 
        *p = '\0';
        return p;
 }
 
-static char *ip6_string(char *p, const struct in6_addr *addr, const char *fmt)
+static char *ip6_string(char *p, const char *addr, const char *fmt)
 {
        int i;
        for (i = 0; i < 8; i++) {
-               p = pack_hex_byte(p, addr->s6_addr[2 * i]);
-               p = pack_hex_byte(p, addr->s6_addr[2 * i + 1]);
+               p = pack_hex_byte(p, *addr++);
+               p = pack_hex_byte(p, *addr++);
                if (fmt[0] == 'I' && i != 7)
                        *p++ = ':';
        }
@@ -768,9 +773,9 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr,
        char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
 
        if (fmt[0] == 'I' && fmt[2] == 'c')
-               ip6_compressed_string(ip6_addr, (const struct in6_addr *)addr);
+               ip6_compressed_string(ip6_addr, addr);
        else
-               ip6_string(ip6_addr, (const struct in6_addr *)addr, fmt);
+               ip6_string(ip6_addr, addr, fmt);
 
        return string(buf, end, ip6_addr, spec);
 }
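The %pI6 rework deliberately takes the address as const char * and memcpy()s it into a stack struct in6_addr: printf-style va_args carry no alignment guarantee, so the s6_addr16 words must not be read through the raw pointer. The general rule behind the fix, as a hedged sketch (the helper name is hypothetical):

#include <linux/string.h>
#include <asm/byteorder.h>

static u16 load_be16(const void *p)
{
        __be16 v;

        memcpy(&v, p, sizeof(v));       /* safe on any alignment */
        return be16_to_cpu(v);
}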
index 71eb0b4cce8dbc425aa476d54f2048e5679c7dea..247760729593d37f841655dd54ec4572523255f7 100644 (file)
@@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR
          /proc/sys/vm/mmap_min_addr tunable.
 
 
+config MEMORY_FAILURE
+       depends on MMU
+       depends on X86_MCE
+       bool "Enable recovery from hardware memory errors"
+       help
+         Enables code to recover from some memory failures on systems
+         with MCA recovery. This allows a system to continue running
+         even when some of its memory has uncorrected errors. This requires
+         special hardware support and typically ECC memory.
+
+config HWPOISON_INJECT
+       tristate "Poison pages injector"
+       depends on MEMORY_FAILURE && DEBUG_KERNEL
+
 config NOMMU_INITIAL_TRIM_EXCESS
        int "Turn on mmap() excess space trimming before booting"
        depends on !MMU
index 88193d73cd1a30dd623e94eb9b5bed0c96cf08e0..515fd793c17fa989cffe0f3a686c8086e2f7ddca 100644 (file)
@@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o
 endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
+obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
index bcc7372aebbc4375d0763e4f3acd8d096bcb612d..6c84e598b4a9f7a0c2901387f32307c4e96ebaae 100644 (file)
@@ -58,7 +58,7 @@
 /*
  * Lock ordering:
  *
- *  ->i_mmap_lock              (vmtruncate)
+ *  ->i_mmap_lock              (truncate_pagecache)
  *    ->private_lock           (__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock            (exclusive_swap_page, others)
  *        ->mapping->tree_lock
  *
  *  ->task->proc_lock
  *    ->dcache_lock            (proc_pid_lookup)
+ *
+ *  (code doesn't rely on that order, so you could switch it around)
+ *  ->tasklist_lock             (memory_failure, collect_procs_ao)
+ *    ->i_mmap_lock
  */
 
 /*
index 815dbd4a6dcb919f28d4331aceae088b8e5f62d7..6f048fcc749ca48b3bae0a1a37a792f15e0c203d 100644 (file)
@@ -1537,7 +1537,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 
 #ifdef CONFIG_SYSCTL
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
-                          struct file *file, void __user *buffer,
+                          void __user *buffer,
                           size_t *length, loff_t *ppos)
 {
        struct hstate *h = &default_hstate;
@@ -1548,7 +1548,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 
        table->data = &tmp;
        table->maxlen = sizeof(unsigned long);
-       proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+       proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
        if (write)
                h->max_huge_pages = set_max_huge_pages(h, tmp);
@@ -1557,10 +1557,10 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 }
 
 int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
-                       struct file *file, void __user *buffer,
+                       void __user *buffer,
                        size_t *length, loff_t *ppos)
 {
-       proc_dointvec(table, write, file, buffer, length, ppos);
+       proc_dointvec(table, write, buffer, length, ppos);
        if (hugepages_treat_as_movable)
                htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
        else
@@ -1569,7 +1569,7 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
 }
 
 int hugetlb_overcommit_handler(struct ctl_table *table, int write,
-                       struct file *file, void __user *buffer,
+                       void __user *buffer,
                        size_t *length, loff_t *ppos)
 {
        struct hstate *h = &default_hstate;
@@ -1580,7 +1580,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
        table->data = &tmp;
        table->maxlen = sizeof(unsigned long);
-       proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+       proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
        if (write) {
                spin_lock(&hugetlb_lock);
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
new file mode 100644 (file)
index 0000000..e1d8513
--- /dev/null
@@ -0,0 +1,41 @@
+/* Inject a hwpoison memory failure on an arbitrary pfn */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+static struct dentry *hwpoison_dir, *corrupt_pfn;
+
+static int hwpoison_inject(void *data, u64 val)
+{
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val);
+       return __memory_failure(val, 18, 0);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
+
+static void pfn_inject_exit(void)
+{
+       if (hwpoison_dir)
+               debugfs_remove_recursive(hwpoison_dir);
+}
+
+static int pfn_inject_init(void)
+{
+       hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
+       if (hwpoison_dir == NULL)
+               return -ENOMEM;
+       corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
+                                         NULL, &hwpoison_fops);
+       if (corrupt_pfn == NULL) {
+               pfn_inject_exit();
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+module_init(pfn_inject_init);
+module_exit(pfn_inject_exit);
+MODULE_LICENSE("GPL");
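With the injector loaded, an injection is a write of a pfn to the debugfs file created above. A hedged userspace sketch; the /sys/kernel/debug mount point is an assumption:

#include <stdio.h>

int main(int argc, char **argv)
{
        FILE *f = fopen("/sys/kernel/debug/hwpoison/corrupt-pfn", "w");

        if (!f) {
                perror("corrupt-pfn");
                return 1;
        }
        fprintf(f, "%s\n", argc > 1 ? argv[1] : "4660");        /* pfn */
        fclose(f);
        return 0;
}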
index 37cc37325094b20d3c6153bdc27314d3c81398c9..f7edac356f465275031110db70c1e57aafbc5cda 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include <linux/mmu_notifier.h>
+#include <linux/swap.h>
 #include <linux/ksm.h>
 
 #include <asm/tlbflush.h>
@@ -162,10 +163,10 @@ static unsigned long ksm_pages_unshared;
 static unsigned long ksm_rmap_items;
 
 /* Limit on the number of unswappable pages used */
-static unsigned long ksm_max_kernel_pages = 2000;
+static unsigned long ksm_max_kernel_pages;
 
 /* Number of pages ksmd should scan in one batch */
-static unsigned int ksm_thread_pages_to_scan = 200;
+static unsigned int ksm_thread_pages_to_scan = 100;
 
 /* Milliseconds ksmd should sleep between batches */
 static unsigned int ksm_thread_sleep_millisecs = 20;
@@ -173,7 +174,7 @@ static unsigned int ksm_thread_sleep_millisecs = 20;
 #define KSM_RUN_STOP   0
 #define KSM_RUN_MERGE  1
 #define KSM_RUN_UNMERGE        2
-static unsigned int ksm_run = KSM_RUN_MERGE;
+static unsigned int ksm_run = KSM_RUN_STOP;
 
 static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
@@ -183,6 +184,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock);
                sizeof(struct __struct), __alignof__(struct __struct),\
                (__flags), NULL)
 
+static void __init ksm_init_max_kernel_pages(void)
+{
+       ksm_max_kernel_pages = nr_free_buffer_pages() / 4;
+}
+
 static int __init ksm_slab_init(void)
 {
        rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
@@ -1667,6 +1673,8 @@ static int __init ksm_init(void)
        struct task_struct *ksm_thread;
        int err;
 
+       ksm_init_max_kernel_pages();
+
        err = ksm_slab_init();
        if (err)
                goto out;
index d9ae2067952e5d2b5d09b260bc443d1ba0899d6a..35b1479b7c9d080ed97b772da44603c4f757093c 100644 (file)
@@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma,
        return error;
 }
 
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Error injection support for memory error handling.
+ */
+static int madvise_hwpoison(unsigned long start, unsigned long end)
+{
+       int ret = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       for (; start < end; start += PAGE_SIZE) {
+               struct page *p;
+               int ret = get_user_pages(current, current->mm, start, 1,
+                                               0, 0, &p, NULL);
+               if (ret != 1)
+                       return ret;
+               printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
+                      page_to_pfn(p), start);
+               /* Ignore return value for now */
+               __memory_failure(page_to_pfn(p), 0, 1);
+               put_page(p);
+       }
+       return ret;
+}
+#endif
+
 static long
 madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
                unsigned long start, unsigned long end, int behavior)
@@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
        int write;
        size_t len;
 
+#ifdef CONFIG_MEMORY_FAILURE
+       if (behavior == MADV_HWPOISON)
+               return madvise_hwpoison(start, start+len_in);
+#endif
        if (!madvise_behavior_valid(behavior))
                return error;
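The madvise() path is reachable directly from userspace given CAP_SYS_ADMIN. A hedged sketch that faults in and then poisons one anonymous page; MADV_HWPOISON's value of 100 matches this series but is an assumption here:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MADV_HWPOISON
#define MADV_HWPOISON 100       /* assumed to match this series */
#endif

int main(void)
{
        long psz = sysconf(_SC_PAGESIZE);
        char *p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;
        p[0] = 1;                       /* fault the page in first */
        if (madvise(p, psz, MADV_HWPOISON))
                perror("madvise(MADV_HWPOISON)");
        return 0;
}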
 
index 9b10d8753784c5e9fbbe33239a277a514fe99c67..e2b98a6875c079b36f103b724130462591094735 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/rcupdate.h>
 #include <linux/limits.h>
 #include <linux/mutex.h>
+#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/spinlock.h>
@@ -43,6 +44,7 @@
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES     5
+struct mem_cgroup *root_mem_cgroup __read_mostly;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
@@ -53,6 +55,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/
 #endif
 
 static DEFINE_MUTEX(memcg_tasklist);   /* can be hold under cgroup_mutex */
+#define SOFTLIMIT_EVENTS_THRESH (1000)
 
 /*
  * Statistics for memory cgroup.
@@ -66,6 +69,8 @@ enum mem_cgroup_stat_index {
        MEM_CGROUP_STAT_MAPPED_FILE,  /* # of pages charged as file rss */
        MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
        MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
+       MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
+       MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 
        MEM_CGROUP_STAT_NSTATS,
 };
@@ -78,6 +83,20 @@ struct mem_cgroup_stat {
        struct mem_cgroup_stat_cpu cpustat[0];
 };
 
+static inline void
+__mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat,
+                               enum mem_cgroup_stat_index idx)
+{
+       stat->count[idx] = 0;
+}
+
+static inline s64
+__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
+                               enum mem_cgroup_stat_index idx)
+{
+       return stat->count[idx];
+}
+
 /*
  * For accounting under irq disable, no need for increment preempt count.
  */
@@ -117,6 +136,12 @@ struct mem_cgroup_per_zone {
        unsigned long           count[NR_LRU_LISTS];
 
        struct zone_reclaim_stat reclaim_stat;
+       struct rb_node          tree_node;      /* RB tree node */
+       unsigned long long      usage_in_excess; /* Set to the value by which */
+                                                /* the soft limit is exceeded */
+       bool                    on_tree;
+       struct mem_cgroup       *mem;           /* Back pointer, we cannot */
+                                               /* use container_of        */
 };
 /* Macro for accessing counter */
 #define MEM_CGROUP_ZSTAT(mz, idx)      ((mz)->count[(idx)])
@@ -129,6 +154,26 @@ struct mem_cgroup_lru_info {
        struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES];
 };
 
+/*
+ * Cgroups above their limits are maintained in an RB-tree, independent of
+ * their hierarchy representation.
+ */
+
+struct mem_cgroup_tree_per_zone {
+       struct rb_root rb_root;
+       spinlock_t lock;
+};
+
+struct mem_cgroup_tree_per_node {
+       struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
+};
+
+struct mem_cgroup_tree {
+       struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
+};
+
+static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+
 /*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
@@ -186,6 +231,13 @@ struct mem_cgroup {
        struct mem_cgroup_stat stat;
 };
 
+/*
+ * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
+ * limit reclaim to prevent infinite loops, if they ever occur.
+ */
+#define        MEM_CGROUP_MAX_RECLAIM_LOOPS            (100)
+#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2)
+
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
        MEM_CGROUP_CHARGE_TYPE_MAPPED,
@@ -200,13 +252,8 @@ enum charge_type {
 #define PCGF_CACHE     (1UL << PCG_CACHE)
 #define PCGF_USED      (1UL << PCG_USED)
 #define PCGF_LOCK      (1UL << PCG_LOCK)
-static const unsigned long
-pcg_default_flags[NR_CHARGE_TYPE] = {
-       PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
-       PCGF_USED | PCGF_LOCK, /* Anon */
-       PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
-       0, /* FORCE */
-};
+/* Not used, but added here for completeness */
+#define PCGF_ACCT      (1UL << PCG_ACCT)
 
 /* for encoding cft->private value on file */
 #define _MEM                   (0)
@@ -215,15 +262,241 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
 #define MEMFILE_TYPE(val)      (((val) >> 16) & 0xffff)
 #define MEMFILE_ATTR(val)      ((val) & 0xffff)
 
+/*
+ * Reclaim flags for mem_cgroup_hierarchical_reclaim
+ */
+#define MEM_CGROUP_RECLAIM_NOSWAP_BIT  0x0
+#define MEM_CGROUP_RECLAIM_NOSWAP      (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
+#define MEM_CGROUP_RECLAIM_SHRINK_BIT  0x1
+#define MEM_CGROUP_RECLAIM_SHRINK      (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
+#define MEM_CGROUP_RECLAIM_SOFT_BIT    0x2
+#define MEM_CGROUP_RECLAIM_SOFT                (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
+
 static void mem_cgroup_get(struct mem_cgroup *mem);
 static void mem_cgroup_put(struct mem_cgroup *mem);
 static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
 
+static struct mem_cgroup_per_zone *
+mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
+{
+       return &mem->info.nodeinfo[nid]->zoneinfo[zid];
+}
+
+static struct mem_cgroup_per_zone *
+page_cgroup_zoneinfo(struct page_cgroup *pc)
+{
+       struct mem_cgroup *mem = pc->mem_cgroup;
+       int nid = page_cgroup_nid(pc);
+       int zid = page_cgroup_zid(pc);
+
+       if (!mem)
+               return NULL;
+
+       return mem_cgroup_zoneinfo(mem, nid, zid);
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_node_zone(int nid, int zid)
+{
+       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_from_page(struct page *page)
+{
+       int nid = page_to_nid(page);
+       int zid = page_zonenum(page);
+
+       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static void
+__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       struct rb_node **p = &mctz->rb_root.rb_node;
+       struct rb_node *parent = NULL;
+       struct mem_cgroup_per_zone *mz_node;
+
+       if (mz->on_tree)
+               return;
+
+       mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+       while (*p) {
+               parent = *p;
+               mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+                                       tree_node);
+               if (mz->usage_in_excess < mz_node->usage_in_excess)
+                       p = &(*p)->rb_left;
+               /*
+                * We can't avoid mem cgroups that are over their soft
+                * limit by the same amount
+                */
+               else if (mz->usage_in_excess >= mz_node->usage_in_excess)
+                       p = &(*p)->rb_right;
+       }
+       rb_link_node(&mz->tree_node, parent, p);
+       rb_insert_color(&mz->tree_node, &mctz->rb_root);
+       mz->on_tree = true;
+}
+
+static void
+__mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       if (!mz->on_tree)
+               return;
+       rb_erase(&mz->tree_node, &mctz->rb_root);
+       mz->on_tree = false;
+}
+
+static void
+mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       spin_lock(&mctz->lock);
+       __mem_cgroup_insert_exceeded(mem, mz, mctz);
+       spin_unlock(&mctz->lock);
+}
+
+static void
+mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       spin_lock(&mctz->lock);
+       __mem_cgroup_remove_exceeded(mem, mz, mctz);
+       spin_unlock(&mctz->lock);
+}
+
+static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
+{
+       bool ret = false;
+       int cpu;
+       s64 val;
+       struct mem_cgroup_stat_cpu *cpustat;
+
+       cpu = get_cpu();
+       cpustat = &mem->stat.cpustat[cpu];
+       val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS);
+       if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) {
+               __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS);
+               ret = true;
+       }
+       put_cpu();
+       return ret;
+}
+
+static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
+{
+       unsigned long long prev_usage_in_excess, new_usage_in_excess;
+       bool updated_tree = false;
+       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_tree_per_zone *mctz;
+
+       mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
+       mctz = soft_limit_tree_from_page(page);
+
+       /*
+        * We do updates in lazy mode: memcgs are removed
+        * lazily from the per-zone, per-node rb-tree.
+        */
+       prev_usage_in_excess = mz->usage_in_excess;
+
+       new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+       if (prev_usage_in_excess) {
+               mem_cgroup_remove_exceeded(mem, mz, mctz);
+               updated_tree = true;
+       }
+       if (!new_usage_in_excess)
+               goto done;
+       mem_cgroup_insert_exceeded(mem, mz, mctz);
+
+done:
+       if (updated_tree) {
+               spin_lock(&mctz->lock);
+               mz->usage_in_excess = new_usage_in_excess;
+               spin_unlock(&mctz->lock);
+       }
+}
+
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
+{
+       int node, zone;
+       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_tree_per_zone *mctz;
+
+       for_each_node_state(node, N_POSSIBLE) {
+               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+                       mz = mem_cgroup_zoneinfo(mem, node, zone);
+                       mctz = soft_limit_tree_node_zone(node, zone);
+                       mem_cgroup_remove_exceeded(mem, mz, mctz);
+               }
+       }
+}
+
+static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem)
+{
+       return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT;
+}
+
+static struct mem_cgroup_per_zone *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+       struct rb_node *rightmost = NULL;
+       struct mem_cgroup_per_zone *mz = NULL;
+
+retry:
+       rightmost = rb_last(&mctz->rb_root);
+       if (!rightmost)
+               goto done;              /* Nothing to reclaim from */
+
+       mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+       /*
+        * Remove the node now, but someone else can add it back;
+        * we will add it back at the end of reclaim to its correct
+        * position in the tree.
+        */
+       __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+       if (!res_counter_soft_limit_excess(&mz->mem->res) ||
+               !css_tryget(&mz->mem->css))
+               goto retry;
+done:
+       return mz;
+}
+
+static struct mem_cgroup_per_zone *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+       struct mem_cgroup_per_zone *mz;
+
+       spin_lock(&mctz->lock);
+       mz = __mem_cgroup_largest_soft_limit_node(mctz);
+       spin_unlock(&mctz->lock);
+       return mz;
+}
+
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+                                        bool charge)
+{
+       int val = (charge) ? 1 : -1;
+       struct mem_cgroup_stat *stat = &mem->stat;
+       struct mem_cgroup_stat_cpu *cpustat;
+       int cpu = get_cpu();
+
+       cpustat = &stat->cpustat[cpu];
+       __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
+       put_cpu();
+}
+
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
                                         struct page_cgroup *pc,
                                         bool charge)
 {
-       int val = (charge)? 1 : -1;
+       int val = (charge) ? 1 : -1;
        struct mem_cgroup_stat *stat = &mem->stat;
        struct mem_cgroup_stat_cpu *cpustat;
        int cpu = get_cpu();
@@ -240,28 +513,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
        else
                __mem_cgroup_stat_add_safe(cpustat,
                                MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
+       __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1);
        put_cpu();
 }
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
-{
-       return &mem->info.nodeinfo[nid]->zoneinfo[zid];
-}
-
-static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct page_cgroup *pc)
-{
-       struct mem_cgroup *mem = pc->mem_cgroup;
-       int nid = page_cgroup_nid(pc);
-       int zid = page_cgroup_zid(pc);
-
-       if (!mem)
-               return NULL;
-
-       return mem_cgroup_zoneinfo(mem, nid, zid);
-}
-
 static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
                                        enum lru_list idx)
 {
@@ -354,6 +609,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
        return ret;
 }
 
+static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
+{
+       return (mem == root_mem_cgroup);
+}
+
 /*
  * Following LRU functions are allowed to be used without PCG_LOCK.
  * Operations are called by routine of global LRU independently from memcg.
@@ -371,22 +631,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
 void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 {
        struct page_cgroup *pc;
-       struct mem_cgroup *mem;
        struct mem_cgroup_per_zone *mz;
 
        if (mem_cgroup_disabled())
                return;
        pc = lookup_page_cgroup(page);
        /* can happen while we handle swapcache. */
-       if (list_empty(&pc->lru) || !pc->mem_cgroup)
+       if (!TestClearPageCgroupAcctLRU(pc))
                return;
+       VM_BUG_ON(!pc->mem_cgroup);
        /*
         * We don't check PCG_USED bit. It's cleared when the "page" is finally
         * removed from global LRU.
         */
        mz = page_cgroup_zoneinfo(pc);
-       mem = pc->mem_cgroup;
        MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+       if (mem_cgroup_is_root(pc->mem_cgroup))
+               return;
+       VM_BUG_ON(list_empty(&pc->lru));
        list_del_init(&pc->lru);
        return;
 }
@@ -410,8 +672,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
         * For making pc->mem_cgroup visible, insert smp_rmb() here.
         */
        smp_rmb();
-       /* unused page is not rotated. */
-       if (!PageCgroupUsed(pc))
+       /* unused or root page is not rotated. */
+       if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
                return;
        mz = page_cgroup_zoneinfo(pc);
        list_move(&pc->lru, &mz->lists[lru]);
@@ -425,6 +687,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
        if (mem_cgroup_disabled())
                return;
        pc = lookup_page_cgroup(page);
+       VM_BUG_ON(PageCgroupAcctLRU(pc));
        /*
         * Used bit is set without atomic ops but after smp_wmb().
         * For making pc->mem_cgroup visible, insert smp_rmb() here.
@@ -435,6 +698,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 
        mz = page_cgroup_zoneinfo(pc);
        MEM_CGROUP_ZSTAT(mz, lru) += 1;
+       SetPageCgroupAcctLRU(pc);
+       if (mem_cgroup_is_root(pc->mem_cgroup))
+               return;
        list_add(&pc->lru, &mz->lists[lru]);
 }
 
@@ -469,7 +735,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
 
        spin_lock_irqsave(&zone->lru_lock, flags);
        /* link when the page is linked to LRU but page_cgroup isn't */
-       if (PageLRU(page) && list_empty(&pc->lru))
+       if (PageLRU(page) && !PageCgroupAcctLRU(pc))
                mem_cgroup_add_lru_list(page, page_lru(page));
        spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
@@ -855,28 +1121,62 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
  * If shrink==true, this returns immediately to avoid freeing too much.
  */
 static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
-                                  gfp_t gfp_mask, bool noswap, bool shrink)
+                                               struct zone *zone,
+                                               gfp_t gfp_mask,
+                                               unsigned long reclaim_options)
 {
        struct mem_cgroup *victim;
        int ret, total = 0;
        int loop = 0;
+       bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
+       bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
+       bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
+       unsigned long excess = mem_cgroup_get_excess(root_mem);
 
 	/* If memsw_is_minimum==1, swap-out is of no use. */
        if (root_mem->memsw_is_minimum)
                noswap = true;
 
-       while (loop < 2) {
+       while (1) {
                victim = mem_cgroup_select_victim(root_mem);
-               if (victim == root_mem)
+               if (victim == root_mem) {
                        loop++;
+                       if (loop >= 2) {
+                               /*
+                                * If we have not been able to reclaim
+                                * anything, it might be because there are
+                                * no reclaimable pages under this hierarchy.
+                                */
+                               if (!check_soft || !total) {
+                                       css_put(&victim->css);
+                                       break;
+                               }
+                               /*
+                                * We want to do more targeted reclaim.
+                                * excess >> 2 is not so excessive that we
+                                * reclaim too much, nor so small that we keep
+                                * coming back to reclaim from this cgroup.
+                                */
+                               if (total >= (excess >> 2) ||
+                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) {
+                                       css_put(&victim->css);
+                                       break;
+                               }
+                       }
+               }
                if (!mem_cgroup_local_usage(&victim->stat)) {
                        /* this cgroup's local usage == 0 */
                        css_put(&victim->css);
                        continue;
                }
                /* we use swappiness of local cgroup */
-               ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
-                                                  get_swappiness(victim));
+               if (check_soft)
+                       ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
+                               noswap, get_swappiness(victim), zone,
+                               zone->zone_pgdat->node_id);
+               else
+                       ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
+                                               noswap, get_swappiness(victim));
                css_put(&victim->css);
                /*
                 * At shrinking usage, we can't check we should stop here or
@@ -886,7 +1186,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                if (shrink)
                        return ret;
                total += ret;
-               if (mem_cgroup_check_under_limit(root_mem))
+               if (check_soft) {
+                       if (res_counter_check_under_soft_limit(&root_mem->res))
+                               return total;
+               } else if (mem_cgroup_check_under_limit(root_mem))
                        return 1 + total;
        }
        return total;
@@ -965,11 +1268,11 @@ done:
  */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
                        gfp_t gfp_mask, struct mem_cgroup **memcg,
-                       bool oom)
+                       bool oom, struct page *page)
 {
-       struct mem_cgroup *mem, *mem_over_limit;
+       struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
        int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-       struct res_counter *fail_res;
+       struct res_counter *fail_res, *soft_fail_res = NULL;
 
        if (unlikely(test_thread_flag(TIF_MEMDIE))) {
                /* Don't account this! */
@@ -996,20 +1299,23 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
        VM_BUG_ON(css_is_removed(&mem->css));
 
        while (1) {
-               int ret;
-               bool noswap = false;
+               int ret = 0;
+               unsigned long flags = 0;
 
-               ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
+               if (mem_cgroup_is_root(mem))
+                       goto done;
+               ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
+                                               &soft_fail_res);
                if (likely(!ret)) {
                        if (!do_swap_account)
                                break;
                        ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
-                                                       &fail_res);
+                                                       &fail_res, NULL);
                        if (likely(!ret))
                                break;
                        /* mem+swap counter fails */
-                       res_counter_uncharge(&mem->res, PAGE_SIZE);
-                       noswap = true;
+                       res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+                       flags |= MEM_CGROUP_RECLAIM_NOSWAP;
                        mem_over_limit = mem_cgroup_from_res_counter(fail_res,
                                                                        memsw);
                } else
@@ -1020,8 +1326,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                if (!(gfp_mask & __GFP_WAIT))
                        goto nomem;
 
-               ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
-                                                       noswap, false);
+               ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
+                                               gfp_mask, flags);
                if (ret)
                        continue;
 
@@ -1046,13 +1352,24 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                        goto nomem;
                }
        }
+       /*
+        * Insert just the ancestor, we should trickle down to the correct
+        * cgroup for reclaim, since the other nodes will be below their
+        * soft limit
+        */
+       if (soft_fail_res) {
+               mem_over_soft_limit =
+                       mem_cgroup_from_res_counter(soft_fail_res, res);
+               if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
+                       mem_cgroup_update_tree(mem_over_soft_limit, page);
+       }
+done:
        return 0;
 nomem:
        css_put(&mem->css);
        return -ENOMEM;
 }
 
-
 /*
  * A helper function to get mem_cgroup from ID. must be called under
  * rcu_read_lock(). The caller must check css_is_removed() or some if
@@ -1119,15 +1436,38 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
        lock_page_cgroup(pc);
        if (unlikely(PageCgroupUsed(pc))) {
                unlock_page_cgroup(pc);
-               res_counter_uncharge(&mem->res, PAGE_SIZE);
-               if (do_swap_account)
-                       res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+               if (!mem_cgroup_is_root(mem)) {
+                       res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+                       if (do_swap_account)
+                               res_counter_uncharge(&mem->memsw, PAGE_SIZE,
+                                                       NULL);
+               }
                css_put(&mem->css);
                return;
        }
+
        pc->mem_cgroup = mem;
+       /*
+        * We access a page_cgroup asynchronously without lock_page_cgroup().
+        * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
+        * is accessed after testing USED bit. To make pc->mem_cgroup visible
+        * before USED bit, we need memory barrier here.
+        * See mem_cgroup_add_lru_list(), etc.
+        */
        smp_wmb();
-       pc->flags = pcg_default_flags[ctype];
+       switch (ctype) {
+       case MEM_CGROUP_CHARGE_TYPE_CACHE:
+       case MEM_CGROUP_CHARGE_TYPE_SHMEM:
+               SetPageCgroupCache(pc);
+               SetPageCgroupUsed(pc);
+               break;
+       case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+               ClearPageCgroupCache(pc);
+               SetPageCgroupUsed(pc);
+               break;
+       default:
+               break;
+       }
 
        mem_cgroup_charge_statistics(mem, pc, true);
 
@@ -1178,7 +1518,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
        if (pc->mem_cgroup != from)
                goto out;
 
-       res_counter_uncharge(&from->res, PAGE_SIZE);
+       if (!mem_cgroup_is_root(from))
+               res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
        mem_cgroup_charge_statistics(from, pc, false);
 
        page = pc->page;
@@ -1197,8 +1538,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
                                                1);
        }
 
-       if (do_swap_account)
-               res_counter_uncharge(&from->memsw, PAGE_SIZE);
+       if (do_swap_account && !mem_cgroup_is_root(from))
+               res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
        css_put(&from->css);
 
        css_get(&to->css);
@@ -1238,7 +1579,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
        parent = mem_cgroup_from_cont(pcg);
 
 
-       ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
+       ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
        if (ret || !parent)
                return ret;
 
@@ -1268,9 +1609,11 @@ uncharge:
        /* drop extra refcnt by try_charge() */
        css_put(&parent->css);
        /* uncharge if move fails */
-       res_counter_uncharge(&parent->res, PAGE_SIZE);
-       if (do_swap_account)
-               res_counter_uncharge(&parent->memsw, PAGE_SIZE);
+       if (!mem_cgroup_is_root(parent)) {
+               res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+               if (do_swap_account)
+                       res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+       }
        return ret;
 }
 
@@ -1295,7 +1638,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
        prefetchw(pc);
 
        mem = memcg;
-       ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
+       ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page);
        if (ret || !mem)
                return ret;
 
@@ -1414,14 +1757,14 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
        if (!mem)
                goto charge_cur_mm;
        *ptr = mem;
-       ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
+       ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page);
        /* drop extra refcnt from tryget */
        css_put(&mem->css);
        return ret;
 charge_cur_mm:
        if (unlikely(!mm))
                mm = &init_mm;
-       return __mem_cgroup_try_charge(mm, mask, ptr, true);
+       return __mem_cgroup_try_charge(mm, mask, ptr, true, page);
 }
 
 static void
@@ -1459,7 +1802,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
                         * This recorded memcg can be obsolete one. So, avoid
                         * calling css_tryget
                         */
-                       res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+                       if (!mem_cgroup_is_root(memcg))
+                               res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
+                                                       NULL);
+                       mem_cgroup_swap_statistics(memcg, false);
                        mem_cgroup_put(memcg);
                }
                rcu_read_unlock();
@@ -1484,9 +1830,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
                return;
        if (!mem)
                return;
-       res_counter_uncharge(&mem->res, PAGE_SIZE);
-       if (do_swap_account)
-               res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+       if (!mem_cgroup_is_root(mem)) {
+               res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+               if (do_swap_account)
+                       res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+       }
        css_put(&mem->css);
 }
 
@@ -1500,6 +1848,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        struct page_cgroup *pc;
        struct mem_cgroup *mem = NULL;
        struct mem_cgroup_per_zone *mz;
+       bool soft_limit_excess = false;
 
        if (mem_cgroup_disabled())
                return NULL;
@@ -1538,9 +1887,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
                break;
        }
 
-       res_counter_uncharge(&mem->res, PAGE_SIZE);
-       if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-               res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+       if (!mem_cgroup_is_root(mem)) {
+               res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+               if (do_swap_account &&
+                               (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
+                       res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+       }
+       if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
+               mem_cgroup_swap_statistics(mem, true);
        mem_cgroup_charge_statistics(mem, pc, false);
 
        ClearPageCgroupUsed(pc);
@@ -1554,6 +1908,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        mz = page_cgroup_zoneinfo(pc);
        unlock_page_cgroup(pc);
 
+       if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
+               mem_cgroup_update_tree(mem, page);
        /* at swapout, this memcg will be accessed to record to swap */
        if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
                css_put(&mem->css);
@@ -1629,7 +1985,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
                 * We uncharge this because swap is freed.
                 * This memcg can be obsolete one. We avoid calling css_tryget
                 */
-               res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+               if (!mem_cgroup_is_root(memcg))
+                       res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+               mem_cgroup_swap_statistics(memcg, false);
                mem_cgroup_put(memcg);
        }
        rcu_read_unlock();
@@ -1658,7 +2016,8 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
        unlock_page_cgroup(pc);
 
        if (mem) {
-               ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+               ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
+                                               page);
                css_put(&mem->css);
        }
        *ptr = mem;
@@ -1798,8 +2157,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                if (!ret)
                        break;
 
-               progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
-                                                  false, true);
+               progress = mem_cgroup_hierarchical_reclaim(memcg, NULL,
+                                               GFP_KERNEL,
+                                               MEM_CGROUP_RECLAIM_SHRINK);
                curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
                /* Usage is reduced ? */
                if (curusage >= oldusage)
@@ -1851,7 +2211,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
                if (!ret)
                        break;
 
-               mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true);
+               mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
+                                               MEM_CGROUP_RECLAIM_NOSWAP |
+                                               MEM_CGROUP_RECLAIM_SHRINK);
                curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
                /* Usage is reduced ? */
                if (curusage >= oldusage)
@@ -1862,6 +2224,97 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
        return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                               gfp_t gfp_mask, int nid,
+                                               int zid)
+{
+       unsigned long nr_reclaimed = 0;
+       struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+       unsigned long reclaimed;
+       int loop = 0;
+       struct mem_cgroup_tree_per_zone *mctz;
+
+       if (order > 0)
+               return 0;
+
+       mctz = soft_limit_tree_node_zone(nid, zid);
+       /*
+        * This loop can run for a while, especially if mem_cgroups
+        * continuously keep exceeding their soft limit and putting the
+        * system under pressure.
+        */
+       do {
+               if (next_mz)
+                       mz = next_mz;
+               else
+                       mz = mem_cgroup_largest_soft_limit_node(mctz);
+               if (!mz)
+                       break;
+
+               reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
+                                               gfp_mask,
+                                               MEM_CGROUP_RECLAIM_SOFT);
+               nr_reclaimed += reclaimed;
+               spin_lock(&mctz->lock);
+
+               /*
+                * If we failed to reclaim anything from this memory cgroup
+                * it is time to move on to the next cgroup
+                */
+               next_mz = NULL;
+               if (!reclaimed) {
+                       do {
+                               /*
+                                * Loop until we find yet another one.
+                                *
+                                * By the time we get the soft_limit lock
+                                * again, someone might have added the
+                                * group back on the RB tree. Iterate to
+                                * make sure we get a different mem.
+                                * mem_cgroup_largest_soft_limit_node returns
+                                * NULL if no other cgroup is present on
+                                * the tree
+                                */
+                               next_mz =
+                               __mem_cgroup_largest_soft_limit_node(mctz);
+                               if (next_mz == mz) {
+                                       css_put(&next_mz->mem->css);
+                                       next_mz = NULL;
+                               } else /* next_mz == NULL or other memcg */
+                                       break;
+                       } while (1);
+               }
+               mz->usage_in_excess =
+                       res_counter_soft_limit_excess(&mz->mem->res);
+               __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+               /*
+                * One school of thought says that we should not add
+                * back the node to the tree if reclaim returns 0.
+                * But our reclaim could return 0, simply because, due
+                * to priority, we are exposing a smaller subset of
+                * memory to reclaim from. Consider this as a longer
+                * term TODO.
+                */
+               if (mz->usage_in_excess)
+                       __mem_cgroup_insert_exceeded(mz->mem, mz, mctz);
+               spin_unlock(&mctz->lock);
+               css_put(&mz->mem->css);
+               loop++;
+               /*
+                * Could not reclaim anything and there are no more
+                * mem cgroups to try or we seem to be looping without
+                * reclaiming anything.
+                */
+               if (!nr_reclaimed &&
+                       (next_mz == NULL ||
+                       loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+                       break;
+       } while (!nr_reclaimed);
+       if (next_mz)
+               css_put(&next_mz->mem->css);
+       return nr_reclaimed;
+}
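A sketch of how the per-zone reclaim path might drive this entry point. The vmscan.c side of this series is not shown in this excerpt, so the call site below is an assumption, not the patch's actual code; zone_to_nid() and zone_idx() are existing kernel helpers:

	/* Illustrative sketch only: one soft limit reclaim pass for a zone,
	 * as a kswapd-style caller might issue it. */
	static unsigned long soft_limit_pass(struct zone *zone, int order,
					     gfp_t gfp_mask)
	{
		return mem_cgroup_soft_limit_reclaim(zone, order, gfp_mask,
						     zone_to_nid(zone),
						     zone_idx(zone));
	}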
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
@@ -2046,20 +2499,64 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
        return retval;
 }
 
+struct mem_cgroup_idx_data {
+       s64 val;
+       enum mem_cgroup_stat_index idx;
+};
+
+static int
+mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
+{
+       struct mem_cgroup_idx_data *d = data;
+       d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
+       return 0;
+}
+
+static void
+mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
+                               enum mem_cgroup_stat_index idx, s64 *val)
+{
+       struct mem_cgroup_idx_data d;
+       d.idx = idx;
+       d.val = 0;
+       mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
+       *val = d.val;
+}
+
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
        struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-       u64 val = 0;
+       u64 idx_val, val;
        int type, name;
 
        type = MEMFILE_TYPE(cft->private);
        name = MEMFILE_ATTR(cft->private);
        switch (type) {
        case _MEM:
-               val = res_counter_read_u64(&mem->res, name);
+               if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_CACHE, &idx_val);
+                       val = idx_val;
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_RSS, &idx_val);
+                       val += idx_val;
+                       val <<= PAGE_SHIFT;
+               } else
+                       val = res_counter_read_u64(&mem->res, name);
                break;
        case _MEMSWAP:
-               val = res_counter_read_u64(&mem->memsw, name);
+               if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_CACHE, &idx_val);
+                       val = idx_val;
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_RSS, &idx_val);
+                       val += idx_val;
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_SWAPOUT, &idx_val);
+                       val += idx_val;
+                       val <<= PAGE_SHIFT;
+               } else
+                       val = res_counter_read_u64(&mem->memsw, name);
                break;
        default:
                BUG();
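A worked example of the root-usage computation above, with illustrative numbers: a hierarchy holding 1000 pages of cache and 500 pages of RSS on a 4KiB-page machine reports usage of (1000 + 500) << PAGE_SHIFT = 1500 * 4096 = 6144000 bytes. This reconstruction from the recursive per-cpu statistics is needed because the root cgroup's res_counter is bypassed at charge time (see the mem_cgroup_is_root() check in __mem_cgroup_try_charge()).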
@@ -2083,6 +2580,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
        name = MEMFILE_ATTR(cft->private);
        switch (name) {
        case RES_LIMIT:
+               if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
+                       ret = -EINVAL;
+                       break;
+               }
                /* This function does all necessary parse...reuse it */
                ret = res_counter_memparse_write_strategy(buffer, &val);
                if (ret)
@@ -2092,6 +2593,20 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
                else
                        ret = mem_cgroup_resize_memsw_limit(memcg, val);
                break;
+       case RES_SOFT_LIMIT:
+               ret = res_counter_memparse_write_strategy(buffer, &val);
+               if (ret)
+                       break;
+               /*
+                * For memsw, soft limits are hard to implement in terms
+                * of semantics; for now, we support soft limits only for
+                * memory control without swap.
+                */
+               if (type == _MEM)
+                       ret = res_counter_set_soft_limit(&memcg->res, val);
+               else
+                       ret = -EINVAL;
+               break;
        default:
                ret = -EINVAL; /* should be BUG() ? */
                break;
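The new soft limit is configured by writing to memory.soft_limit_in_bytes; res_counter_memparse_write_strategy() accepts memparse-style K/M/G suffixes. A minimal userspace sketch, assuming a hypothetical group at /cgroup/mygroup (the mount point and group name are illustrative, not part of this patch):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* hypothetical cgroup path; adjust to your mount point */
		const char *path = "/cgroup/mygroup/memory.soft_limit_in_bytes";
		const char *val = "256M\n";	/* memparse-style suffix */
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, val, strlen(val)) < 0)
			perror("write");
		close(fd);
		return 0;
	}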
@@ -2149,6 +2664,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
                        res_counter_reset_failcnt(&mem->memsw);
                break;
        }
+
        return 0;
 }
 
@@ -2160,6 +2676,7 @@ enum {
        MCS_MAPPED_FILE,
        MCS_PGPGIN,
        MCS_PGPGOUT,
+       MCS_SWAP,
        MCS_INACTIVE_ANON,
        MCS_ACTIVE_ANON,
        MCS_INACTIVE_FILE,
@@ -2181,6 +2698,7 @@ struct {
        {"mapped_file", "total_mapped_file"},
        {"pgpgin", "total_pgpgin"},
        {"pgpgout", "total_pgpgout"},
+       {"swap", "total_swap"},
        {"inactive_anon", "total_inactive_anon"},
        {"active_anon", "total_active_anon"},
        {"inactive_file", "total_inactive_file"},
@@ -2205,6 +2723,10 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
        s->stat[MCS_PGPGIN] += val;
        val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
        s->stat[MCS_PGPGOUT] += val;
+       if (do_swap_account) {
+               val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
+               s->stat[MCS_SWAP] += val * PAGE_SIZE;
+       }
 
        /* per zone stat */
        val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
@@ -2236,8 +2758,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        memset(&mystat, 0, sizeof(mystat));
        mem_cgroup_get_local_stat(mem_cont, &mystat);
 
-       for (i = 0; i < NR_MCS_STAT; i++)
+       for (i = 0; i < NR_MCS_STAT; i++) {
+               if (i == MCS_SWAP && !do_swap_account)
+                       continue;
                cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]);
+       }
 
        /* Hierarchical information */
        {
@@ -2250,9 +2775,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 
        memset(&mystat, 0, sizeof(mystat));
        mem_cgroup_get_total_stat(mem_cont, &mystat);
-       for (i = 0; i < NR_MCS_STAT; i++)
+       for (i = 0; i < NR_MCS_STAT; i++) {
+               if (i == MCS_SWAP && !do_swap_account)
+                       continue;
                cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]);
-
+       }
 
 #ifdef CONFIG_DEBUG_VM
        cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
@@ -2344,6 +2871,12 @@ static struct cftype mem_cgroup_files[] = {
                .write_string = mem_cgroup_write,
                .read_u64 = mem_cgroup_read,
        },
+       {
+               .name = "soft_limit_in_bytes",
+               .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
+               .write_string = mem_cgroup_write,
+               .read_u64 = mem_cgroup_read,
+       },
        {
                .name = "failcnt",
                .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
@@ -2438,6 +2971,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
                mz = &pn->zoneinfo[zone];
                for_each_lru(l)
                        INIT_LIST_HEAD(&mz->lists[l]);
+               mz->usage_in_excess = 0;
+               mz->on_tree = false;
+               mz->mem = mem;
        }
        return 0;
 }
@@ -2483,6 +3019,7 @@ static void __mem_cgroup_free(struct mem_cgroup *mem)
 {
        int node;
 
+       mem_cgroup_remove_from_trees(mem);
        free_css_id(&mem_cgroup_subsys, &mem->css);
 
        for_each_node_state(node, N_POSSIBLE)
@@ -2531,6 +3068,31 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+static int mem_cgroup_soft_limit_tree_init(void)
+{
+       struct mem_cgroup_tree_per_node *rtpn;
+       struct mem_cgroup_tree_per_zone *rtpz;
+       int tmp, node, zone;
+
+       for_each_node_state(node, N_POSSIBLE) {
+               tmp = node;
+               if (!node_state(node, N_NORMAL_MEMORY))
+                       tmp = -1;
+               rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
+               if (!rtpn)
+                       return 1;
+
+               soft_limit_tree.rb_tree_per_node[node] = rtpn;
+
+               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+                       rtpz = &rtpn->rb_tree_per_zone[zone];
+                       rtpz->rb_root = RB_ROOT;
+                       spin_lock_init(&rtpz->lock);
+               }
+       }
+       return 0;
+}
+
 static struct cgroup_subsys_state * __ref
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
@@ -2545,10 +3107,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        for_each_node_state(node, N_POSSIBLE)
                if (alloc_mem_cgroup_per_zone_info(mem, node))
                        goto free_out;
+
        /* root ? */
        if (cont->parent == NULL) {
                enable_swap_cgroup();
                parent = NULL;
+               root_mem_cgroup = mem;
+               if (mem_cgroup_soft_limit_tree_init())
+                       goto free_out;
+
        } else {
                parent = mem_cgroup_from_cont(cont->parent);
                mem->use_hierarchy = parent->use_hierarchy;
@@ -2577,6 +3144,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        return &mem->css;
 free_out:
        __mem_cgroup_free(mem);
+       root_mem_cgroup = NULL;
        return ERR_PTR(error);
 }
 
@@ -2612,7 +3180,8 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
                                struct cgroup *cont,
                                struct cgroup *old_cont,
-                               struct task_struct *p)
+                               struct task_struct *p,
+                               bool threadgroup)
 {
        mutex_lock(&memcg_tasklist);
        /*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
new file mode 100644 (file)
index 0000000..729d4b1
--- /dev/null
@@ -0,0 +1,832 @@
+/*
+ * Copyright (C) 2008, 2009 Intel Corporation
+ * Authors: Andi Kleen, Fengguang Wu
+ *
+ * This software may be redistributed and/or modified under the terms of
+ * the GNU General Public License ("GPL") version 2 only as published by the
+ * Free Software Foundation.
+ *
+ * High level machine check handler. Handles pages reported by the
+ * hardware as being corrupted, usually due to a 2-bit ECC memory or cache
+ * failure.
+ *
+ * Handles page cache pages in various states. The tricky part
+ * here is that we can access any page asynchronously to other VM
+ * users, because memory failures could happen anytime and anywhere,
+ * possibly violating some of their assumptions. This is why this code
+ * has to be extremely careful. Generally it tries to use normal locking
+ * rules, as in get the standard locks, even if that means the
+ * error handling takes potentially a long time.
+ *
+ * The operation to map back from RMAP chains to processes has to walk
+ * the complete process list and has nonlinear complexity in the number of
+ * mappings. In short, it can be quite slow. But since memory corruptions
+ * are rare, we hope to get away with this.
+ */
+
+/*
+ * Notebook:
+ * - hugetlb needs more code
+ * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages
+ * - pass bad pages to kdump next kernel
+ */
+#define DEBUG 1                /* remove me in 2.6.34 */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/backing-dev.h>
+#include "internal.h"
+
+int sysctl_memory_failure_early_kill __read_mostly = 0;
+
+int sysctl_memory_failure_recovery __read_mostly = 1;
+
+atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
+
+/*
+ * Send all the processes that have the page mapped an "action optional"
+ * signal.
+ */
+static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
+                       unsigned long pfn)
+{
+       struct siginfo si;
+       int ret;
+
+       printk(KERN_ERR
+               "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
+               pfn, t->comm, t->pid);
+       si.si_signo = SIGBUS;
+       si.si_errno = 0;
+       si.si_code = BUS_MCEERR_AO;
+       si.si_addr = (void *)addr;
+#ifdef __ARCH_SI_TRAPNO
+       si.si_trapno = trapno;
+#endif
+       si.si_addr_lsb = PAGE_SHIFT;
+       /*
+        * Don't use force here, it's convenient if the signal
+        * can be temporarily blocked.
+        * This could cause a loop when the user sets SIGBUS
+        * to SIG_IGN, but hopefully no one will do that.
+        */
+       ret = send_sig_info(SIGBUS, &si, t);  /* synchronous? */
+       if (ret < 0)
+               printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
+                      t->comm, t->pid, ret);
+       return ret;
+}
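On the receiving side, a process can field the asynchronous SIGBUS that kill_proc_ao() sends. A hedged userspace sketch: BUS_MCEERR_AO and si_addr_lsb are part of the hwpoison signal ABI this series relies on (userspace header availability varies), and fprintf() is used purely for demonstration, since it is not async-signal-safe:

	#include <signal.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static void hwpoison_handler(int sig, siginfo_t *si, void *ctx)
	{
		if (si->si_code == BUS_MCEERR_AO)
			fprintf(stderr, "corruption at %p, lsb %d\n",
				si->si_addr, (int)si->si_addr_lsb);
		/* recover by discarding or rebuilding the affected data */
	}

	int main(void)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_sigaction = hwpoison_handler;
		sa.sa_flags = SA_SIGINFO;
		sigaction(SIGBUS, &sa, NULL);
		pause();	/* wait for the asynchronous signal */
		return 0;
	}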
+
+/*
+ * Kill all processes that have a poisoned page mapped and then isolate
+ * the page.
+ *
+ * General strategy:
+ * Find all processes having the page mapped and kill them.
+ * But we keep a page reference around so that the page is not
+ * actually freed yet.
+ * Then stash the page away
+ *
+ * There's no convenient way to get back to mapped processes
+ * from the VMAs. So do a brute-force search over all
+ * running processes.
+ *
+ * Remember that machine checks are not common (or rather
+ * if they are common you have other problems), so this shouldn't
+ * be a performance issue.
+ *
+ * Also there are some races possible while we get from the
+ * error detection to actually handle it.
+ */
+
+struct to_kill {
+       struct list_head nd;
+       struct task_struct *tsk;
+       unsigned long addr;
+       unsigned addr_valid:1;
+};
+
+/*
+ * Failure handling: if we can't find or can't kill a process, there's
+ * not much we can do; we just print a message and otherwise ignore it.
+ */
+
+/*
+ * Schedule a process for later kill.
+ * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
+ * TBD would GFP_NOIO be enough?
+ */
+static void add_to_kill(struct task_struct *tsk, struct page *p,
+                      struct vm_area_struct *vma,
+                      struct list_head *to_kill,
+                      struct to_kill **tkc)
+{
+       struct to_kill *tk;
+
+       if (*tkc) {
+               tk = *tkc;
+               *tkc = NULL;
+       } else {
+               tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
+               if (!tk) {
+                       printk(KERN_ERR
+               "MCE: Out of memory while machine check handling\n");
+                       return;
+               }
+       }
+       tk->addr = page_address_in_vma(p, vma);
+       tk->addr_valid = 1;
+
+       /*
+        * In theory we don't have to kill when the page was
+        * munmapped. But it could also be a mremap. Since that's
+        * likely very rare, kill anyway just out of paranoia, but use
+        * SIGKILL because the error is no longer contained.
+        */
+       if (tk->addr == -EFAULT) {
+               pr_debug("MCE: Unable to find user space address %lx in %s\n",
+                       page_to_pfn(p), tsk->comm);
+               tk->addr_valid = 0;
+       }
+       get_task_struct(tsk);
+       tk->tsk = tsk;
+       list_add_tail(&tk->nd, to_kill);
+}
+
+/*
+ * Kill the processes that have been collected earlier.
+ *
+ * Only do anything when DOIT is set, otherwise just free the list
+ * (this is used for clean pages which do not need killing)
+ * Also when FAIL is set do a force kill because something went
+ * wrong earlier.
+ */
+static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
+                         int fail, unsigned long pfn)
+{
+       struct to_kill *tk, *next;
+
+       list_for_each_entry_safe (tk, next, to_kill, nd) {
+               if (doit) {
+                       /*
+                        * In case something went wrong with munmapping,
+                        * make sure the process doesn't catch the
+                        * signal and then access the memory. Just kill it.
+                        */
+                       if (fail || tk->addr_valid == 0) {
+                               printk(KERN_ERR
+               "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+                                       pfn, tk->tsk->comm, tk->tsk->pid);
+                               force_sig(SIGKILL, tk->tsk);
+                       }
+
+                       /*
+                        * In theory the process could have mapped
+                        * something else on the address in-between. We could
+                        * check for that, but we need to tell the
+                        * process anyways.
+                        */
+                       else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
+                                             pfn) < 0)
+                               printk(KERN_ERR
+               "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
+                                       pfn, tk->tsk->comm, tk->tsk->pid);
+               }
+               put_task_struct(tk->tsk);
+               kfree(tk);
+       }
+}
+
+static int task_early_kill(struct task_struct *tsk)
+{
+       if (!tsk->mm)
+               return 0;
+       if (tsk->flags & PF_MCE_PROCESS)
+               return !!(tsk->flags & PF_MCE_EARLY);
+       return sysctl_memory_failure_early_kill;
+}
+
+/*
+ * Collect processes when the error hit an anonymous page.
+ */
+static void collect_procs_anon(struct page *page, struct list_head *to_kill,
+                             struct to_kill **tkc)
+{
+       struct vm_area_struct *vma;
+       struct task_struct *tsk;
+       struct anon_vma *av;
+
+       read_lock(&tasklist_lock);
+       av = page_lock_anon_vma(page);
+       if (av == NULL) /* Not actually mapped anymore */
+               goto out;
+       for_each_process (tsk) {
+               if (!task_early_kill(tsk))
+                       continue;
+               list_for_each_entry (vma, &av->head, anon_vma_node) {
+                       if (!page_mapped_in_vma(page, vma))
+                               continue;
+                       if (vma->vm_mm == tsk->mm)
+                               add_to_kill(tsk, page, vma, to_kill, tkc);
+               }
+       }
+       page_unlock_anon_vma(av);
+out:
+       read_unlock(&tasklist_lock);
+}
+
+/*
+ * Collect processes when the error hit a file mapped page.
+ */
+static void collect_procs_file(struct page *page, struct list_head *to_kill,
+                             struct to_kill **tkc)
+{
+       struct vm_area_struct *vma;
+       struct task_struct *tsk;
+       struct prio_tree_iter iter;
+       struct address_space *mapping = page->mapping;
+
+       /*
+        * A note on the locking order between the two locks.
+        * We don't rely on this particular order.
+        * If you have some other code that needs a different order
+        * feel free to switch them around. Or add a reverse link
+        * from mm_struct to task_struct, then this could be all
+        * done without taking tasklist_lock and looping over all tasks.
+        */
+
+       read_lock(&tasklist_lock);
+       spin_lock(&mapping->i_mmap_lock);
+       for_each_process(tsk) {
+               pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+               if (!task_early_kill(tsk))
+                       continue;
+
+               vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
+                                     pgoff) {
+                       /*
+                        * Send early kill signal to tasks where a vma covers
+                        * the page, even if the corrupted page is not
+                        * necessarily mapped in their page tables.
+                        * Assume applications that requested early kill want
+                        * to be informed of all such data corruptions.
+                        */
+                       if (vma->vm_mm == tsk->mm)
+                               add_to_kill(tsk, page, vma, to_kill, tkc);
+               }
+       }
+       spin_unlock(&mapping->i_mmap_lock);
+       read_unlock(&tasklist_lock);
+}
+
+/*
+ * Collect the processes that have the corrupted page mapped, to kill them.
+ * This is done in two steps for locking reasons.
+ * First preallocate one to_kill structure outside the spin locks,
+ * so that we can kill at least one process reasonably reliably.
+ */
+static void collect_procs(struct page *page, struct list_head *tokill)
+{
+       struct to_kill *tk;
+
+       if (!page->mapping)
+               return;
+
+       tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
+       if (!tk)
+               return;
+       if (PageAnon(page))
+               collect_procs_anon(page, tokill, &tk);
+       else
+               collect_procs_file(page, tokill, &tk);
+       kfree(tk);
+}
+
+/*
+ * Error handlers for various types of pages.
+ */
+
+enum outcome {
+       FAILED,         /* Error handling failed */
+       DELAYED,        /* Will be handled later */
+       IGNORED,        /* Error safely ignored */
+       RECOVERED,      /* Successfully recovered */
+};
+
+static const char *action_name[] = {
+       [FAILED] = "Failed",
+       [DELAYED] = "Delayed",
+       [IGNORED] = "Ignored",
+       [RECOVERED] = "Recovered",
+};
+
+/*
+ * Error hit kernel page.
+ * Do nothing and try to be lucky by not touching the page. For a few cases we
+ * could be more sophisticated.
+ */
+static int me_kernel(struct page *p, unsigned long pfn)
+{
+       return DELAYED;
+}
+
+/*
+ * Already poisoned page.
+ */
+static int me_ignore(struct page *p, unsigned long pfn)
+{
+       return IGNORED;
+}
+
+/*
+ * Page in unknown state. Do nothing.
+ */
+static int me_unknown(struct page *p, unsigned long pfn)
+{
+       printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn);
+       return FAILED;
+}
+
+/*
+ * Free memory
+ */
+static int me_free(struct page *p, unsigned long pfn)
+{
+       return DELAYED;
+}
+
+/*
+ * Clean (or cleaned) page cache page.
+ */
+static int me_pagecache_clean(struct page *p, unsigned long pfn)
+{
+       int err;
+       int ret = FAILED;
+       struct address_space *mapping;
+
+       if (!isolate_lru_page(p))
+               page_cache_release(p);
+
+       /*
+        * For anonymous pages we're done; the only reference left
+        * should be the one memory_failure() holds.
+        */
+       if (PageAnon(p))
+               return RECOVERED;
+
+       /*
+        * Now truncate the page in the page cache. This is really
+        * more like a "temporary hole punch".
+        * Don't do this for block devices when someone else
+        * has a reference, because it could be file system metadata
+        * and that's not safe to truncate.
+        */
+       mapping = page_mapping(p);
+       if (!mapping) {
+               /*
+                * Page has been torn down in the meantime.
+                */
+               return FAILED;
+       }
+
+       /*
+        * Truncation is a bit tricky. Enable it per file system for now.
+        *
+        * Open: to take i_mutex or not for this? Right now we don't.
+        */
+       if (mapping->a_ops->error_remove_page) {
+               err = mapping->a_ops->error_remove_page(mapping, p);
+               if (err != 0) {
+                       printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n",
+                                       pfn, err);
+               } else if (page_has_private(p) &&
+                               !try_to_release_page(p, GFP_NOIO)) {
+                       pr_debug("MCE %#lx: failed to release buffers\n", pfn);
+               } else {
+                       ret = RECOVERED;
+               }
+       } else {
+               /*
+                * If the file system doesn't support it, just invalidate.
+                * This fails on dirty pages or anything with private buffers.
+                */
+               if (invalidate_inode_page(p))
+                       ret = RECOVERED;
+               else
+                       printk(KERN_INFO "MCE %#lx: Failed to invalidate\n",
+                               pfn);
+       }
+       return ret;
+}
+
+/*
+ * Dirty pagecache page.
+ * Issues: when the error hits a hole page the error is not properly
+ * propagated.
+ */
+static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+{
+       struct address_space *mapping = page_mapping(p);
+
+       SetPageError(p);
+       /* TBD: print more information about the file. */
+       if (mapping) {
+               /*
+                * IO error will be reported by write(), fsync(), etc.
+                * which check the mapping.
+                * This way the application knows that something went
+                * wrong with its dirty file data.
+                *
+                * There's one open issue:
+                *
+                * The EIO will only be reported on the next IO
+                * operation and then cleared through the IO map.
+                * Normally Linux has two mechanisms to pass IO errors:
+                * first through the AS_EIO flag in the address space
+                * and then through the PageError flag in the page.
+                * Since we drop pages on memory failure handling the
+                * only mechanism open to us is AS_EIO.
+                *
+                * This has the disadvantage that it gets cleared on
+                * the first operation that returns an error, while
+                * the PageError bit is more sticky and only cleared
+                * when the page is reread or dropped.  If an
+                * application assumes it will always get an error on
+                * fsync, but does other operations on the fd before
+                * and the page is dropped in between, then the error
+                * will not be properly reported.
+                *
+                * This can already happen even without hwpoisoned
+                * pages: first on metadata IO errors (which only
+                * report through AS_EIO) or when the page is dropped
+                * at the wrong time.
+                *
+                * So right now we assume that the application does the
+                * right thing on the first EIO; we're no worse than other
+                * parts of the kernel.
+                */
+               mapping_set_error(mapping, EIO);
+       }
+
+       return me_pagecache_clean(p, pfn);
+}
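
As a concrete illustration of the reporting path the comment above describes,
a user-space writer that fsyncs would observe the failure roughly like this
(a sketch; the EIO is delivered once on the next write()/fsync() and then
cleared):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Returns 0 on success, -1 if the write or the flush failed. */
int write_and_sync(int fd, const void *buf, size_t len)
{
        if (write(fd, buf, len) != (ssize_t)len)
                return -1;
        if (fsync(fd) != 0) {
                /* A dropped poisoned dirty page surfaces here as EIO, once. */
                fprintf(stderr, "fsync: %s\n", strerror(errno));
                return -1;
        }
        return 0;
}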
+
+/*
+ * Clean and dirty swap cache.
+ *
+ * Dirty swap cache page is tricky to handle. The page could live both in page
+ * cache and swap cache (i.e. the page was freshly swapped in). So it could be
+ * referenced concurrently by 2 types of PTEs:
+ * normal PTEs and swap PTEs. We try to handle them consistently by calling
+ * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
+ * and then
+ *      - clear dirty bit to prevent IO
+ *      - remove from LRU
+ *      - but keep in the swap cache, so that when we return to it on
+ *        a later page fault, we know the application is accessing
+ *        corrupted data and shall be killed (we installed simple
+ *        interception code in do_swap_page to catch it).
+ *
+ * Clean swap cache pages can be directly isolated. A later page fault will
+ * bring in the known good data from disk.
+ */
+static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+{
+       int ret = FAILED;
+
+       ClearPageDirty(p);
+       /* Trigger EIO in shmem: */
+       ClearPageUptodate(p);
+
+       if (!isolate_lru_page(p)) {
+               page_cache_release(p);
+               ret = DELAYED;
+       }
+
+       return ret;
+}
+
+static int me_swapcache_clean(struct page *p, unsigned long pfn)
+{
+       int ret = FAILED;
+
+       if (!isolate_lru_page(p)) {
+               page_cache_release(p);
+               ret = RECOVERED;
+       }
+       delete_from_swap_cache(p);
+       return ret;
+}
+
+/*
+ * Huge pages. Needs work.
+ * Issues:
+ * No rmap support, so we cannot find the original mapper. In theory we could
+ * walk all MMs and look for the mappings, but that would be non-atomic and
+ * racy; we need rmap for hugepages for this. Alternatively we could employ a
+ * heuristic, like just walking the current process and hoping it has the page
+ * mapped (that should usually be true for the common "shared database cache"
+ * case).
+ * We should also handle free huge pages and dequeue them, but that requires
+ * getting huge page accounting right.
+ */
+static int me_huge_page(struct page *p, unsigned long pfn)
+{
+       return FAILED;
+}
+
+/*
+ * Various page states we can handle.
+ *
+ * A page state is defined by its current page->flags bits.
+ * The table matches them in order and calls the right handler.
+ *
+ * This is quite tricky because we can access the page at any time
+ * in its life cycle, so all accesses have to be extremely careful.
+ *
+ * This is not complete. More states could be added.
+ * For any missing state don't attempt recovery.
+ */
+
+#define dirty          (1UL << PG_dirty)
+#define sc             (1UL << PG_swapcache)
+#define unevict                (1UL << PG_unevictable)
+#define mlock          (1UL << PG_mlocked)
+#define writeback      (1UL << PG_writeback)
+#define lru            (1UL << PG_lru)
+#define swapbacked     (1UL << PG_swapbacked)
+#define head           (1UL << PG_head)
+#define tail           (1UL << PG_tail)
+#define compound       (1UL << PG_compound)
+#define slab           (1UL << PG_slab)
+#define buddy          (1UL << PG_buddy)
+#define reserved       (1UL << PG_reserved)
+
+static struct page_state {
+       unsigned long mask;
+       unsigned long res;
+       char *msg;
+       int (*action)(struct page *p, unsigned long pfn);
+} error_states[] = {
+       { reserved,     reserved,       "reserved kernel",      me_ignore },
+       { buddy,        buddy,          "free kernel",  me_free },
+
+       /*
+        * Could in theory check if slab page is free or if we can drop
+        * currently unused objects without touching them. But just
+        * treat it as standard kernel for now.
+        */
+       { slab,         slab,           "kernel slab",  me_kernel },
+
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+       { head,         head,           "huge",         me_huge_page },
+       { tail,         tail,           "huge",         me_huge_page },
+#else
+       { compound,     compound,       "huge",         me_huge_page },
+#endif
+
+       { sc|dirty,     sc|dirty,       "swapcache",    me_swapcache_dirty },
+       { sc|dirty,     sc,             "swapcache",    me_swapcache_clean },
+
+       { unevict|dirty, unevict|dirty, "unevictable LRU", me_pagecache_dirty},
+       { unevict,      unevict,        "unevictable LRU", me_pagecache_clean},
+
+#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
+       { mlock|dirty,  mlock|dirty,    "mlocked LRU",  me_pagecache_dirty },
+       { mlock,        mlock,          "mlocked LRU",  me_pagecache_clean },
+#endif
+
+       { lru|dirty,    lru|dirty,      "LRU",          me_pagecache_dirty },
+       { lru|dirty,    lru,            "clean LRU",    me_pagecache_clean },
+       { swapbacked,   swapbacked,     "anonymous",    me_pagecache_clean },
+
+       /*
+        * Catchall entry: must be at end.
+        */
+       { 0,            0,              "unknown page state",   me_unknown },
+};
+
+#undef lru
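
The table is scanned top to bottom and the first entry satisfying
(page->flags & mask) == res wins. That is why { sc|dirty, sc } matches a
clean swapcache page: the dirty bit is included in the mask but required to
be clear. A standalone sketch of that first-match lookup, using made-up flag
values rather than real page flag bits:

#include <stdio.h>

#define F_DIRTY (1UL << 0)
#define F_SC    (1UL << 1)
#define F_LRU   (1UL << 2)

struct state { unsigned long mask, res; const char *msg; };

static const struct state states[] = {
        { F_SC|F_DIRTY,  F_SC|F_DIRTY,  "dirty swapcache" },
        { F_SC|F_DIRTY,  F_SC,          "clean swapcache" },
        { F_LRU|F_DIRTY, F_LRU|F_DIRTY, "dirty LRU" },
        { F_LRU|F_DIRTY, F_LRU,         "clean LRU" },
        { 0, 0, "unknown" },    /* catchall: mask 0 matches anything */
};

static const char *classify(unsigned long flags)
{
        const struct state *s;

        for (s = states; ; s++)
                if ((flags & s->mask) == s->res)
                        return s->msg;
}

int main(void)
{
        printf("%s\n", classify(F_SC));                 /* clean swapcache */
        printf("%s\n", classify(F_LRU | F_DIRTY));      /* dirty LRU */
        return 0;
}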
+
+static void action_result(unsigned long pfn, char *msg, int result)
+{
+       struct page *page = NULL;
+       if (pfn_valid(pfn))
+               page = pfn_to_page(pfn);
+
+       printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n",
+               pfn,
+               page && PageDirty(page) ? "dirty " : "",
+               msg, action_name[result]);
+}
+
+static int page_action(struct page_state *ps, struct page *p,
+                       unsigned long pfn, int ref)
+{
+       int result;
+
+       result = ps->action(p, pfn);
+       action_result(pfn, ps->msg, result);
+       if (page_count(p) != 1 + ref)
+               printk(KERN_ERR
+                      "MCE %#lx: %s page still referenced by %d users\n",
+                      pfn, ps->msg, page_count(p) - 1);
+
+       /* Could do more checks here if page looks ok */
+       /*
+        * Could adjust zone counters here to correct for the missing page.
+        */
+
+       return result == RECOVERED ? 0 : -EBUSY;
+}
+
+#define N_UNMAP_TRIES 5
+
+/*
+ * Do all that is necessary to remove user space mappings. Unmap
+ * the pages and send SIGBUS to the processes if the data was dirty.
+ */
+static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
+                                 int trapno)
+{
+       enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+       struct address_space *mapping;
+       LIST_HEAD(tokill);
+       int ret;
+       int i;
+       int kill = 1;
+
+       if (PageReserved(p) || PageCompound(p) || PageSlab(p))
+               return;
+
+       if (!PageLRU(p))
+               lru_add_drain_all();
+
+       /*
+        * This check implies we don't kill processes early if their
+        * pages are in the swap cache. Those are always late kills.
+        */
+       if (!page_mapped(p))
+               return;
+
+       if (PageSwapCache(p)) {
+               printk(KERN_ERR
+                      "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
+               ttu |= TTU_IGNORE_HWPOISON;
+       }
+
+       /*
+        * Propagate the dirty bit from PTEs to struct page first, because we
+        * need this to decide if we should kill or just drop the page.
+        */
+       mapping = page_mapping(p);
+       if (!PageDirty(p) && mapping && mapping_cap_writeback_dirty(mapping)) {
+               if (page_mkclean(p)) {
+                       SetPageDirty(p);
+               } else {
+                       kill = 0;
+                       ttu |= TTU_IGNORE_HWPOISON;
+                       printk(KERN_INFO
+       "MCE %#lx: corrupted page was clean: dropped without side effects\n",
+                               pfn);
+               }
+       }
+
+       /*
+        * First collect all the processes that have the page
+        * mapped in dirty form.  This has to be done before try_to_unmap,
+        * because ttu takes the rmap data structures down.
+        *
+        * Error handling: We ignore errors here because
+        * there's nothing that can be done.
+        */
+       if (kill)
+               collect_procs(p, &tokill);
+
+       /*
+        * try_to_unmap can fail temporarily due to races.
+        * Try a few times (RED-PEN better strategy?)
+        */
+       for (i = 0; i < N_UNMAP_TRIES; i++) {
+               ret = try_to_unmap(p, ttu);
+               if (ret == SWAP_SUCCESS)
+                       break;
+               pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn,  ret);
+       }
+
+       if (ret != SWAP_SUCCESS)
+               printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
+                               pfn, page_mapcount(p));
+
+       /*
+        * Now that the dirty bit has been propagated to the
+        * struct page and all unmaps done we can decide if
+        * killing is needed or not.  Only kill when the page
+        * was dirty, otherwise the tokill list is merely
+        * freed.  When there was a problem unmapping earlier,
+        * use a more forceful, uncatchable kill to prevent
+        * any accesses to the poisoned memory.
+        */
+       kill_procs_ao(&tokill, !!PageDirty(p), trapno,
+                     ret != SWAP_SUCCESS, pfn);
+}
+
+int __memory_failure(unsigned long pfn, int trapno, int ref)
+{
+       struct page_state *ps;
+       struct page *p;
+       int res;
+
+       if (!sysctl_memory_failure_recovery)
+               panic("Memory failure from trap %d on page %lx", trapno, pfn);
+
+       if (!pfn_valid(pfn)) {
+               action_result(pfn, "memory outside kernel control", IGNORED);
+               return -EIO;
+       }
+
+       p = pfn_to_page(pfn);
+       if (TestSetPageHWPoison(p)) {
+               action_result(pfn, "already hardware poisoned", IGNORED);
+               return 0;
+       }
+
+       atomic_long_add(1, &mce_bad_pages);
+
+       /*
+        * We need/can do nothing about count=0 pages.
+        * 1) it's a free page, and therefore in safe hands:
+        *    prep_new_page() will be the gate keeper.
+        * 2) it's part of a non-compound high order page.
+        *    Implies some kernel user: we cannot stop them from
+        *    reading/writing the page; let's pray that the page has
+        *    been used and will be freed some time later.
+        * In fact it's dangerous to directly bump up the page count from 0,
+        * as that may cause a page_freeze_refs()/page_unfreeze_refs() mismatch.
+        */
+       if (!get_page_unless_zero(compound_head(p))) {
+               action_result(pfn, "free or high order kernel", IGNORED);
+               return PageBuddy(compound_head(p)) ? 0 : -EBUSY;
+       }
+
+       /*
+        * Lock the page and wait for writeback to finish.
+        * It's very difficult to mess with pages currently under IO
+        * and in many cases impossible, so we just avoid it here.
+        */
+       lock_page_nosync(p);
+       wait_on_page_writeback(p);
+
+       /*
+        * Now take care of user space mappings.
+        */
+       hwpoison_user_mappings(p, pfn, trapno);
+
+       /*
+        * Torn down by someone else?
+        */
+       if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+               action_result(pfn, "already truncated LRU", IGNORED);
+               res = 0;
+               goto out;
+       }
+
+       res = -EBUSY;
+       for (ps = error_states;; ps++) {
+               if ((p->flags & ps->mask) == ps->res) {
+                       res = page_action(ps, p, pfn, ref);
+                       break;
+               }
+       }
+out:
+       unlock_page(p);
+       return res;
+}
+EXPORT_SYMBOL_GPL(__memory_failure);
+
+/**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page frame number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ *
+ * This function is called by the low level machine check code
+ * of an architecture when it detects hardware memory corruption
+ * of a page. It tries its best to recover, which includes
+ * dropping pages, killing processes, etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber).
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks held.
+ */
+void memory_failure(unsigned long pfn, int trapno)
+{
+       __memory_failure(pfn, trapno, 0);
+}
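
For reference, a process that requested early kill (via the accompanying
sysctl/prctl mechanism in this series) receives these asynchronous reports
as SIGBUS with si_code BUS_MCEERR_AO ("action optional"), si_addr pointing
into the poisoned mapping. A hedged user-space sketch of such a handler:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void mce_handler(int sig, siginfo_t *si, void *ctx)
{
        /* fprintf is not async-signal-safe; used here for illustration only. */
        if (si->si_code == BUS_MCEERR_AO)
                fprintf(stderr, "hwpoison: lost page at %p\n", si->si_addr);
        _exit(EXIT_FAILURE);
}

int main(void)
{
        struct sigaction sa = {
                .sa_sigaction   = mce_handler,
                .sa_flags       = SA_SIGINFO,
        };

        sigaction(SIGBUS, &sa, NULL);
        pause();        /* a real application would do its work here */
        return 0;
}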
index b1443ac07c00a4f1a46de6bb260d00e8f52f99db..7e91b5f9f690e4ae9d7c3e58bc1530c995311089 100644 (file)
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                unsigned long addr = vma->vm_start;
 
                /*
-                * Hide vma from rmap and vmtruncate before freeing pgtables
+                * Hide vma from rmap and truncate_pagecache before freeing
+                * pgtables
                 */
                anon_vma_unlink(vma);
                unlink_file_vma(vma);
@@ -1325,7 +1326,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM)
                                                return i ? i : -ENOMEM;
-                                       else if (ret & VM_FAULT_SIGBUS)
+                                       if (ret &
+                                           (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
@@ -2407,7 +2409,7 @@ restart:
  * @mapping: the address space containing mmaps to be unmapped.
  * @holebegin: byte in first page to unmap, relative to the start of
  * the underlying file.  This will be rounded down to a PAGE_SIZE
- * boundary.  Note that this is different from vmtruncate(), which
+ * boundary.  Note that this is different from truncate_pagecache(), which
  * must keep the partial page.  In contrast, we must get rid of
  * partial pages.
  * @holelen: size of prospective hole in bytes.  This will be rounded
@@ -2458,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
-       if (inode->i_size < offset) {
-               unsigned long limit;
-
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && offset > limit)
-                       goto out_sig;
-               if (offset > inode->i_sb->s_maxbytes)
-                       goto out_big;
-               i_size_write(inode, offset);
-       } else {
-               struct address_space *mapping = inode->i_mapping;
-
-               /*
-                * truncation of in-use swapfiles is disallowed - it would
-                * cause subsequent swapout to scribble on the now-freed
-                * blocks.
-                */
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               i_size_write(inode, offset);
-
-               /*
-                * unmap_mapping_range is called twice, first simply for
-                * efficiency so that truncate_inode_pages does fewer
-                * single-page unmaps.  However after this first call, and
-                * before truncate_inode_pages finishes, it is possible for
-                * private pages to be COWed, which remain after
-                * truncate_inode_pages finishes, hence the second
-                * unmap_mapping_range call must be made for correctness.
-                */
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-               truncate_inode_pages(mapping, offset);
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       }
-
-       if (inode->i_op->truncate)
-               inode->i_op->truncate(inode);
-       return 0;
-
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 {
        struct address_space *mapping = inode->i_mapping;
@@ -2559,8 +2504,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out;
 
        entry = pte_to_swp_entry(orig_pte);
-       if (is_migration_entry(entry)) {
-               migration_entry_wait(mm, pmd, address);
+       if (unlikely(non_swap_entry(entry))) {
+               if (is_migration_entry(entry)) {
+                       migration_entry_wait(mm, pmd, address);
+               } else if (is_hwpoison_entry(entry)) {
+                       ret = VM_FAULT_HWPOISON;
+               } else {
+                       print_bad_pte(vma, address, orig_pte, NULL);
+                       ret = VM_FAULT_OOM;
+               }
                goto out;
        }
        delayacct_set_flag(DELAYACCT_PF_SWAPIN);
@@ -2584,6 +2536,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                /* Had to read the page from swap area: Major fault */
                ret = VM_FAULT_MAJOR;
                count_vm_event(PGMAJFAULT);
+       } else if (PageHWPoison(page)) {
+               ret = VM_FAULT_HWPOISON;
+               delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+               goto out;
        }
 
        lock_page(page);
@@ -2760,6 +2716,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
                return ret;
 
+       if (unlikely(PageHWPoison(vmf.page))) {
+               if (ret & VM_FAULT_LOCKED)
+                       unlock_page(vmf.page);
+               return VM_FAULT_HWPOISON;
+       }
+
        /*
         * For consistency in subsequent calls, make the faulted page always
         * locked.
index 16052e80aaacbc182c9ea421bb7f84b02fc3a5b4..1a4bf4813780eb700ee026030bca18fedc2fbae6 100644 (file)
@@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
        }
 
        /* Establish migration ptes or remove ptes */
-       try_to_unmap(page, 1);
+       try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
 skip_unmap:
        if (!page_mapped(page))
index 20a07dba6be04fb20f21bbac5ed1e289a8ccd354..97bff2547719e702150e1cdc4d4a3f6b31a23213 100644 (file)
@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
        if (vma->vm_file) {
                /*
                 * Subtle point from Rajesh Venkatasubramanian: before
-                * moving file-based ptes, we must lock vmtruncate out,
-                * since it might clean the dst vma before the src vma,
+                * moving file-based ptes, we must lock truncate_pagecache
+                * out, since it might clean the dst vma before the src vma,
                 * and we propagate stale pages into the dst afterward.
                 */
                mapping = vma->vm_file->f_mapping;
index 8d484241d0345e71750b71b36d37bfa7f21bdd06..56a446f059716ccb95736fa7670dd1f07e6b30b5 100644 (file)
@@ -82,46 +82,6 @@ DECLARE_RWSEM(nommu_region_sem);
 struct vm_operations_struct generic_file_vm_ops = {
 };
 
-/*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode *inode, loff_t offset)
-{
-       struct address_space *mapping = inode->i_mapping;
-       unsigned long limit;
-
-       if (inode->i_size < offset)
-               goto do_expand;
-       i_size_write(inode, offset);
-
-       truncate_inode_pages(mapping, offset);
-       goto out_truncate;
-
-do_expand:
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && offset > limit)
-               goto out_sig;
-       if (offset > inode->i_sb->s_maxbytes)
-               goto out;
-       i_size_write(inode, offset);
-
-out_truncate:
-       if (inode->i_op->truncate)
-               inode->i_op->truncate(inode);
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out:
-       return -EFBIG;
-}
-
-EXPORT_SYMBOL(vmtruncate);
-
 /*
  * Return the total memory allocated for this pointer, not
  * just what the caller asked for.
index 5f378dd588027c227dc95551e27e4b6431e5df60..d99664e8607e761235a13b2662353df131ac5b41 100644 (file)
@@ -155,37 +155,37 @@ static void update_completion_period(void)
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int ret;
 
-       ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write)
                dirty_background_bytes = 0;
        return ret;
 }
 
 int dirty_background_bytes_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int ret;
 
-       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write)
                dirty_background_ratio = 0;
        return ret;
 }
 
 int dirty_ratio_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int old_ratio = vm_dirty_ratio;
        int ret;
 
-       ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
                update_completion_period();
                vm_dirty_bytes = 0;
@@ -195,13 +195,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
 
 
 int dirty_bytes_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        unsigned long old_bytes = vm_dirty_bytes;
        int ret;
 
-       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
                update_completion_period();
                vm_dirty_ratio = 0;
@@ -686,9 +686,9 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec(table, write, file, buffer, length, ppos);
+       proc_dointvec(table, write, buffer, length, ppos);
        return 0;
 }
 
@@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
 EXPORT_SYMBOL(redirty_page_for_writepage);
 
 /*
+ * Dirty a page.
+ *
+ * For pages with a mapping this should be done under the page lock
+ * for the benefit of asynchronous memory error handling, which prefers
+ * a consistent dirty state. This rule can be broken in some special
+ * cases, but it is better not to.
+ *
  * If the mapping doesn't provide a set_page_dirty a_op, then
  * just fall through and assume that it wants buffer_heads.
  */
index 5717f27a0704b18637221c0bd9dcd820c7bc17c3..bf720550b44d85adc294f7fd0b8ede38f73a8902 100644 (file)
@@ -234,6 +234,12 @@ static void bad_page(struct page *page)
        static unsigned long nr_shown;
        static unsigned long nr_unshown;
 
+       /* Don't complain about poisoned pages */
+       if (PageHWPoison(page)) {
+               __ClearPageBuddy(page);
+               return;
+       }
+
        /*
         * Allow a burst of 60 reports, then keep quiet for that minute;
         * or allow a steady drip of one report per second.
@@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page,
 /*
  * This page is about to be returned from the page allocator
  */
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static inline int check_new_page(struct page *page)
 {
        if (unlikely(page_mapcount(page) |
                (page->mapping != NULL)  |
@@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
                bad_page(page);
                return 1;
        }
+       return 0;
+}
+
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+{
+       int i;
+
+       for (i = 0; i < (1 << order); i++) {
+               struct page *p = page + i;
+               if (unlikely(check_new_page(p)))
+                       return 1;
+       }
 
        set_page_private(page, 0);
        set_page_refcounted(page);
@@ -2373,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
  * sysctl handler for numa_zonelist_order
  */
 int numa_zonelist_order_handler(ctl_table *table, int write,
-               struct file *file, void __user *buffer, size_t *length,
+               void __user *buffer, size_t *length,
                loff_t *ppos)
 {
        char saved_string[NUMA_ZONELIST_ORDER_LEN];
@@ -2382,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
        if (write)
                strncpy(saved_string, (char*)table->data,
                        NUMA_ZONELIST_ORDER_LEN);
-       ret = proc_dostring(table, write, file, buffer, length, ppos);
+       ret = proc_dostring(table, write, buffer, length, ppos);
        if (ret)
                return ret;
        if (write) {
@@ -4706,9 +4724,9 @@ module_init(init_per_zone_wmark_min)
  *     changes.
  */
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write, 
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec(table, write, file, buffer, length, ppos);
+       proc_dointvec(table, write, buffer, length, ppos);
        if (write)
                setup_per_zone_wmarks();
        return 0;
@@ -4716,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 
 #ifdef CONFIG_NUMA
 int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct zone *zone;
        int rc;
 
-       rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (rc)
                return rc;
 
@@ -4732,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct zone *zone;
        int rc;
 
-       rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (rc)
                return rc;
 
@@ -4758,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       proc_dointvec_minmax(table, write, buffer, length, ppos);
        setup_per_zone_lowmem_reserve();
        return 0;
 }
@@ -4772,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
  */
 
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct zone *zone;
        unsigned int cpu;
        int ret;
 
-       ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (!write || (ret == -EINVAL))
                return ret;
        for_each_populated_zone(zone) {
index 6eedf7e473d1e89f09fcc5814e342bee01540ce4..6633965bb27bcb609be4578dcb8bcd30cd3e39c8 100644 (file)
@@ -29,7 +29,6 @@ static unsigned long max_pages(unsigned long min_pages)
        int node = numa_node_id();
        struct zone *zones = NODE_DATA(node)->node_zones;
        int num_cpus_on_node;
-       const struct cpumask *cpumask_on_node = cpumask_of_node(node);
 
        node_free_pages =
 #ifdef CONFIG_ZONE_DMA
@@ -42,7 +41,7 @@ static unsigned long max_pages(unsigned long min_pages)
 
        max = node_free_pages / FRACTION_OF_NODE_MEM;
 
-       num_cpus_on_node = cpus_weight_nr(*cpumask_on_node);
+       num_cpus_on_node = cpumask_weight(cpumask_of_node(node));
        max /= num_cpus_on_node;
 
        return max(max, min_pages);
index 720fc03a7bc454de75fa86f542770ab9b9660788..28aafe2b530668b03c766619a83873ee2a91087e 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
  *                 mapping->tree_lock (widely used, in set_page_dirty,
  *                           in arch-dependent flush_dcache_mmap_lock,
  *                           within inode_lock in __sync_single_inode)
+ *
+ * (code doesn't rely on that order so it could be switched around)
+ * ->tasklist_lock
+ *   anon_vma->lock      (memory_failure, collect_procs_anon)
+ *     pte map lock
  */
 
 #include <linux/mm.h>
@@ -191,7 +196,7 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
        struct anon_vma *anon_vma;
        unsigned long anon_mapping;
@@ -211,7 +216,7 @@ out:
        return NULL;
 }
 
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
        spin_unlock(&anon_vma->lock);
        rcu_read_unlock();
@@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
  * if the page is not mapped into the page tables of this VMA.  Only
  * valid for normal file or anonymous VMAs.
  */
-static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 {
        unsigned long address;
        pte_t *pte;
@@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page)
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
 static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-                               int migration)
+                               enum ttu_flags flags)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
@@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         * If it's recently referenced (perhaps page_referenced
         * skipped over this mm) then we should reactivate it.
         */
-       if (!migration) {
+       if (!(flags & TTU_IGNORE_MLOCK)) {
                if (vma->vm_flags & VM_LOCKED) {
                        ret = SWAP_MLOCK;
                        goto out_unmap;
                }
+       }
+       if (!(flags & TTU_IGNORE_ACCESS)) {
                if (ptep_clear_flush_young_notify(vma, address, pte)) {
                        ret = SWAP_FAIL;
                        goto out_unmap;
@@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        /* Update high watermark before we lower rss */
        update_hiwater_rss(mm);
 
-       if (PageAnon(page)) {
+       if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+               if (PageAnon(page))
+                       dec_mm_counter(mm, anon_rss);
+               else
+                       dec_mm_counter(mm, file_rss);
+               set_pte_at(mm, address, pte,
+                               swp_entry_to_pte(make_hwpoison_entry(page)));
+       } else if (PageAnon(page)) {
                swp_entry_t entry = { .val = page_private(page) };
 
                if (PageSwapCache(page)) {
@@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         * pte. do_swap_page() will wait until the migration
                         * pte is removed and then restart fault handling.
                         */
-                       BUG_ON(!migration);
+                       BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
                        entry = make_migration_entry(page, pte_write(pteval));
                }
                set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
                BUG_ON(pte_file(*pte));
-       } else if (PAGE_MIGRATION && migration) {
+       } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
                /* Establish migration entry for a file page */
                swp_entry_t entry;
                entry = make_migration_entry(page, pte_write(pteval));
@@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
  * 'LOCKED.
  */
-static int try_to_unmap_anon(struct page *page, int unlock, int migration)
+static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
        struct anon_vma *anon_vma;
        struct vm_area_struct *vma;
        unsigned int mlocked = 0;
        int ret = SWAP_AGAIN;
+       int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
 
        if (MLOCK_PAGES && unlikely(unlock))
                ret = SWAP_SUCCESS;     /* default for try_to_munlock() */
@@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
                                continue;  /* must visit all unlocked vmas */
                        ret = SWAP_MLOCK;  /* saw at least one mlocked vma */
                } else {
-                       ret = try_to_unmap_one(page, vma, migration);
+                       ret = try_to_unmap_one(page, vma, flags);
                        if (ret == SWAP_FAIL || !page_mapped(page))
                                break;
                }
@@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
 /**
  * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
  * @page: the page to unmap/unlock
- * @unlock:  request for unlock rather than unmap [unlikely]
- * @migration:  unmapping for migration - ignored if @unlock
+ * @flags: action and flags
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the address_space struct it points to.
@@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
  * 'LOCKED.
  */
-static int try_to_unmap_file(struct page *page, int unlock, int migration)
+static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 {
        struct address_space *mapping = page->mapping;
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
        unsigned long max_nl_size = 0;
        unsigned int mapcount;
        unsigned int mlocked = 0;
+       int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
 
        if (MLOCK_PAGES && unlikely(unlock))
                ret = SWAP_SUCCESS;     /* default for try_to_munlock() */
@@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
                                continue;       /* must visit all vmas */
                        ret = SWAP_MLOCK;
                } else {
-                       ret = try_to_unmap_one(page, vma, migration);
+                       ret = try_to_unmap_one(page, vma, flags);
                        if (ret == SWAP_FAIL || !page_mapped(page))
                                goto out;
                }
@@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
                        ret = SWAP_MLOCK;       /* leave mlocked == 0 */
                        goto out;               /* no need to look further */
                }
-               if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
+               if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
+                       (vma->vm_flags & VM_LOCKED))
                        continue;
                cursor = (unsigned long) vma->vm_private_data;
                if (cursor > max_nl_cursor)
@@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
        do {
                list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                shared.vm_set.list) {
-                       if (!MLOCK_PAGES && !migration &&
+                       if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
                            (vma->vm_flags & VM_LOCKED))
                                continue;
                        cursor = (unsigned long) vma->vm_private_data;
@@ -1177,7 +1193,7 @@ out:
 /**
  * try_to_unmap - try to remove all page table mappings to a page
  * @page: the page to get unmapped
- * @migration: migration flag
+ * @flags: action and flags
  *
  * Tries to remove all the page table entries which are mapping this
  * page, used in the pageout path.  Caller must hold the page lock.
@@ -1188,16 +1204,16 @@ out:
  * SWAP_FAIL   - the page is unswappable
  * SWAP_MLOCK  - page is mlocked.
  */
-int try_to_unmap(struct page *page, int migration)
+int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
        int ret;
 
        BUG_ON(!PageLocked(page));
 
        if (PageAnon(page))
-               ret = try_to_unmap_anon(page, 0, migration);
+               ret = try_to_unmap_anon(page, flags);
        else
-               ret = try_to_unmap_file(page, 0, migration);
+               ret = try_to_unmap_file(page, flags);
        if (ret != SWAP_MLOCK && !page_mapped(page))
                ret = SWAP_SUCCESS;
        return ret;
@@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page)
        VM_BUG_ON(!PageLocked(page) || PageLRU(page));
 
        if (PageAnon(page))
-               return try_to_unmap_anon(page, 1, 0);
+               return try_to_unmap_anon(page, TTU_MUNLOCK);
        else
-               return try_to_unmap_file(page, 1, 0);
+               return try_to_unmap_file(page, TTU_MUNLOCK);
 }
 
index b206a7a32e2a4e00bc7446ae839407f656031643..98631c26c20001931a6e4ca13032716992d6808c 100644 (file)
@@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping,
        if (pos + copied > inode->i_size)
                i_size_write(inode, pos + copied);
 
-       unlock_page(page);
        set_page_dirty(page);
+       unlock_page(page);
        page_cache_release(page);
 
        return copied;
@@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
                        iput(inode);
                        return error;
                }
-               unlock_page(page);
                inode->i_mapping->a_ops = &shmem_aops;
                inode->i_op = &shmem_symlink_inode_operations;
                kaddr = kmap_atomic(page, KM_USER0);
                memcpy(kaddr, symname, len);
                kunmap_atomic(kaddr, KM_USER0);
                set_page_dirty(page);
+               unlock_page(page);
                page_cache_release(page);
        }
        if (dir->i_mode & S_ISGID)
@@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = {
        .write_end      = shmem_write_end,
 #endif
        .migratepage    = migrate_page,
+       .error_remove_page = generic_error_remove_page,
 };
 
 static const struct file_operations shmem_file_operations = {
index f1bf19daadc67143b099518c1bc29aa52ae04227..4de7f02f820b03bfcf36b5fc8d6827b5eecd38cb 100644 (file)
@@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry)
        struct swap_info_struct *p;
        struct page *page = NULL;
 
-       if (is_migration_entry(entry))
+       if (non_swap_entry(entry))
                return 1;
 
        p = swap_info_get(entry);
@@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache)
        int count;
        bool has_cache;
 
-       if (is_migration_entry(entry))
+       if (non_swap_entry(entry))
                return -EINVAL;
 
        type = swp_type(entry);
index ccc3ecf7cb9839a90eddc0086be770796e5b8884..450cebdabfc0470b2bcb0bc3f2de941feb6453c2 100644 (file)
@@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page);
  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
  */
-static void
+static int
 truncate_complete_page(struct address_space *mapping, struct page *page)
 {
        if (page->mapping != mapping)
-               return;
+               return -EIO;
 
        if (page_has_private(page))
                do_invalidatepage(page, 0);
@@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
        remove_from_page_cache(page);
        ClearPageMappedToDisk(page);
        page_cache_release(page);       /* pagecache ref */
+       return 0;
 }
 
 /*
@@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
        return ret;
 }
 
+int truncate_inode_page(struct address_space *mapping, struct page *page)
+{
+       if (page_mapped(page)) {
+               unmap_mapping_range(mapping,
+                                  (loff_t)page->index << PAGE_CACHE_SHIFT,
+                                  PAGE_CACHE_SIZE, 0);
+       }
+       return truncate_complete_page(mapping, page);
+}
+
+/*
+ * Used to get rid of pages on hardware memory corruption.
+ */
+int generic_error_remove_page(struct address_space *mapping, struct page *page)
+{
+       if (!mapping)
+               return -EINVAL;
+       /*
+        * Only punch for normal data pages for now.
+        * Handling other types like directories would need more auditing.
+        */
+       if (!S_ISREG(mapping->host->i_mode))
+               return -EIO;
+       return truncate_inode_page(mapping, page);
+}
+EXPORT_SYMBOL(generic_error_remove_page);
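
A filesystem opts in to this truncation path by wiring up ->error_remove_page
in its address_space_operations, as shmem does later in this patch. A sketch
for a hypothetical filesystem (the examplefs_* names are assumptions, not
real code):

static const struct address_space_operations examplefs_aops = {
        .readpage               = examplefs_readpage,  /* assumed helpers */
        .writepage              = examplefs_writepage,
        .error_remove_page      = generic_error_remove_page,
};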
+
+/*
+ * Safely invalidate one page from its pagecache mapping.
+ * It only drops clean, unused pages. The page must be locked.
+ *
+ * Returns 1 if the page is successfully invalidated, otherwise 0.
+ */
+int invalidate_inode_page(struct page *page)
+{
+       struct address_space *mapping = page_mapping(page);
+       if (!mapping)
+               return 0;
+       if (PageDirty(page) || PageWriteback(page))
+               return 0;
+       if (page_mapped(page))
+               return 0;
+       return invalidate_complete_page(mapping, page);
+}
+
 /**
  * truncate_inode_pages - truncate range of pages specified by start & end byte offsets
  * @mapping: mapping to truncate
@@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
                                unlock_page(page);
                                continue;
                        }
-                       if (page_mapped(page)) {
-                               unmap_mapping_range(mapping,
-                                 (loff_t)page_index<<PAGE_CACHE_SHIFT,
-                                 PAGE_CACHE_SIZE, 0);
-                       }
-                       truncate_complete_page(mapping, page);
+                       truncate_inode_page(mapping, page);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
@@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
                                break;
                        lock_page(page);
                        wait_on_page_writeback(page);
-                       if (page_mapped(page)) {
-                               unmap_mapping_range(mapping,
-                                 (loff_t)page->index<<PAGE_CACHE_SHIFT,
-                                 PAGE_CACHE_SIZE, 0);
-                       }
+                       truncate_inode_page(mapping, page);
                        if (page->index > next)
                                next = page->index;
                        next++;
-                       truncate_complete_page(mapping, page);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
@@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
                        if (lock_failed)
                                continue;
 
-                       if (PageDirty(page) || PageWriteback(page))
-                               goto unlock;
-                       if (page_mapped(page))
-                               goto unlock;
-                       ret += invalidate_complete_page(mapping, page);
-unlock:
+                       ret += invalidate_inode_page(page);
+
                        unlock_page(page);
                        if (next > end)
                                break;
@@ -465,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
        return invalidate_inode_pages2_range(mapping, 0, -1);
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
+
+/**
+ * truncate_pagecache - unmap and remove pagecache that has been truncated
+ * @inode: inode
+ * @old: old file offset
+ * @new: new file offset
+ *
+ * inode's new i_size must already be written before truncate_pagecache
+ * is called.
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (e.g. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
+{
+       if (new < old) {
+               struct address_space *mapping = inode->i_mapping;
+
+               /*
+                * unmap_mapping_range is called twice, first simply for
+                * efficiency so that truncate_inode_pages does fewer
+                * single-page unmaps.  However after this first call, and
+                * before truncate_inode_pages finishes, it is possible for
+                * private pages to be COWed, which remain after
+                * truncate_inode_pages finishes, hence the second
+                * unmap_mapping_range call must be made for correctness.
+                */
+               unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+               truncate_inode_pages(mapping, new);
+               unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+       }
+}
+EXPORT_SYMBOL(truncate_pagecache);
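
Following the calling convention described above -- write the new i_size
first, then truncate_pagecache(), and only then release the on-disk blocks --
a hypothetical filesystem's truncate path would look roughly like this (a
sketch; examplefs_free_blocks is an assumed helper):

static int examplefs_setsize(struct inode *inode, loff_t newsize)
{
        loff_t oldsize = inode->i_size;

        i_size_write(inode, newsize);           /* new i_size visible first */
        truncate_pagecache(inode, oldsize, newsize);
        examplefs_free_blocks(inode, newsize);  /* only now free disk blocks */
        return 0;
}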
+
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page.  Ugly, but necessary.
+ */
+int vmtruncate(struct inode *inode, loff_t offset)
+{
+       loff_t oldsize;
+       int error;
+
+       error = inode_newsize_ok(inode, offset);
+       if (error)
+               return error;
+       oldsize = inode->i_size;
+       i_size_write(inode, offset);
+       truncate_pagecache(inode, oldsize, offset);
+       if (inode->i_op->truncate)
+               inode->i_op->truncate(inode);
+
+       return error;
+}
+EXPORT_SYMBOL(vmtruncate);
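
What "unmap mappings freed by truncate" means is visible from user space:
once the file shrinks, touching a previously valid mapping of the truncated
range faults with SIGBUS. A small demonstration (error handling omitted for
brevity):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        long pg = sysconf(_SC_PAGESIZE);
        int fd = open("/tmp/trunc-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
        char *p;

        ftruncate(fd, 2 * pg);
        p = mmap(NULL, 2 * pg, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        p[pg] = 'x';            /* fine: within the file */
        ftruncate(fd, pg);      /* shrink: the second page is unmapped */
        p[pg] = 'y';            /* raises SIGBUS */
        return 0;
}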
index 613e89f471d92c2710be84e9e0fdcf17b0976d0b..1219ceb8a9b2d992da20bb9a10942e7cef2d98b1 100644 (file)
@@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 * processes. Try to unmap it here.
                 */
                if (page_mapped(page) && mapping) {
-                       switch (try_to_unmap(page, 0)) {
+                       switch (try_to_unmap(page, TTU_UNMAP)) {
                        case SWAP_FAIL:
                                goto activate_locked;
                        case SWAP_AGAIN:
@@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+                                               gfp_t gfp_mask, bool noswap,
+                                               unsigned int swappiness,
+                                               struct zone *zone, int nid)
+{
+       struct scan_control sc = {
+               .may_writepage = !laptop_mode,
+               .may_unmap = 1,
+               .may_swap = !noswap,
+               .swap_cluster_max = SWAP_CLUSTER_MAX,
+               .swappiness = swappiness,
+               .order = 0,
+               .mem_cgroup = mem,
+               .isolate_pages = mem_cgroup_isolate_pages,
+       };
+       nodemask_t nm  = nodemask_of_node(nid);
+
+       sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+                       (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+       sc.nodemask = &nm;
+       sc.nr_reclaimed = 0;
+       sc.nr_scanned = 0;
+       /*
+        * NOTE: Although we can get the priority field, using it
+        * here is not a good idea, since it limits the pages we can scan.
+        * If we don't reclaim here, the shrink_zone from balance_pgdat
+        * will pick up pages from other mem cgroups as well. We hack
+        * the priority and make it zero.
+        */
+       shrink_zone(0, zone, &sc);
+       return sc.nr_reclaimed;
+}
+
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                           gfp_t gfp_mask,
                                           bool noswap,
                                           unsigned int swappiness)
 {
+       struct zonelist *zonelist;
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
@@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                .isolate_pages = mem_cgroup_isolate_pages,
                .nodemask = NULL, /* we don't care the placement */
        };
-       struct zonelist *zonelist;
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -1974,6 +2007,7 @@ loop_again:
                for (i = 0; i <= end_zone; i++) {
                        struct zone *zone = pgdat->node_zones + i;
                        int nr_slab;
+                       int nid, zid;
 
                        if (!populated_zone(zone))
                                continue;
@@ -1988,6 +2022,15 @@ loop_again:
                        temp_priority[i] = priority;
                        sc.nr_scanned = 0;
                        note_zone_scanning_priority(zone, priority);
+
+                       nid = pgdat->node_id;
+                       zid = zone_idx(zone);
+                       /*
+                        * Call soft limit reclaim before calling shrink_zone.
+                        * For now we ignore the return value.
+                        */
+                       mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
+                                                       nid, zid);
                        /*
                         * We put equal pressure on every zone, unless one
                         * zone has way too many pages free already.
@@ -2801,10 +2844,10 @@ static void scan_all_zones_unevictable_pages(void)
 unsigned long scan_unevictable_pages;
 
 int scan_unevictable_handler(struct ctl_table *table, int write,
-                          struct file *file, void __user *buffer,
+                          void __user *buffer,
                           size_t *length, loff_t *ppos)
 {
-       proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+       proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
        if (write && *(unsigned long *)table->data)
                scan_all_zones_unevictable_pages();
index da0f64f82b5771f65ada62e157b042119ade99b8..d6b1b054e29454fde8e92d5e4c137f2fce52971f 100644 (file)
@@ -1781,8 +1781,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ);
                ax25_info.n2count   = ax25->n2count;
                ax25_info.state     = ax25->state;
-               ax25_info.rcv_q     = sk_wmem_alloc_get(sk);
-               ax25_info.snd_q     = sk_rmem_alloc_get(sk);
+               ax25_info.rcv_q     = sk_rmem_alloc_get(sk);
+               ax25_info.snd_q     = sk_wmem_alloc_get(sk);
                ax25_info.vs        = ax25->vs;
                ax25_info.vr        = ax25->vr;
                ax25_info.va        = ax25->va;
index 907a82e9023d1d4cb09a82c677afad229657ca34..a16a2342f6bf2ee0b6a30329736c91acaff19bb9 100644 (file)
@@ -965,12 +965,12 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 
 #ifdef CONFIG_SYSCTL
 static
-int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp,
+int brnf_sysctl_call_tables(ctl_table * ctl, int write,
                            void __user * buffer, size_t * lenp, loff_t * ppos)
 {
        int ret;
 
-       ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write && *(int *)(ctl->data))
                *(int *)(ctl->data) = 1;
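
This is the first of many hunks in this series with the same shape: every sysctl proc handler loses its struct file * parameter, and every call into proc_dointvec()/proc_dostring() and friends drops the corresponding argument. A minimal sketch of a handler written against the new signature; my_handler and the post-write reaction are illustrative:

	static int my_handler(ctl_table *ctl, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

		if (ret == 0 && write) {
			/* react to the value the user just wrote */
		}
		return ret;
	}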
index 0bcecbf0658169fe3aea3b0af9afef237d429e09..4d11c28ca8ca0f1b2dbbedb3a9e569a983087f40 100644 (file)
 #define F_QUEUE_MAP_CPU (1<<14)        /* queue map mirrors smp_processor_id() */
 
 /* Thread control flag bits */
-#define T_TERMINATE   (1<<0)
-#define T_STOP        (1<<1)   /* Stop run */
-#define T_RUN         (1<<2)   /* Start run */
-#define T_REMDEVALL   (1<<3)   /* Remove all devs */
-#define T_REMDEV      (1<<4)   /* Remove one dev */
+#define T_STOP        (1<<0)   /* Stop run */
+#define T_RUN         (1<<1)   /* Start run */
+#define T_REMDEVALL   (1<<2)   /* Remove all devs */
+#define T_REMDEV      (1<<3)   /* Remove one dev */
 
 /* If lock -- can be removed after some work */
 #define   if_lock(t)           spin_lock(&(t->if_lock));
@@ -2105,7 +2104,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 
 static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 {
-       ktime_t start;
+       ktime_t start_time, end_time;
        s32 remaining;
        struct hrtimer_sleeper t;
 
@@ -2116,7 +2115,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
        if (remaining <= 0)
                return;
 
-       start = ktime_now();
+       start_time = ktime_now();
        if (remaining < 100)
                udelay(remaining);      /* really small just spin */
        else {
@@ -2135,7 +2134,10 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
                } while (t.task && pkt_dev->running && !signal_pending(current));
                __set_current_state(TASK_RUNNING);
        }
-       pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), start));
+       end_time = ktime_now();
+
+       pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
+       pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay);
 }
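
The reworked spin() reads the clock once after the wait and reuses that single timestamp for both idle accounting and the next transmit deadline. The pattern in isolation, as a sketch; delay_ns stands in for pkt_dev->delay:

	static void finish_wait_accounting(struct pktgen_dev *pkt_dev,
					   ktime_t start_time, u64 delay_ns)
	{
		ktime_t end_time = ktime_now();

		/* one clock read serves both bookkeeping and the deadline */
		pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
		pkt_dev->next_tx = ktime_add_ns(end_time, delay_ns);
	}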
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
@@ -3365,19 +3367,29 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
        mutex_unlock(&pktgen_thread_lock);
 }
 
-static void idle(struct pktgen_dev *pkt_dev)
+static void pktgen_resched(struct pktgen_dev *pkt_dev)
 {
        ktime_t idle_start = ktime_now();
+       schedule();
+       pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+}
 
-       if (need_resched())
-               schedule();
-       else
-               cpu_relax();
+static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
+{
+       ktime_t idle_start = ktime_now();
 
+       while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+               if (signal_pending(current))
+                       break;
+
+               if (need_resched())
+                       pktgen_resched(pkt_dev);
+               else
+                       cpu_relax();
+       }
        pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
 }
 
-
 static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 {
        struct net_device *odev = pkt_dev->odev;
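
pktgen_wait_for_skb() spins on skb->users because the driver holds a reference until hardware transmission completes; the count dropping back to 1 is a cheap completion signal. A condensed restatement of that loop (the in-tree version above additionally yields through pktgen_resched() when need_resched() is set):

	static void wait_for_tx_completion(struct sk_buff *skb)
	{
		/* the driver drops its reference when TX completes */
		while (atomic_read(&skb->users) != 1) {
			if (signal_pending(current))
				break;
			cpu_relax();
		}
	}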
@@ -3387,36 +3399,21 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
        u16 queue_map;
        int ret;
 
-       if (pkt_dev->delay) {
-               spin(pkt_dev, pkt_dev->next_tx);
-
-               /* This is max DELAY, this has special meaning of
-                * "never transmit"
-                */
-               if (pkt_dev->delay == ULLONG_MAX) {
-                       pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
-                       return;
-               }
-       }
-
-       if (!pkt_dev->skb) {
-               set_cur_queue_map(pkt_dev);
-               queue_map = pkt_dev->cur_queue_map;
-       } else {
-               queue_map = skb_get_queue_mapping(pkt_dev->skb);
+       /* If device is offline, then don't send */
+       if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) {
+               pktgen_stop_device(pkt_dev);
+               return;
        }
 
-       txq = netdev_get_tx_queue(odev, queue_map);
-       /* Did we saturate the queue already? */
-       if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)) {
-               /* If device is down, then all queues are permnantly frozen */
-               if (netif_running(odev))
-                       idle(pkt_dev);
-               else
-                       pktgen_stop_device(pkt_dev);
+       /* A delay of ULLONG_MAX (the max DELAY) has the special
+        * meaning of "never transmit"
+        */
+       if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
+               pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
                return;
        }
 
+       /* If there is no skb, or the clone count is exhausted, get a new one */
        if (!pkt_dev->skb || (pkt_dev->last_ok &&
                              ++pkt_dev->clone_count >= pkt_dev->clone_skb)) {
                /* build a new pkt */
@@ -3435,54 +3432,45 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
                pkt_dev->clone_count = 0;       /* reset counter */
        }
 
-       /* fill_packet() might have changed the queue */
+       if (pkt_dev->delay && pkt_dev->last_ok)
+               spin(pkt_dev, pkt_dev->next_tx);
+
        queue_map = skb_get_queue_mapping(pkt_dev->skb);
        txq = netdev_get_tx_queue(odev, queue_map);
 
        __netif_tx_lock_bh(txq);
-       if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)))
-               pkt_dev->last_ok = 0;
-       else {
-               atomic_inc(&(pkt_dev->skb->users));
+       atomic_inc(&(pkt_dev->skb->users));
 
-       retry_now:
+       if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)))
+               ret = NETDEV_TX_BUSY;
+       else
                ret = (*xmit)(pkt_dev->skb, odev);
-               switch (ret) {
-               case NETDEV_TX_OK:
-                       txq_trans_update(txq);
-                       pkt_dev->last_ok = 1;
-                       pkt_dev->sofar++;
-                       pkt_dev->seq_num++;
-                       pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
-                       break;
-               case NETDEV_TX_LOCKED:
-                       cpu_relax();
-                       goto retry_now;
-               default: /* Drivers are not supposed to return other values! */
-                       if (net_ratelimit())
-                               pr_info("pktgen: %s xmit error: %d\n",
-                                       odev->name, ret);
-                       pkt_dev->errors++;
-                       /* fallthru */
-               case NETDEV_TX_BUSY:
-                       /* Retry it next time */
-                       atomic_dec(&(pkt_dev->skb->users));
-                       pkt_dev->last_ok = 0;
-               }
-
-               if (pkt_dev->delay)
-                       pkt_dev->next_tx = ktime_add_ns(ktime_now(),
-                                                       pkt_dev->delay);
+
+       switch (ret) {
+       case NETDEV_TX_OK:
+               txq_trans_update(txq);
+               pkt_dev->last_ok = 1;
+               pkt_dev->sofar++;
+               pkt_dev->seq_num++;
+               pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
+               break;
+       default: /* Drivers are not supposed to return other values! */
+               if (net_ratelimit())
+                       pr_info("pktgen: %s xmit error: %d\n",
+                               odev->name, ret);
+               pkt_dev->errors++;
+               /* fallthru */
+       case NETDEV_TX_LOCKED:
+       case NETDEV_TX_BUSY:
+               /* Retry it next time */
+               atomic_dec(&(pkt_dev->skb->users));
+               pkt_dev->last_ok = 0;
        }
        __netif_tx_unlock_bh(txq);
 
        /* If pkt_dev->count is zero, then run forever */
        if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
-               while (atomic_read(&(pkt_dev->skb->users)) != 1) {
-                       if (signal_pending(current))
-                               break;
-                       idle(pkt_dev);
-               }
+               pktgen_wait_for_skb(pkt_dev);
 
                /* Done with this */
                pktgen_stop_device(pkt_dev);
@@ -3515,20 +3503,24 @@ static int pktgen_thread_worker(void *arg)
        while (!kthread_should_stop()) {
                pkt_dev = next_to_run(t);
 
-               if (!pkt_dev &&
-                   (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV))
-                   == 0) {
-                       prepare_to_wait(&(t->queue), &wait,
-                                       TASK_INTERRUPTIBLE);
-                       schedule_timeout(HZ / 10);
-                       finish_wait(&(t->queue), &wait);
+               if (unlikely(!pkt_dev && t->control == 0)) {
+                       wait_event_interruptible_timeout(t->queue,
+                                                        t->control != 0,
+                                                        HZ/10);
+                       continue;
                }
 
                __set_current_state(TASK_RUNNING);
 
-               if (pkt_dev)
+               if (likely(pkt_dev)) {
                        pktgen_xmit(pkt_dev);
 
+                       if (need_resched())
+                               pktgen_resched(pkt_dev);
+                       else
+                               cpu_relax();
+               }
+
                if (t->control & T_STOP) {
                        pktgen_stop(t);
                        t->control &= ~(T_STOP);
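
The open-coded prepare_to_wait()/schedule_timeout()/finish_wait() sequence collapses into a single wait_event_interruptible_timeout(), which sleeps until the condition holds, a signal arrives, or the timeout expires. Its use in isolation, mirroring the pktgen_thread fields:

	/* sleep up to HZ/10 jiffies unless t->control becomes non-zero */
	wait_event_interruptible_timeout(t->queue, t->control != 0, HZ / 10);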
index 1c6a5bb6f0c8da8cba2e31021ad0f1171f0aeecf..6e1f085db06af33a1f069d22816d9b463f413939 100644 (file)
@@ -164,7 +164,7 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU
 static int min_priority[1];
 static int max_priority[] = { 127 }; /* From DECnet spec */
 
-static int dn_forwarding_proc(ctl_table *, int, struct file *,
+static int dn_forwarding_proc(ctl_table *, int,
                        void __user *, size_t *, loff_t *);
 static int dn_forwarding_sysctl(ctl_table *table,
                        void __user *oldval, size_t __user *oldlenp,
@@ -274,7 +274,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
 }
 
 static int dn_forwarding_proc(ctl_table *table, int write,
-                               struct file *filep,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
 {
@@ -290,7 +289,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
        dn_db = dev->dn_ptr;
        old = dn_db->parms.forwarding;
 
-       err = proc_dointvec(table, write, filep, buffer, lenp, ppos);
+       err = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if ((err >= 0) && write) {
                if (dn_db->parms.forwarding < 0)
index 5bcd592ae6dd0f3883d25e4543e0085ca81133ea..26b0ab1e9f560b75d046c687fabf27dfed1b4f48 100644 (file)
@@ -165,7 +165,6 @@ static int dn_node_address_strategy(ctl_table *table,
 }
 
 static int dn_node_address_handler(ctl_table *table, int write,
-                               struct file *filp,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
 {
@@ -276,7 +275,6 @@ static int dn_def_dev_strategy(ctl_table *table,
 
 
 static int dn_def_dev_handler(ctl_table *table, int write,
-                               struct file * filp,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
 {
index 07336c6201f0dede9e93c75f811b64af7d8cc853..e92f1fd28aa5161bc28969ba25b3d9d0bf95418e 100644 (file)
@@ -1270,10 +1270,10 @@ static void inet_forward_change(struct net *net)
 }
 
 static int devinet_conf_proc(ctl_table *ctl, int write,
-                            struct file *filp, void __user *buffer,
+                            void __user *buffer,
                             size_t *lenp, loff_t *ppos)
 {
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write) {
                struct ipv4_devconf *cnf = ctl->extra1;
@@ -1342,12 +1342,12 @@ static int devinet_conf_sysctl(ctl_table *table,
 }
 
 static int devinet_sysctl_forward(ctl_table *ctl, int write,
-                                 struct file *filp, void __user *buffer,
+                                 void __user *buffer,
                                  size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write && *valp != val) {
                struct net *net = ctl->extra2;
@@ -1372,12 +1372,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 }
 
 int ipv4_doint_and_flush(ctl_table *ctl, int write,
-                        struct file *filp, void __user *buffer,
+                        void __user *buffer,
                         size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
        struct net *net = ctl->extra2;
 
        if (write && *valp != val)
index d9645c94a0678fd76f64ffbbaf40f63184a7e878..41ada9904d3125c32b063b3dad025cc4898c2c60 100644 (file)
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.
 
-   Current solution: t->recursion lock breaks dead loops. It looks
-   like dev->tbusy flag, but I preferred new variable, because
-   the semantics is different. One day, when hard_start_xmit
-   will be multithreaded we will have to use skb->encapsulation.
+   Current solution: HARD_TX_LOCK lock breaks dead loops.
 
 
 
@@ -678,11 +675,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
        __be32 dst;
        int    mtu;
 
-       if (tunnel->recursion++) {
-               stats->collisions++;
-               goto tx_error;
-       }
-
        if (dev->type == ARPHRD_ETHER)
                IPCB(skb)->flags = 0;
 
@@ -820,7 +812,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
-                       tunnel->recursion--;
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
@@ -888,7 +879,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
        nf_reset(skb);
 
        IPTUNNEL_XMIT();
-       tunnel->recursion--;
        return NETDEV_TX_OK;
 
 tx_error_icmp:
@@ -897,7 +887,6 @@ tx_error_icmp:
 tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
-       tunnel->recursion--;
        return NETDEV_TX_OK;
 }
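
With the core transmit path serializing ->hard_start_xmit() per queue, the per-tunnel recursion counter and its collisions accounting become dead weight, so every tunnel driver in this series (ip_gre, ipip, ip6_tunnel, sit) drops the same three blocks. Roughly, the core guard this now relies on looks like the following; this is a simplified sketch of the net/core/dev.c transmit path, not code from this patch:

	/* simplified: the TX queue lock serializes re-entry per device */
	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_tx_queue_stopped(txq))
		ret = dev->netdev_ops->ndo_start_xmit(skb, dev);
	HARD_TX_UNLOCK(dev, txq);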
 
index fc7993e9061fd855d1cb34b42fa491c49dcd669d..5a0693576e821c628edfb3f436243a0f8b107d4a 100644 (file)
@@ -611,6 +611,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
                 *      Check the arguments are allowable
                 */
 
+               if (optlen < sizeof(struct in_addr))
+                       goto e_inval;
+
                err = -EFAULT;
                if (optlen >= sizeof(struct ip_mreqn)) {
                        if (copy_from_user(&mreq, optval, sizeof(mreq)))
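
The added length check guarantees that even the shortest accepted form, a bare struct in_addr, fits in the user buffer before any copy_from_user() runs. The validation in isolation; check_mreq_len is an illustrative helper name:

	static int check_mreq_len(unsigned int optlen)
	{
		/* must hold at least a struct in_addr; the larger forms
		 * (ip_mreq, ip_mreqn) are distinguished afterwards */
		if (optlen < sizeof(struct in_addr))
			return -EINVAL;
		return 0;
	}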
index 62548cb0923c1b53f3149b8ed1a34ef8b3fdc8ec..08ccd344de7a9a88d9f5236bb319bab86819e544 100644 (file)
@@ -402,11 +402,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
        __be32 dst = tiph->daddr;
        int    mtu;
 
-       if (tunnel->recursion++) {
-               stats->collisions++;
-               goto tx_error;
-       }
-
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;
 
@@ -485,7 +480,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
-                       tunnel->recursion--;
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
@@ -523,7 +517,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
        nf_reset(skb);
 
        IPTUNNEL_XMIT();
-       tunnel->recursion--;
        return NETDEV_TX_OK;
 
 tx_error_icmp:
@@ -531,7 +524,6 @@ tx_error_icmp:
 tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
-       tunnel->recursion--;
        return NETDEV_TX_OK;
 }
 
index df9347314538bd353431afde7a4ca49ee48b5e2a..bb41992520268b08c49f345e2170aeb644c56b7d 100644 (file)
@@ -3036,7 +3036,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 
 #ifdef CONFIG_SYSCTL
 static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
-                                       struct file *filp, void __user *buffer,
+                                       void __user *buffer,
                                        size_t *lenp, loff_t *ppos)
 {
        if (write) {
@@ -3046,7 +3046,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 
                memcpy(&ctl, __ctl, sizeof(ctl));
                ctl.data = &flush_delay;
-               proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
+               proc_dointvec(&ctl, write, buffer, lenp, ppos);
 
                net = (struct net *)__ctl->extra1;
                rt_cache_flush(net, flush_delay);
@@ -3106,12 +3106,11 @@ static void rt_secret_reschedule(int old)
 }
 
 static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
-                                         struct file *filp,
                                          void __user *buffer, size_t *lenp,
                                          loff_t *ppos)
 {
        int old = ip_rt_secret_interval;
-       int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
 
        rt_secret_reschedule(old);
 
index 4710d219f06ae9df6c2754906ca09663ed4da738..2dcf04d9b005cda549d2d4ee58cd98697163d1bb 100644 (file)
@@ -36,7 +36,7 @@ static void set_local_port_range(int range[2])
 }
 
 /* Validate changes from /proc interface. */
-static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
+static int ipv4_local_port_range(ctl_table *table, int write,
                                 void __user *buffer,
                                 size_t *lenp, loff_t *ppos)
 {
@@ -51,7 +51,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
        };
 
        inet_get_local_port_range(range, range + 1);
-       ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 
        if (write && ret == 0) {
                if (range[1] < range[0])
@@ -91,7 +91,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table,
 }
 
 
-static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp,
+static int proc_tcp_congestion_control(ctl_table *ctl, int write,
                                       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char val[TCP_CA_NAME_MAX];
@@ -103,7 +103,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
 
        tcp_get_default_congestion_control(val);
 
-       ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+       ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
        if (write && ret == 0)
                ret = tcp_set_default_congestion_control(val);
        return ret;
@@ -129,7 +129,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table,
 }
 
 static int proc_tcp_available_congestion_control(ctl_table *ctl,
-                                                int write, struct file * filp,
+                                                int write,
                                                 void __user *buffer, size_t *lenp,
                                                 loff_t *ppos)
 {
@@ -140,13 +140,13 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl,
        if (!tbl.data)
                return -ENOMEM;
        tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
-       ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+       ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
        kfree(tbl.data);
        return ret;
 }
 
 static int proc_allowed_congestion_control(ctl_table *ctl,
-                                          int write, struct file * filp,
+                                          int write,
                                           void __user *buffer, size_t *lenp,
                                           loff_t *ppos)
 {
@@ -158,7 +158,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
                return -ENOMEM;
 
        tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
-       ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+       ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
        if (write && ret == 0)
                ret = tcp_set_allowed_congestion_control(tbl.data);
        kfree(tbl.data);
index 55f486d89c88eeb1d3e1409cd115cf996c9a8b07..1fd0a3d775d26767dec15c78f599dc96b1c3c08d 100644 (file)
@@ -3986,14 +3986,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 #ifdef CONFIG_SYSCTL
 
 static
-int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_forward(ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
        int ret;
 
-       ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write)
                ret = addrconf_fixup_forwarding(ctl, valp, val);
@@ -4090,14 +4090,14 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
 }
 
 static
-int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_disable(ctl_table *ctl, int write,
                            void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
        int ret;
 
-       ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write)
                ret = addrconf_disable_ipv6(ctl, valp, val);
index 7d25bbe32110c1b66269b00f64440f992b3d7afe..c595bbe1ed99cc7819566a713cc0ee9975f10557 100644 (file)
@@ -1043,11 +1043,6 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
        struct net_device_stats *stats = &t->dev->stats;
        int ret;
 
-       if (t->recursion++) {
-               stats->collisions++;
-               goto tx_err;
-       }
-
        switch (skb->protocol) {
        case htons(ETH_P_IP):
                ret = ip4ip6_tnl_xmit(skb, dev);
@@ -1062,14 +1057,12 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
        if (ret < 0)
                goto tx_err;
 
-       t->recursion--;
        return NETDEV_TX_OK;
 
 tx_err:
        stats->tx_errors++;
        stats->tx_dropped++;
        kfree_skb(skb);
-       t->recursion--;
        return NETDEV_TX_OK;
 }
 
index 7015478797f667be60b94c762302ce24593d1fef..498b9b0b0fade607c0ece1b1ce3edaacc03ef968 100644 (file)
@@ -1735,7 +1735,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
        }
 }
 
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct net_device *dev = ctl->extra1;
        struct inet6_dev *idev;
@@ -1746,16 +1746,16 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
                ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
 
        if (strcmp(ctl->procname, "retrans_time") == 0)
-               ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+               ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        else if (strcmp(ctl->procname, "base_reachable_time") == 0)
                ret = proc_dointvec_jiffies(ctl, write,
-                                           filp, buffer, lenp, ppos);
+                                           buffer, lenp, ppos);
 
        else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
                 (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
                ret = proc_dointvec_ms_jiffies(ctl, write,
-                                              filp, buffer, lenp, ppos);
+                                              buffer, lenp, ppos);
        else
                ret = -1;
 
index 77aecbe8ff6caf261c04700d6bea889bb2221a7e..d6fe7646a8ff7d8599c3565e6e6c9deb68732cef 100644 (file)
@@ -2524,13 +2524,13 @@ static const struct file_operations rt6_stats_seq_fops = {
 #ifdef CONFIG_SYSCTL
 
 static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
                              void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct net *net = current->nsproxy->net_ns;
        int delay = net->ipv6.sysctl.flush_delay;
        if (write) {
-               proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+               proc_dointvec(ctl, write, buffer, lenp, ppos);
                fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
                return 0;
        } else
index 0ae4f6448187ef288c0692d6eb3b3697a2b6aa25..fcb53962884797c8508dc71a85547d7fe21333a4 100644 (file)
@@ -626,11 +626,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
        struct in6_addr *addr6;
        int addr_type;
 
-       if (tunnel->recursion++) {
-               stats->collisions++;
-               goto tx_error;
-       }
-
        if (skb->protocol != htons(ETH_P_IPV6))
                goto tx_error;
 
@@ -753,7 +748,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
-                       tunnel->recursion--;
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
@@ -794,7 +788,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
        nf_reset(skb);
 
        IPTUNNEL_XMIT();
-       tunnel->recursion--;
        return NETDEV_TX_OK;
 
 tx_error_icmp:
@@ -802,7 +795,6 @@ tx_error_icmp:
 tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
-       tunnel->recursion--;
        return NETDEV_TX_OK;
 }
 
index 57f8817c3979029a38a680d1ea65d948766d2ef9..5c86567e5a78c3d84c511f038dd18a9834f9efe2 100644 (file)
@@ -73,12 +73,12 @@ static int min_lap_keepalive_time = 100;    /* 100us */
 /* For other sysctl, I've no idea of the range. Maybe Dag could help
  * us on that - Jean II */
 
-static int do_devname(ctl_table *table, int write, struct file *filp,
+static int do_devname(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret;
 
-       ret = proc_dostring(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dostring(table, write, buffer, lenp, ppos);
        if (ret == 0 && write) {
                struct ias_value *val;
 
@@ -90,12 +90,12 @@ static int do_devname(ctl_table *table, int write, struct file *filp,
 }
 
 
-static int do_discovery(ctl_table *table, int write, struct file *filp,
+static int do_discovery(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret;
 
-       ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
        if (ret)
               return ret;
 
index 039901109fa1736a4eecc1ede72e96dfd0dd2db3..71e10cabf811f6a8825faea8b0ea72472625bafb 100644 (file)
@@ -90,8 +90,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
                bss->dtim_period = tim_ie->dtim_period;
        }
 
-       /* set default value for buggy APs */
-       if (!elems->tim || bss->dtim_period == 0)
+       /* set default value for buggy APs or a missing TIM element */
+       if (bss->dtim_period == 0)
                bss->dtim_period = 1;
 
        bss->supp_rates_len = 0;
index fba2892b99e10157a90bcf73969a61647c390605..446e9bd4b4bc2b90aa850f8734ee64ced9f03fe1 100644 (file)
@@ -1496,14 +1496,14 @@ static int ip_vs_zero_all(void)
 
 
 static int
-proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+proc_do_defense_mode(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = table->data;
        int val = *valp;
        int rc;
 
-       rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
        if (write && (*valp != val)) {
                if ((*valp < 0) || (*valp > 3)) {
                        /* Restore the correct value */
@@ -1517,7 +1517,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
 
 
 static int
-proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+proc_do_sync_threshold(ctl_table *table, int write,
                       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = table->data;
@@ -1527,7 +1527,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
        /* backup the value first */
        memcpy(val, valp, sizeof(val));
 
-       rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
        if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
                /* Restore the correct value */
                memcpy(valp, val, sizeof(val));
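
proc_do_sync_threshold() snapshots the value pair before proc_dointvec() overwrites it, then rolls the snapshot back if the written pair is invalid (negative, or threshold not below period). The copy/validate/restore idiom in isolation; validate_pair is an illustrative name:

	static void validate_pair(int *valp, const int *saved)
	{
		/* reject out-of-range or inverted pairs by rolling back */
		if (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])
			memcpy(valp, saved, 2 * sizeof(int));
	}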
index 4e620305f28c765548c5c5b35b1a5b833034bf30..c93494fef8ef3cfdc8eb22352c59554d0c751eb2 100644 (file)
@@ -226,7 +226,7 @@ static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
 static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
 static struct ctl_table_header *nf_log_dir_header;
 
-static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
+static int nf_log_proc_dostring(ctl_table *table, int write,
                         void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        const struct nf_logger *logger;
@@ -260,7 +260,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
                        table->data = "NONE";
                else
                        table->data = logger->name;
-               r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+               r = proc_dostring(table, write, buffer, lenp, ppos);
                mutex_unlock(&nf_log_mutex);
        }
 
index 55180b99562a7bec3788a99906f2c0fd480fbfca..a4bafbf150974da5daa3ba622f10b7b589604272 100644 (file)
@@ -1609,6 +1609,16 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups)
        return err;
 }
 
+void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
+{
+       struct sock *sk;
+       struct hlist_node *node;
+       struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+
+       sk_for_each_bound(sk, node, &tbl->mc_list)
+               netlink_update_socket_mc(nlk_sk(sk), group, 0);
+}
+
 /**
  * netlink_clear_multicast_users - kick off multicast listeners
  *
@@ -1619,15 +1629,8 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups)
  */
 void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
 {
-       struct sock *sk;
-       struct hlist_node *node;
-       struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
-
        netlink_table_grab();
-
-       sk_for_each_bound(sk, node, &tbl->mc_list)
-               netlink_update_socket_mc(nlk_sk(sk), group, 0);
-
+       __netlink_clear_multicast_users(ksk, group);
        netlink_table_ungrab();
 }
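
The split follows the usual kernel naming convention: the double-underscore variant assumes the caller already holds the netlink table lock, while the plain-named wrapper takes and releases it. The generic shape of that pattern; thing and thing_lock are placeholders:

	struct thing { int count; };

	static DEFINE_MUTEX(thing_lock);

	static void __touch_thing(struct thing *t)
	{
		/* caller must hold thing_lock */
		t->count++;
	}

	static void touch_thing(struct thing *t)
	{
		mutex_lock(&thing_lock);
		__touch_thing(t);
		mutex_unlock(&thing_lock);
	}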
 
index 566941e03363fbdd7591ded5ed16b41f0a903c7c..44ff3f3810faa6c94e15c9b5f6b40312cf1c1182 100644 (file)
@@ -220,10 +220,12 @@ static void __genl_unregister_mc_group(struct genl_family *family,
        struct net *net;
        BUG_ON(grp->family != family);
 
+       netlink_table_grab();
        rcu_read_lock();
        for_each_net_rcu(net)
-               netlink_clear_multicast_users(net->genl_sock, grp->id);
+               __netlink_clear_multicast_users(net->genl_sock, grp->id);
        rcu_read_unlock();
+       netlink_table_ungrab();
 
        clear_bit(grp->id, mc_groups);
        list_del(&grp->list);
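
The reordering matters because netlink_table_grab() can sleep while waiting for table users, which is illegal inside an rcu_read_lock() section; taking the table lock once, before entering RCU, also avoids re-grabbing it for every network namespace. The resulting ordering, annotated:

	netlink_table_grab();	/* may sleep: must precede rcu_read_lock() */
	rcu_read_lock();
	for_each_net_rcu(net)	/* per-net work with the lock already held */
		__netlink_clear_multicast_users(net->genl_sock, grp->id);
	rcu_read_unlock();
	netlink_table_ungrab();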
index a662e62a99cfa0adcf7589aa457a19bfeb21b48a..f60c0c2aacba5f1a99fe4a2fe159f43a09b78f07 100644 (file)
@@ -168,6 +168,12 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
                goto drop;
        }
 
+       /* Broadcast sending is not implemented */
+       if (pn_addr(dst) == PNADDR_BROADCAST) {
+               err = -EOPNOTSUPP;
+               goto drop;
+       }
+
        skb_reset_transport_header(skb);
        WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */
        skb_push(skb, sizeof(struct phonethdr));
index 7a4ee397d2f773e3bfb8d8e0820ad5ec79859a16..07aa9f08d5fbdd0a66eb4713103b2f687089a8ea 100644 (file)
@@ -113,6 +113,8 @@ void pn_sock_unhash(struct sock *sk)
 }
 EXPORT_SYMBOL(pn_sock_unhash);
 
+static DEFINE_MUTEX(port_mutex);
+
 static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
 {
        struct sock *sk = sock->sk;
@@ -140,9 +142,11 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
                err = -EINVAL; /* attempt to rebind */
                goto out;
        }
+       WARN_ON(sk_hashed(sk));
+       mutex_lock(&port_mutex);
        err = sk->sk_prot->get_port(sk, pn_port(handle));
        if (err)
-               goto out;
+               goto out_port;
 
        /* get_port() sets the port, bind() sets the address if applicable */
        pn->sobject = pn_object(saddr, pn_port(pn->sobject));
@@ -150,6 +154,8 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
 
        /* Enable RX on the socket */
        sk->sk_prot->hash(sk);
+out_port:
+       mutex_unlock(&port_mutex);
 out:
        release_sock(sk);
        return err;
@@ -357,8 +363,6 @@ const struct proto_ops phonet_stream_ops = {
 };
 EXPORT_SYMBOL(phonet_stream_ops);
 
-static DEFINE_MUTEX(port_mutex);
-
 /* allocate port for a socket */
 int pn_sock_get_port(struct sock *sk, unsigned short sport)
 {
@@ -370,9 +374,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport)
 
        memset(&try_sa, 0, sizeof(struct sockaddr_pn));
        try_sa.spn_family = AF_PHONET;
-
-       mutex_lock(&port_mutex);
-
+       WARN_ON(!mutex_is_locked(&port_mutex));
        if (!sport) {
                /* search free port */
                int port, pmin, pmax;
@@ -401,8 +403,6 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport)
                else
                        sock_put(tmpsk);
        }
-       mutex_unlock(&port_mutex);
-
        /* the port must be in use already */
        return -EADDRINUSE;
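
Hoisting port_mutex into pn_socket_bind() makes port allocation and socket hashing one atomic step, closing the window where two concurrent binds could both be handed the same free port; pn_sock_get_port() keeps only an assertion. The assert-instead-of-lock shape for such internal helpers, as a sketch; assign_port is illustrative:

	static DEFINE_MUTEX(port_mutex);

	static int assign_port(struct sock *sk, unsigned short sport)
	{
		/* the caller (bind) holds port_mutex across both the
		 * port search and the later hashing of the socket */
		WARN_ON(!mutex_is_locked(&port_mutex));
		return 0;
	}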
 
index 7b5749ee2765c221532e9c3624f66d27e0843db5..2220f33223267287b0801830465641331d7602a4 100644 (file)
@@ -56,7 +56,7 @@ void phonet_get_local_port_range(int *min, int *max)
        } while (read_seqretry(&local_port_range_lock, seq));
 }
 
-static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
+static int proc_local_port_range(ctl_table *table, int write,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
 {
@@ -70,7 +70,7 @@ static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
                .extra2 = &local_port_range_max,
        };
 
-       ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 
        if (write && ret == 0) {
                if (range[1] < range[0])
index c70dd7f5258e18b440f02a072a9a62337cd61d87..1db618f56ecb5e59b6869abaa730894ba4a654e6 100644 (file)
@@ -8,7 +8,6 @@
 
 #include <linux/types.h>
 #include <linux/module.h>
-#include <linux/utsname.h>
 #include <linux/sunrpc/clnt.h>
 
 #ifdef RPC_DEBUG
index a417d5ab5dd753432ecf403646356a86d4fa50f5..38829e20500bf787aa59189501aaf7ce323a113c 100644 (file)
@@ -640,10 +640,11 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
- * @ops: RPC call ops
+ * @req: RPC request
+ * @tk_ops: RPC call ops
  */
 struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
-                                       const struct rpc_call_ops *tk_ops)
+                               const struct rpc_call_ops *tk_ops)
 {
        struct rpc_task *task;
        struct xdr_buf *xbufp = &req->rq_snd_buf;
index 858a443f418fcf108070736b179eb87a19a38417..49278f830367eec97d34e862992d6f7f16e1e899 100644 (file)
@@ -860,7 +860,8 @@ static void rpc_clntdir_depopulate(struct dentry *dentry)
 
 /**
  * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs
- * @path: path from the rpc_pipefs root to the new directory
+ * @dentry: dentry from the rpc_pipefs root to the new directory
+ * @name: &struct qstr for the name
  * @rpc_client: rpc client to associate with this directory
  *
 * This creates a directory at the given @dentry associated with
index 5231f7aaac0e483ebb8b457c3c6e2218464da409..42f9748ae0939f8f2f22f8c8cb93505d378bf4e3 100644 (file)
@@ -56,7 +56,7 @@ rpc_unregister_sysctl(void)
        }
 }
 
-static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+static int proc_do_xprt(ctl_table *table, int write,
                        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char tmpbuf[256];
@@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file,
 }
 
 static int
-proc_dodebug(ctl_table *table, int write, struct file *file,
+proc_dodebug(ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char            tmpbuf[20], c, *s;
index 87101177825b83073ed153bc51367e64132b0a16..35fb68b9c8ec96ca2080530e014135d643aaabac 100644 (file)
@@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep;
  * current value.
  */
 static int read_reset_stat(ctl_table *table, int write,
-                          struct file *filp, void __user *buffer, size_t *lenp,
+                          void __user *buffer, size_t *lenp,
                           loff_t *ppos)
 {
        atomic_t *stat = (atomic_t *)table->data;
index bee41546575471cb85d036efbbed1e87e9370638..37c5475ba258b51b22c6722aeb605741c2732051 100644 (file)
@@ -773,6 +773,7 @@ static void xs_close(struct rpc_xprt *xprt)
        dprintk("RPC:       xs_close xprt %p\n", xprt);
 
        xs_reset_transport(transport);
+       xprt->reestablish_timeout = 0;
 
        smp_mb__before_clear_bit();
        clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
@@ -1264,6 +1265,12 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
        if (xprt->shutdown)
                goto out;
 
+       /* Any data means we had a useful conversation, so
+        * we don't need to delay the next reconnect
+        */
+       if (xprt->reestablish_timeout)
+               xprt->reestablish_timeout = 0;
+
        /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
        rd_desc.arg.data = xprt;
        do {
@@ -2034,6 +2041,8 @@ static void xs_connect(struct rpc_task *task)
                                   &transport->connect_worker,
                                   xprt->reestablish_timeout);
                xprt->reestablish_timeout <<= 1;
+               if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+                       xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
                if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
                        xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
        } else {
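
Since xs_close() and xs_tcp_data_ready() can now zero reestablish_timeout, the doubling in xs_connect() must clamp the result upward to XS_TCP_INIT_REEST_TO as well as downward to XS_TCP_MAX_REEST_TO, or a reset value would stay stuck at zero delay. The backoff in isolation, using the constants as named in xprtsock.c:

	static unsigned long next_reestablish_timeout(unsigned long cur)
	{
		unsigned long next = cur << 1;		/* exponential backoff */

		if (next < XS_TCP_INIT_REEST_TO)	/* cur may be 0 after a reset */
			next = XS_TCP_INIT_REEST_TO;
		if (next > XS_TCP_MAX_REEST_TO)
			next = XS_TCP_MAX_REEST_TO;
		return next;
	}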
index d16cd9ea4d002a2cf47b7b265d6c124a84a4e1b4..bf725275eb8d2e97788fa636de465f30a3a17ba0 100644 (file)
@@ -26,11 +26,11 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
 
        wdev->wext.connect.ie = wdev->wext.ie;
        wdev->wext.connect.ie_len = wdev->wext.ie_len;
-       wdev->wext.connect.privacy = wdev->wext.default_key != -1;
 
        if (wdev->wext.keys) {
                wdev->wext.keys->def = wdev->wext.default_key;
                wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key;
+               wdev->wext.connect.privacy = true;
        }
 
        if (!wdev->wext.connect.ssid_len)
index c29be8f902488f6175e584f97b2c8c873036d4b2..4f9c1908593bce72b1572d0a3efb95fd3f22f36f 100644 (file)
@@ -83,11 +83,12 @@ TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/)
 # is automatically cleaned up.
 try-run = $(shell set -e;              \
        TMP="$(TMPOUT).$$$$.tmp";       \
+       TMPO="$(TMPOUT).$$$$.o";        \
        if ($(1)) >/dev/null 2>&1;      \
        then echo "$(2)";               \
        else echo "$(3)";               \
        fi;                             \
-       rm -f "$$TMP")
+       rm -f "$$TMP" "$$TMPO")
 
 # as-option
 # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
@@ -105,12 +106,12 @@ as-instr = $(call try-run,\
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
 
 cc-option = $(call try-run,\
-       $(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
+       $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
-       $(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
+       $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
 
 # cc-option-align
 # Prefix align with either -falign or -malign
@@ -130,10 +131,15 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \
 # Usage:  EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
 cc-ifversion = $(shell [ $(call cc-version, $(CC)) $(1) $(2) ] && echo $(3))
 
+# cc-ldoption
+# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+cc-ldoption = $(call try-run,\
+       $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2))
+
 # ld-option
-# Usage: ldflags += $(call ld-option, -Wl$(comma)--hash-style=both)
+# Usage: LDFLAGS += $(call ld-option, -X)
 ld-option = $(call try-run,\
-       $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2))
+       $(CC) /dev/null -c -o "$$TMPO" ; $(LD) $(1) "$$TMPO" -o "$$TMP",$(1),$(2))
 
 ######
 
index 5c4b7a400c182b5725491c1c1e498d9f5e377702..341b58902ffced6d70801f6e90ea85f60bcee88d 100644 (file)
@@ -206,7 +206,7 @@ cmd_modversions =                                                   \
 endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
        "$(if $(CONFIG_64BIT),64,32)" \
        "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
        "$(if $(part-of-module),1,0)" "$(@)";
@@ -216,6 +216,7 @@ define rule_cc_o_c
        $(call echo-cmd,checksrc) $(cmd_checksrc)                         \
        $(call echo-cmd,cc_o_c) $(cmd_cc_o_c);                            \
        $(cmd_modversions)                                                \
+       $(call echo-cmd,record_mcount)                                    \
        $(cmd_record_mcount)                                              \
        scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' >    \
                                                      $(dot-target).tmp;  \
@@ -269,7 +270,8 @@ targets += $(extra-y) $(MAKECMDGOALS) $(always)
 # Linker scripts preprocessor (.lds.S -> .lds)
 # ---------------------------------------------------------------------------
 quiet_cmd_cpp_lds_S = LDS     $@
-      cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $<
+      cmd_cpp_lds_S = $(CPP) $(cpp_flags) -P -C -U$(ARCH) \
+                            -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
 
 $(obj)/%.lds: $(src)/%.lds.S FORCE
        $(call if_changed_dep,cpp_lds_S)
index 99ca7a698687a5a0558b987b05ffb6c581c01141..79ab973fb43a42715d61092ce58d7408109e6eff 100644 (file)
@@ -71,7 +71,7 @@ FILELINE * docsection;
 
 static char *srctree, *kernsrctree;
 
-void usage (void)
+static void usage (void)
 {
        fprintf(stderr, "Usage: docproc {doc|depend} file\n");
        fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n");
@@ -84,7 +84,7 @@ void usage (void)
 /*
  * Execute kernel-doc with parameters given in svec
  */
-void exec_kernel_doc(char **svec)
+static void exec_kernel_doc(char **svec)
 {
        pid_t pid;
        int ret;
@@ -129,7 +129,7 @@ struct symfile
 struct symfile symfilelist[MAXFILES];
 int symfilecnt = 0;
 
-void add_new_symbol(struct symfile *sym, char * symname)
+static void add_new_symbol(struct symfile *sym, char * symname)
 {
        sym->symbollist =
           realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *));
@@ -137,14 +137,14 @@ void add_new_symbol(struct symfile *sym, char * symname)
 }
 
 /* Add a filename to the list */
-struct symfile * add_new_file(char * filename)
+static struct symfile * add_new_file(char * filename)
 {
        symfilelist[symfilecnt++].filename = strdup(filename);
        return &symfilelist[symfilecnt - 1];
 }
 
 /* Check if file already are present in the list */
-struct symfile * filename_exist(char * filename)
+static struct symfile * filename_exist(char * filename)
 {
        int i;
        for (i=0; i < symfilecnt; i++)
@@ -157,20 +157,20 @@ struct symfile * filename_exist(char * filename)
  * List all files referenced within the template file.
  * Files are separated by tabs.
  */
-void adddep(char * file)                  { printf("\t%s", file); }
-void adddep2(char * file, char * line)     { line = line; adddep(file); }
-void noaction(char * line)                { line = line; }
-void noaction2(char * file, char * line)   { file = file; line = line; }
+static void adddep(char * file)                   { printf("\t%s", file); }
+static void adddep2(char * file, char * line)     { line = line; adddep(file); }
+static void noaction(char * line)                 { line = line; }
+static void noaction2(char * file, char * line)   { file = file; line = line; }
 
 /* Echo the line without further action */
-void printline(char * line)               { printf("%s", line); }
+static void printline(char * line)               { printf("%s", line); }
 
 /*
  * Find all symbols in filename that are exported with EXPORT_SYMBOL &
  * EXPORT_SYMBOL_GPL (& EXPORT_SYMBOL_GPL_FUTURE implicitly).
  * All symbols located are stored in symfilelist.
  */
-void find_export_symbols(char * filename)
+static void find_export_symbols(char * filename)
 {
        FILE * fp;
        struct symfile *sym;
@@ -227,7 +227,7 @@ void find_export_symbols(char * filename)
  * intfunc uses -nofunction
  * extfunc uses -function
  */
-void docfunctions(char * filename, char * type)
+static void docfunctions(char * filename, char * type)
 {
        int i,j;
        int symcnt = 0;
@@ -258,15 +258,15 @@ void docfunctions(char * filename, char * type)
        fflush(stdout);
        free(vec);
 }
-void intfunc(char * filename) {        docfunctions(filename, NOFUNCTION); }
-void extfunc(char * filename) { docfunctions(filename, FUNCTION);   }
+static void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); }
+static void extfunc(char * filename) { docfunctions(filename, FUNCTION);   }
 
 /*
  * Document specific function(s) in a file.
  * Call kernel-doc with the following parameters:
  * kernel-doc -docbook -function function1 [-function function2]
  */
-void singfunc(char * filename, char * line)
+static void singfunc(char * filename, char * line)
 {
        char *vec[200]; /* Enough for specific functions */
         int i, idx = 0;
@@ -297,7 +297,7 @@ void singfunc(char * filename, char * line)
  * Call kernel-doc with the following parameters:
  * kernel-doc -docbook -function "doc section" filename
  */
-void docsect(char *filename, char *line)
+static void docsect(char *filename, char *line)
 {
        char *vec[6]; /* kerneldoc -docbook -function "section" file NULL */
        char *s;
@@ -324,7 +324,7 @@ void docsect(char *filename, char *line)
  * 5) Lines containing !P
  * 6) Default lines - lines not matching the above
  */
-void parse_file(FILE *infile)
+static void parse_file(FILE *infile)
 {
        char line[MAXLINESZ];
        char * s;
index 8ab44861168087b01e3d3a643704e3ba6efe1129..6bf21f83837dff5a42893d57dde41e8d225948e4 100644 (file)
@@ -124,7 +124,7 @@ char *target;
 char *depfile;
 char *cmdline;
 
-void usage(void)
+static void usage(void)
 {
        fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n");
        exit(1);
@@ -133,7 +133,7 @@ void usage(void)
 /*
  * Print out the commandline prefixed with cmd_<target filename> :=
  */
-void print_cmdline(void)
+static void print_cmdline(void)
 {
        printf("cmd_%s := %s\n\n", target, cmdline);
 }
@@ -146,7 +146,7 @@ int    len_config  = 0;
  * Grow the configuration string to a desired length.
  * Usually the first growth is plenty.
  */
-void grow_config(int len)
+static void grow_config(int len)
 {
        while (len_config + len > size_config) {
                if (size_config == 0)
@@ -162,7 +162,7 @@ void grow_config(int len)
 /*
  * Lookup a value in the configuration string.
  */
-int is_defined_config(const char * name, int len)
+static int is_defined_config(const char * name, int len)
 {
        const char * pconfig;
        const char * plast = str_config + len_config - len;
@@ -178,7 +178,7 @@ int is_defined_config(const char * name, int len)
 /*
  * Add a new value to the configuration string.
  */
-void define_config(const char * name, int len)
+static void define_config(const char * name, int len)
 {
        grow_config(len + 1);
 
@@ -190,7 +190,7 @@ void define_config(const char * name, int len)
 /*
  * Clear the set of configuration strings.
  */
-void clear_config(void)
+static void clear_config(void)
 {
        len_config = 0;
        define_config("", 0);
@@ -199,7 +199,7 @@ void clear_config(void)
 /*
  * Record the use of a CONFIG_* word.
  */
-void use_config(char *m, int slen)
+static void use_config(char *m, int slen)
 {
        char s[PATH_MAX];
        char *p;
@@ -220,7 +220,7 @@ void use_config(char *m, int slen)
        printf("    $(wildcard include/config/%s.h) \\\n", s);
 }
 
-void parse_config_file(char *map, size_t len)
+static void parse_config_file(char *map, size_t len)
 {
        int *end = (int *) (map + len);
        /* start at +1, so that p can never be < map */
@@ -254,7 +254,7 @@ void parse_config_file(char *map, size_t len)
 }
 
 /* test if s ends in sub */
-int strrcmp(char *s, char *sub)
+static int strrcmp(char *s, char *sub)
 {
        int slen = strlen(s);
        int sublen = strlen(sub);
@@ -265,7 +265,7 @@ int strrcmp(char *s, char *sub)
        return memcmp(s + slen - sublen, sub, sublen);
 }
 
-void do_config_file(char *filename)
+static void do_config_file(char *filename)
 {
        struct stat st;
        int fd;
@@ -296,7 +296,7 @@ void do_config_file(char *filename)
        close(fd);
 }
 
-void parse_dep_file(void *map, size_t len)
+static void parse_dep_file(void *map, size_t len)
 {
        char *m = map;
        char *end = m + len;
@@ -336,7 +336,7 @@ void parse_dep_file(void *map, size_t len)
        printf("$(deps_%s):\n", target);
 }
 
-void print_deps(void)
+static void print_deps(void)
 {
        struct stat st;
        int fd;
@@ -368,7 +368,7 @@ void print_deps(void)
        close(fd);
 }
 
-void traps(void)
+static void traps(void)
 {
        static char test[] __attribute__((aligned(sizeof(int)))) = "CONF";
        int *p = (int *)test;
index 3299ad7fc8c0faae87debf0860d155dbae5d7b83..2ef5d3f666b87ed52c7664cbc37ffe1e143397e8 100644 (file)
@@ -21,7 +21,7 @@ static void usage(void)
  * http://www.cse.yorku.ca/~oz/hash.html
  */
 
-unsigned int djb2_hash(char *str)
+static unsigned int djb2_hash(char *str)
 {
        unsigned long hash = 5381;
        int c;
@@ -34,7 +34,7 @@ unsigned int djb2_hash(char *str)
        return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
 }
 
-unsigned int r5_hash(char *str)
+static unsigned int r5_hash(char *str)
 {
        unsigned long hash = 0;
        int c;
index 8e6b716c191c67a1b0668caf43a8bf777b271299..676ddc07d6fa76b1a79603cf3be95a26eb7b7ec1 100755 (executable)
@@ -1,24 +1,85 @@
 #!/usr/bin/perl
 #
-# checkincludes: Find files included more than once in (other) files.
+# checkincludes: find/remove files included more than once
+#
 # Copyright abandoned, 2000, Niels Kristian Bech Jensen <nkbj@image.dk>.
+# Copyright 2009 Luis R. Rodriguez <mcgrof@gmail.com>
+#
+# This script checks for duplicate includes. It also has support
+# to remove them in place. Note that this will not take into
+# consideration macros so you should run this only if you know
+# you do have real dups and do not have them under #ifdef's. You
+# could also just review the results.
+
+sub usage {
+       print "Usage: checkincludes.pl [-r] <file> ...\n";
+       print "By default we only warn about duplicate includes.\n";
+       print "To remove duplicate includes in place use -r.\n";
+       exit 1;
+}
+
+my $remove = 0;
+
+if ($#ARGV < 0) {
+       usage();
+}
+
+if ($ARGV[0] =~ /^-/) {
+       if ($ARGV[0] eq "-r") {
+               $remove = 1;
+               shift;
+               usage() if ($#ARGV < 0);
+       } else {
+               usage();
+       }
+}
 
 foreach $file (@ARGV) {
        open(FILE, $file) or die "Cannot open $file: $!.\n";
 
        my %includedfiles = ();
+       my @file_lines = ();
 
        while (<FILE>) {
                if (m/^\s*#\s*include\s*[<"](\S*)[>"]/o) {
                        ++$includedfiles{$1};
                }
+               push(@file_lines, $_);
        }
-       
-       foreach $filename (keys %includedfiles) {
-               if ($includedfiles{$filename} > 1) {
-                       print "$file: $filename is included more than once.\n";
+
+       close(FILE);
+
+       if (!$remove) {
+               foreach $filename (keys %includedfiles) {
+                       if ($includedfiles{$filename} > 1) {
+                               print "$file: $filename is included more than once.\n";
+                       }
                }
+               next;
        }
 
+       open(FILE,">$file") || die("Cannot write to $file: $!");
+
+       my $dups = 0;
+       foreach (@file_lines) {
+               if (m/^\s*#\s*include\s*[<"](\S*)[>"]/o) {
+                       foreach $filename (keys %includedfiles) {
+                               if ($1 eq $filename) {
+                                       if ($includedfiles{$filename} > 1) {
+                                               $includedfiles{$filename}--;
+                                               $dups++;
+                                       } else {
+                                               print FILE $_;
+                                       }
+                               }
+                       }
+               } else {
+                       print FILE $_;
+               }
+       }
+       if ($dups > 0) {
+               print "$file: removed $dups duplicate includes\n";
+       }
        close(FILE);
 }
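
Usage note (paths hypothetical): scripts/checkincludes.pl drivers/net/foo.c only reports duplicates; adding -r rewrites the file in place. As the removal loop above shows, the earlier copies of a duplicated #include are the ones dropped and the last copy is kept.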
index 3baaaecd6b13ef3ba684f8e5320cfef10f1f5a4e..9960d1c303f8c2f985a1a92de8e990850c33ccf6 100644 (file)
@@ -38,14 +38,14 @@ static int conf_cnt;
 static char line[128];
 static struct menu *rootEntry;
 
-static char nohelp_text[] = N_("Sorry, no help available for this option yet.\n");
-
-static const char *get_help(struct menu *menu)
+static void print_help(struct menu *menu)
 {
-       if (menu_has_help(menu))
-               return _(menu_get_help(menu));
-       else
-               return nohelp_text;
+       struct gstr help = str_new();
+
+       menu_get_ext_help(menu, &help);
+
+       printf("\n%s\n", str_get(&help));
+       str_free(&help);
 }
 
 static void strip(char *str)
@@ -121,7 +121,7 @@ static int conf_askvalue(struct symbol *sym, const char *def)
        return 1;
 }
 
-int conf_string(struct menu *menu)
+static int conf_string(struct menu *menu)
 {
        struct symbol *sym = menu->sym;
        const char *def;
@@ -140,7 +140,7 @@ int conf_string(struct menu *menu)
                case '?':
                        /* print help */
                        if (line[1] == '\n') {
-                               printf("\n%s\n", get_help(menu));
+                               print_help(menu);
                                def = NULL;
                                break;
                        }
@@ -220,7 +220,7 @@ static int conf_sym(struct menu *menu)
                if (sym_set_tristate_value(sym, newval))
                        return 0;
 help:
-               printf("\n%s\n", get_help(menu));
+               print_help(menu);
        }
 }
 
@@ -307,7 +307,7 @@ static int conf_choice(struct menu *menu)
                        fgets(line, 128, stdin);
                        strip(line);
                        if (line[0] == '?') {
-                               printf("\n%s\n", get_help(menu));
+                               print_help(menu);
                                continue;
                        }
                        if (!line[0])
@@ -331,7 +331,7 @@ static int conf_choice(struct menu *menu)
                if (!child)
                        continue;
                if (line[strlen(line) - 1] == '?') {
-                       printf("\n%s\n", get_help(child));
+                       print_help(child);
                        continue;
                }
                sym_set_choice_value(sym, child->sym);
index a04da3459f0fa17ee71e155bcdb56d71a27d71b5..b55e72ff2fc676d4ae36ebe9028c6dee5b44d715 100644 (file)
@@ -560,7 +560,7 @@ int conf_write(const char *name)
        return 0;
 }
 
-int conf_split_config(void)
+static int conf_split_config(void)
 {
        const char *name;
        char path[128];
index 579ece4fa584506a882e9e2bdd3e7fbb28d3b0c1..edd3f39a080a422175bf512c94dd1d5bf87e059b 100644 (file)
@@ -348,7 +348,7 @@ struct expr *expr_trans_bool(struct expr *e)
 /*
  * e1 || e2 -> ?
  */
-struct expr *expr_join_or(struct expr *e1, struct expr *e2)
+static struct expr *expr_join_or(struct expr *e1, struct expr *e2)
 {
        struct expr *tmp;
        struct symbol *sym1, *sym2;
@@ -412,7 +412,7 @@ struct expr *expr_join_or(struct expr *e1, struct expr *e2)
        return NULL;
 }
 
-struct expr *expr_join_and(struct expr *e1, struct expr *e2)
+static struct expr *expr_join_and(struct expr *e1, struct expr *e2)
 {
        struct expr *tmp;
        struct symbol *sym1, *sym2;
@@ -1098,6 +1098,8 @@ void expr_fprint(struct expr *e, FILE *out)
 static void expr_print_gstr_helper(void *data, struct symbol *sym, const char *str)
 {
        str_append((struct gstr*)data, str);
+       if (sym)
+               str_printf((struct gstr*)data, " [=%s]", sym_get_string_value(sym));
 }
 
 void expr_gstr_print(struct expr *e, struct gstr *gs)
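
With the two added lines, every symbol that expr_gstr_print() renders into help text is annotated with its current value. A hypothetical "Depends on:" line now reads

        Depends on: EXPERIMENTAL [=y] && PCI [=n]

instead of showing the bare symbol names.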
index 199b22bb49e2a7940bae8f84e38348e6aca30480..65464366fe389ab67e631c64646717c67303d05b 100644 (file)
@@ -456,19 +456,9 @@ static void text_insert_help(struct menu *menu)
        GtkTextBuffer *buffer;
        GtkTextIter start, end;
        const char *prompt = _(menu_get_prompt(menu));
-       gchar *name;
-       const char *help;
+       struct gstr help = str_new();
 
-       help = menu_get_help(menu);
-
-       /* Gettextize if the help text not empty */
-       if ((help != 0) && (help[0] != 0))
-               help = _(help);
-
-       if (menu->sym && menu->sym->name)
-               name = g_strdup_printf(menu->sym->name);
-       else
-               name = g_strdup("");
+       menu_get_ext_help(menu, &help);
 
        buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w));
        gtk_text_buffer_get_bounds(buffer, &start, &end);
@@ -478,14 +468,11 @@ static void text_insert_help(struct menu *menu)
        gtk_text_buffer_get_end_iter(buffer, &end);
        gtk_text_buffer_insert_with_tags(buffer, &end, prompt, -1, tag1,
                                         NULL);
-       gtk_text_buffer_insert_at_cursor(buffer, " ", 1);
-       gtk_text_buffer_get_end_iter(buffer, &end);
-       gtk_text_buffer_insert_with_tags(buffer, &end, name, -1, tag1,
-                                        NULL);
        gtk_text_buffer_insert_at_cursor(buffer, "\n\n", 2);
        gtk_text_buffer_get_end_iter(buffer, &end);
-       gtk_text_buffer_insert_with_tags(buffer, &end, help, -1, tag2,
+       gtk_text_buffer_insert_with_tags(buffer, &end, str_get(&help), -1, tag2,
                                         NULL);
+       str_free(&help);
 }
 
 
index 803233fdd6ddbca62e255beb8db64b743553863e..b1c86c19292cb01d43b8637acddcf701bf93d844 100644 (file)
                  <property name="headers_visible">True</property>
                  <property name="rules_hint">False</property>
                  <property name="reorderable">False</property>
-                 <property name="enable_search">True</property>
+                 <property name="enable_search">False</property>
                  <signal name="cursor_changed" handler="on_treeview2_cursor_changed" last_modification_time="Sun, 12 Jan 2003 15:58:22 GMT"/>
                  <signal name="button_press_event" handler="on_treeview1_button_press_event" last_modification_time="Sun, 12 Jan 2003 16:03:52 GMT"/>
                  <signal name="key_press_event" handler="on_treeview2_key_press_event" last_modification_time="Sun, 12 Jan 2003 16:11:44 GMT"/>
                      <property name="headers_visible">True</property>
                      <property name="rules_hint">False</property>
                      <property name="reorderable">False</property>
-                     <property name="enable_search">True</property>
+                     <property name="enable_search">False</property>
                      <signal name="cursor_changed" handler="on_treeview2_cursor_changed" last_modification_time="Sun, 12 Jan 2003 15:57:55 GMT"/>
                      <signal name="button_press_event" handler="on_treeview2_button_press_event" last_modification_time="Sun, 12 Jan 2003 15:57:58 GMT"/>
                      <signal name="key_press_event" handler="on_treeview2_key_press_event" last_modification_time="Sun, 12 Jan 2003 15:58:01 GMT"/>
index 8d9ce22b0fc5d285ca93702d97a437aa7026f364..dcc3fcc0cc9a3d63989932ea1e5139200544fd75 100644 (file)
@@ -166,7 +166,7 @@ static int message__add(const char *msg, char *option, char *file, int lineno)
        return rc;
 }
 
-void menu_build_message_list(struct menu *menu)
+static void menu_build_message_list(struct menu *menu)
 {
        struct menu *child;
 
@@ -211,7 +211,7 @@ static void message__print_gettext_msgid_msgstr(struct message *self)
               "msgstr \"\"\n", self->msg);
 }
 
-void menu__xgettext(void)
+static void menu__xgettext(void)
 {
        struct message *m = message__list;
 
index 8e69461313d19de2c133479154c123e479f059e0..ffeb532b2cff3a71705693f132dc5a5eb9d94bf4 100644 (file)
@@ -17,6 +17,8 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu));
 P(menu_get_parent_menu,struct menu *,(struct menu *menu));
 P(menu_has_help,bool,(struct menu *menu));
 P(menu_get_help,const char *,(struct menu *menu));
+P(get_symbol_str,void,(struct gstr *r, struct symbol *sym));
+P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help));
 
 /* symbol.c */
 P(symbol_hash,struct symbol *,[SYMBOL_HASHSIZE]);
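
For context, P() is defined in lkc.h; a sketch of its two expansions, assuming the in-tree definition (frontends either link the kconfig core directly or resolve these entry points as function pointers when loaded via dlopen):

        #ifdef LKC_DIRECT_LINK
        #define P(name,type,arg)        extern type name arg
        #else
        #define P(name,type,arg)        extern type (*name ## _p) arg
        #endif

The two new lines therefore export get_symbol_str() and menu_get_ext_help() to every frontend.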
index 25b60bc117f738fabc26efb9a2e8f4f7546cd8d1..d82953573588c4f0c0014cc5c3677e5de4dcded9 100644 (file)
@@ -199,8 +199,6 @@ inputbox_instructions_string[] = N_(
 setmod_text[] = N_(
        "This feature depends on another which has been configured as a module.\n"
        "As a result, this feature will be built as a module."),
-nohelp_text[] = N_(
-       "There is no help available for this kernel option.\n"),
 load_config_text[] = N_(
        "Enter the name of the configuration file you wish to load.  "
        "Accept the name shown to restore the configuration you "
@@ -284,66 +282,6 @@ static void show_textbox(const char *title, const char *text, int r, int c);
 static void show_helptext(const char *title, const char *text);
 static void show_help(struct menu *menu);
 
-static void get_prompt_str(struct gstr *r, struct property *prop)
-{
-       int i, j;
-       struct menu *submenu[8], *menu;
-
-       str_printf(r, _("Prompt: %s\n"), _(prop->text));
-       str_printf(r, _("  Defined at %s:%d\n"), prop->menu->file->name,
-               prop->menu->lineno);
-       if (!expr_is_yes(prop->visible.expr)) {
-               str_append(r, _("  Depends on: "));
-               expr_gstr_print(prop->visible.expr, r);
-               str_append(r, "\n");
-       }
-       menu = prop->menu->parent;
-       for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent)
-               submenu[i++] = menu;
-       if (i > 0) {
-               str_printf(r, _("  Location:\n"));
-               for (j = 4; --i >= 0; j += 2) {
-                       menu = submenu[i];
-                       str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu)));
-                       if (menu->sym) {
-                               str_printf(r, " (%s [=%s])", menu->sym->name ?
-                                       menu->sym->name : _("<choice>"),
-                                       sym_get_string_value(menu->sym));
-                       }
-                       str_append(r, "\n");
-               }
-       }
-}
-
-static void get_symbol_str(struct gstr *r, struct symbol *sym)
-{
-       bool hit;
-       struct property *prop;
-
-       if (sym && sym->name)
-               str_printf(r, "Symbol: %s [=%s]\n", sym->name,
-                                                   sym_get_string_value(sym));
-       for_all_prompts(sym, prop)
-               get_prompt_str(r, prop);
-       hit = false;
-       for_all_properties(sym, prop, P_SELECT) {
-               if (!hit) {
-                       str_append(r, "  Selects: ");
-                       hit = true;
-               } else
-                       str_printf(r, " && ");
-               expr_gstr_print(prop->expr, r);
-       }
-       if (hit)
-               str_append(r, "\n");
-       if (sym->rev_dep.expr) {
-               str_append(r, _("  Selected by: "));
-               expr_gstr_print(sym->rev_dep.expr, r);
-               str_append(r, "\n");
-       }
-       str_append(r, "\n\n");
-}
-
 static struct gstr get_relations_str(struct symbol **sym_arr)
 {
        struct symbol *sym;
@@ -699,19 +637,9 @@ static void show_helptext(const char *title, const char *text)
 static void show_help(struct menu *menu)
 {
        struct gstr help = str_new();
-       struct symbol *sym = menu->sym;
-
-       if (menu_has_help(menu))
-       {
-               if (sym->name) {
-                       str_printf(&help, "CONFIG_%s:\n\n", sym->name);
-                       str_append(&help, _(menu_get_help(menu)));
-                       str_append(&help, "\n");
-               }
-       } else {
-               str_append(&help, nohelp_text);
-       }
-       get_symbol_str(&help, sym);
+
+       menu_get_ext_help(menu, &help);
+
        show_helptext(_(menu_get_prompt(menu)), str_get(&help));
        str_free(&help);
 }
index 07ff8d105c9def5885221d8668a4c7859b3bbb29..059a2465c5744ffed6d3cff25e2d8135a16c0399 100644 (file)
@@ -9,6 +9,9 @@
 #define LKC_DIRECT_LINK
 #include "lkc.h"
 
+static const char nohelp_text[] = N_(
+       "There is no help available for this kernel option.\n");
+
 struct menu rootmenu;
 static struct menu **last_entry_ptr;
 
@@ -74,7 +77,7 @@ void menu_end_menu(void)
        current_menu = current_menu->parent;
 }
 
-struct expr *menu_check_dep(struct expr *e)
+static struct expr *menu_check_dep(struct expr *e)
 {
        if (!e)
                return e;
@@ -184,7 +187,7 @@ static int menu_range_valid_sym(struct symbol *sym, struct symbol *sym2)
               (sym2->type == S_UNKNOWN && sym_string_valid(sym, sym2->name));
 }
 
-void sym_check_prop(struct symbol *sym)
+static void sym_check_prop(struct symbol *sym)
 {
        struct property *prop;
        struct symbol *sym2;
@@ -451,3 +454,80 @@ const char *menu_get_help(struct menu *menu)
        else
                return "";
 }
+
+static void get_prompt_str(struct gstr *r, struct property *prop)
+{
+       int i, j;
+       struct menu *submenu[8], *menu;
+
+       str_printf(r, _("Prompt: %s\n"), _(prop->text));
+       str_printf(r, _("  Defined at %s:%d\n"), prop->menu->file->name,
+               prop->menu->lineno);
+       if (!expr_is_yes(prop->visible.expr)) {
+               str_append(r, _("  Depends on: "));
+               expr_gstr_print(prop->visible.expr, r);
+               str_append(r, "\n");
+       }
+       menu = prop->menu->parent;
+       for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent)
+               submenu[i++] = menu;
+       if (i > 0) {
+               str_printf(r, _("  Location:\n"));
+               for (j = 4; --i >= 0; j += 2) {
+                       menu = submenu[i];
+                       str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu)));
+                       if (menu->sym) {
+                               str_printf(r, " (%s [=%s])", menu->sym->name ?
+                                       menu->sym->name : _("<choice>"),
+                                       sym_get_string_value(menu->sym));
+                       }
+                       str_append(r, "\n");
+               }
+       }
+}
+
+void get_symbol_str(struct gstr *r, struct symbol *sym)
+{
+       bool hit;
+       struct property *prop;
+
+       if (sym && sym->name)
+               str_printf(r, "Symbol: %s [=%s]\n", sym->name,
+                          sym_get_string_value(sym));
+       for_all_prompts(sym, prop)
+               get_prompt_str(r, prop);
+       hit = false;
+       for_all_properties(sym, prop, P_SELECT) {
+               if (!hit) {
+                       str_append(r, "  Selects: ");
+                       hit = true;
+               } else
+                       str_printf(r, " && ");
+               expr_gstr_print(prop->expr, r);
+       }
+       if (hit)
+               str_append(r, "\n");
+       if (sym->rev_dep.expr) {
+               str_append(r, _("  Selected by: "));
+               expr_gstr_print(sym->rev_dep.expr, r);
+               str_append(r, "\n");
+       }
+       str_append(r, "\n\n");
+}
+
+void menu_get_ext_help(struct menu *menu, struct gstr *help)
+{
+       struct symbol *sym = menu->sym;
+
+       if (menu_has_help(menu)) {
+               if (sym->name) {
+                       str_printf(help, "CONFIG_%s:\n\n", sym->name);
+                       str_append(help, _(menu_get_help(menu)));
+                       str_append(help, "\n");
+               }
+       } else {
+               str_append(help, nohelp_text);
+       }
+       if (sym)
+               get_symbol_str(help, sym);
+}
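
Every frontend now obtains help text through this single entry point; a minimal caller, mirroring print_help() in conf.c above:

        struct gstr help = str_new();

        menu_get_ext_help(menu, &help); /* CONFIG_ name, help text, symbol info */
        printf("\n%s\n", str_get(&help));
        str_free(&help);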
index ce7d508c75200b1072bb40c96e72701f742a6ddc..00c51507cfcc44acfe434fc35e08827c14ff7f0d 100644 (file)
@@ -1042,12 +1042,10 @@ void ConfigInfoView::menuInfo(void)
                if (showDebug())
                        debug = debug_info(sym);
 
-               help = menu_get_help(menu);
-               /* Gettextize if the help text not empty */
-               if (help.isEmpty())
-                       help = print_filter(menu_get_help(menu));
-               else
-                       help = print_filter(_(menu_get_help(menu)));
+               struct gstr help_gstr = str_new();
+               menu_get_ext_help(menu, &help_gstr);
+               help = print_filter(str_get(&help_gstr));
+               str_free(&help_gstr);
        } else if (menu->prompt) {
                head += "<big><b>";
                head += print_filter(_(menu->prompt->text));
index 18f3e5c33634f7729a9ad757e638757649bb043d..6c8fbbb66ebcaddefff183696d1c5d89bc2d6c09 100644 (file)
@@ -36,7 +36,7 @@ tristate modules_val;
 
 struct expr *sym_env_list;
 
-void sym_add_default(struct symbol *sym, const char *def)
+static void sym_add_default(struct symbol *sym, const char *def)
 {
        struct property *prop = prop_alloc(P_DEFAULT, sym);
 
@@ -125,7 +125,7 @@ struct property *sym_get_default_prop(struct symbol *sym)
        return NULL;
 }
 
-struct property *sym_get_range_prop(struct symbol *sym)
+static struct property *sym_get_range_prop(struct symbol *sym)
 {
        struct property *prop;
 
@@ -943,7 +943,7 @@ const char *prop_get_type_name(enum prop_type type)
        return "unknown";
 }
 
-void prop_add_env(const char *env)
+static void prop_add_env(const char *env)
 {
        struct symbol *sym, *sym2;
        struct property *prop;
index 89774011965d7f17289b09af086c1651e9cf7ef1..5f0fcb712e2992229eeb3606a3e425bcf4115218 100644 (file)
@@ -184,10 +184,7 @@ if ($target eq "0") {
 
 # if it's a module, we need to find the .ko file and calculate a load offset
 if ($module ne "") {
-       my $dir = dirname($filename);
-       $dir = $dir . "/";
-       my $mod = $module . ".ko";
-       my $modulefile = `find $dir -name $mod | head -1`;
+       my $modulefile = `modinfo $module | grep '^filename:' | awk '{ print \$2 }'`;
        chomp($modulefile);
        $filename = $modulefile;
        if ($filename eq "") {
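
The rewritten lookup asks modinfo for the module's installed path instead of guessing with find; a hypothetical run of the backticked pipeline (module name and path illustrative):

        $ modinfo e1000 | grep '^filename:' | awk '{ print $2 }'
        /lib/modules/2.6.31/kernel/drivers/net/e1000/e1000.ko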
index 4a34ec591e8c05f224f4900028f0e25f99e9eeba..d52f7a01557c1b28cf3155a640c175ce613558aa 100755 (executable)
@@ -101,7 +101,8 @@ exuberant()
        -I ____cacheline_aligned_in_smp                         \
        -I ____cacheline_internodealigned_in_smp                \
        -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL                      \
-       --extra=+f --c-kinds=+px                                \
+       -I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
+       --extra=+f --c-kinds=-px                                \
        --regex-asm='/^ENTRY\(([^)]*)\).*/\1/'                  \
        --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/'
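
Two ctags tweaks here: the tracepoint macros join the -I ignore list so exuberant ctags does not trip over their expansions, and --c-kinds flips from +px to -px, excluding (p)rototypes and e(x)ternal variable declarations so the generated tags land on definitions rather than declarations.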
 
index b8186bac8b7eb08088b40914137e4f51b8e403fd..6cf8fd2b79e80df26e142aa94e6fed9d4c3e7015 100644 (file)
@@ -61,7 +61,8 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
-               struct cgroup *new_cgroup, struct task_struct *task)
+               struct cgroup *new_cgroup, struct task_struct *task,
+               bool threadgroup)
 {
        if (current != task && !capable(CAP_SYS_ADMIN))
                        return -EPERM;
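
The added threadgroup flag comes from the cgroup attach-API change pulled in by this merge: it tells the subsystem whether an entire thread group is being moved rather than a single task. The device cgroup's permission check does not depend on that distinction, so the parameter is accepted and otherwise unused.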
index 485fc6233c38e16336bc9886c0dbfceb2fa63cdd..4770be375ffece3d506ea86b769aee3bf8607595 100644 (file)
@@ -169,9 +169,9 @@ static void key_garbage_collector(struct work_struct *work)
 
        /* trawl through the keys looking for keyrings */
        for (;;) {
-               if (key->expiry > now && key->expiry < new_timer) {
+               if (key->expiry > limit && key->expiry < new_timer) {
                        kdebug("will expire %x in %ld",
-                              key_serial(key), key->expiry - now);
+                              key_serial(key), key->expiry - limit);
                        new_timer = key->expiry;
                }
 
index 500aad0ebd6acd8af2e63a7cbc8ae0965bbc5cdc..3bb90b6f1dd3db9d697e02209273008eb7a73cc1 100644 (file)
@@ -187,7 +187,7 @@ static inline void print_ipv6_addr(struct audit_buffer *ab,
                                   char *name1, char *name2)
 {
        if (!ipv6_addr_any(addr))
-               audit_log_format(ab, " %s=%pI6", name1, addr);
+               audit_log_format(ab, " %s=%pI6c", name1, addr);
        if (port)
                audit_log_format(ab, " %s=%d", name2, ntohs(port));
 }
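
%pI6c is the compressed variant of the %pI6 printk extension: runs of zero groups collapse to "::", so an audit record logs e.g. 2001:db8::1 rather than 2001:0db8:0000:0000:0000:0000:0000:0001.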
index 14cc7b3b8d0379ef0cd17e071aa2821ea6dd4a95..c844eed7915d0d270c058c16d6b3db40ffa576d0 100644 (file)
@@ -28,12 +28,12 @@ static void update_mmap_min_addr(void)
  * sysctl handler which just sets dac_mmap_min_addr = the new value and then
  * calls update_mmap_min_addr() so non-MAP_FIXED hints get rounded properly
  */
-int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+int mmap_min_addr_handler(struct ctl_table *table, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret;
 
-       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 
        update_mmap_min_addr();
 
index 1ed0f076aadcfebbce2b1b3eed1c679e87765bd9..b4b5da1c0a421ff69a12b80312d65c456148a47d 100644 (file)
@@ -868,8 +868,19 @@ u32 avc_policy_seqno(void)
 
 void avc_disable(void)
 {
-       avc_flush();
-       synchronize_rcu();
-       if (avc_node_cachep)
-               kmem_cache_destroy(avc_node_cachep);
+       /*
+        * If you are looking at this because you have realized that we are
+        * not destroying the avc_node_cachep: it might be easy to fix, but
+        * the memory barrier semantics are subtle enough that it is not
+        * obviously safe.  Some other task may have dereferenced
+        * security_ops while it still pointed to the selinux operations; if
+        * so, it may be about to use the avc and thus the avc_node_cachep.
+        * Wrapping the security.c security_ops call under rcu_read_lock()
+        * would cover that case, but it is seriously not worth it.  Instead,
+        * just flush the cache and get that memory back.
+        */
+       if (avc_node_cachep) {
+               avc_flush();
+               /* kmem_cache_destroy(avc_node_cachep); */
+       }
 }
index 417f7c9945229175f79842c5a9637af6fd48fc1d..bb230d5d7085a9612f915edbb50124a0077db4be 100644 (file)
@@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
        /* Wake up the parent if it is waiting so that it can recheck
         * wait permission to the new task SID. */
        read_lock(&tasklist_lock);
-       wake_up_interruptible(&current->real_parent->signal->wait_chldexit);
+       __wake_up_parent(current, current->real_parent);
        read_unlock(&tasklist_lock);
 }
 
index 69b2e89fa1653d4b25b2e89563f6e2b7b59bd4e2..8e48117a3f3dc763a0d18f69db0a23f9acafe632 100644 (file)
@@ -4,5 +4,7 @@
 gen_init_cpio
 initramfs_data.cpio
 initramfs_data.cpio.gz
+initramfs_data.cpio.bz2
+initramfs_data.cpio.lzma
 initramfs_list
 include
index 245145a99c1012fa4b0f5f5d9d83a4f1f50dd0bc..1e6a9e4a72cc627fb4d0e4863dabdbc15f709274 100644 (file)
@@ -6,7 +6,7 @@ klibcdirs:;
 PHONY += klibcdirs
 
 
-# Gzip, but no bzip2
+# Gzip
 suffix_$(CONFIG_INITRAMFS_COMPRESSION_GZIP)   = .gz
 
 # Bzip2
index 897bff3b7df914a32f309de7e6b687fa7633cfe2..034a798b0431e98209489f793b904bfda9cf463c 100644 (file)
@@ -738,8 +738,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
        bool called = true;
        struct kvm_vcpu *vcpu;
 
-       if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
-               cpumask_clear(cpus);
+       zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
        spin_lock(&kvm->requests_lock);
        me = smp_processor_id();