*/
#include <stdio.h>
-#include <string.h>
#define EXAMPLES 6
# echo 0 > tasks
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy. This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems. Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a subsystem a name.
+
+The name of the subsystem appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
3. Kernel API
=============
called multiple times against a cgroup.
int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct task_struct *task)
+ struct task_struct *task, bool threadgroup)
(cgroup_mutex held by caller)
Called prior to moving a task into a cgroup; if the subsystem
task is passed, then a successful result indicates that *any*
unspecified task can be moved into the cgroup. Note that this isn't
called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex.
+remain valid while the caller holds cgroup_mutex. If threadgroup is
+true, then a successful result indicates that all threads in the given
+thread's threadgroup can be moved together.
void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *task)
+ struct cgroup *old_cgrp, struct task_struct *task,
+ bool threadgroup)
(cgroup_mutex held by caller)
Called after the task has been attached to the cgroup, to allow any
post-attachment activity that requires memory allocations or blocking.
+If threadgroup is true, the subsystem should take care of all threads
+in the specified thread's threadgroup. Currently does not support any
+subsystem that might need the old_cgrp for every thread in the group.
void fork(struct cgroup_subsy *ss, struct task_struct *task)
pages that are selected for reclaiming come from the per cgroup LRU
list.
+NOTE: Reclaim does not work for the root cgroup, since we cannot set any
+limits on the root cgroup.
+
2. Locking
The memory controller uses the following hierarchy
NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
mega or gigabytes.
NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
+NOTE: We cannot set limits on the root cgroup any more.
# cat /cgroups/0/memory.limit_in_bytes
4194304
NOTE2: This feature can be enabled/disabled per subtree.
-7. TODO
+7. Soft limits
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits is a best effort feature, it comes with
+no guarantees, but it does its best to make sure that when memory is
+heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft limit based reclaim is setup such that
+it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+
+Soft limits can be setup by using the following commands (in this example we
+assume a soft limit of 256 megabytes)
+
+# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use
+
+# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1: Soft limits take effect over a long period of time, since they involve
+ reclaiming memory for balancing between memory cgroups
+NOTE2: It is recommended to set the soft limit always below the hard limit,
+ otherwise the hard limit will take precedence.
+
+8. TODO
1. Add support for accounting huge pages (as a separate controller)
2. Make per-cgroup scanner reclaim not-shared pages first
3.1 General format of the API:
struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
- enum async_tx_flags flags,
- struct dma_async_tx_descriptor *dependency,
- dma_async_tx_callback callback_routine,
- void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit ctl *submit)
3.2 Supported operations:
-memcpy - memory copy between a source and a destination buffer
-memset - fill a destination buffer with a byte value
-xor - xor a series of source buffers and write the result to a
- destination buffer
-xor_zero_sum - xor a series of source buffers and set a flag if the
- result is zero. The implementation attempts to prevent
- writes to memory
+memcpy - memory copy between a source and a destination buffer
+memset - fill a destination buffer with a byte value
+xor - xor a series of source buffers and write the result to a
+ destination buffer
+xor_val - xor a series of source buffers and set a flag if the
+ result is zero. The implementation attempts to prevent
+ writes to memory
+pq - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val - validate that a p and or q buffer are in sync with a given series of
+ sources
+datap - (raid6_datap_recov) recover a raid6 data block and the p block
+ from the given sources
+2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given
+ sources
3.3 Descriptor management:
The return value is non-NULL and points to a 'descriptor' when the operation
recycle (or free) the descriptor. A descriptor can be acked by one of the
following methods:
1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
- descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+ async_tx call will implicitly set the acknowledged state.
3/ calling async_tx_ack() on the descriptor.
3.4 When does the operation execute?
Perform a xor->copy->xor operation where each operation depends on the
result from the previous operation:
-void complete_xor_copy_xor(void *param)
+void callback(void *param)
{
- printk("complete\n");
+ struct completion *cmp = param;
+
+ complete(cmp);
}
-int run_xor_copy_xor(struct page **xor_srcs,
- int xor_src_cnt,
- struct page *xor_dest,
- size_t xor_len,
- struct page *copy_src,
- struct page *copy_dest,
- size_t copy_len)
+void run_xor_copy_xor(struct page **xor_srcs,
+ int xor_src_cnt,
+ struct page *xor_dest,
+ size_t xor_len,
+ struct page *copy_src,
+ struct page *copy_dest,
+ size_t copy_len)
{
struct dma_async_tx_descriptor *tx;
+ addr_conv_t addr_conv[xor_src_cnt];
+ struct async_submit_ctl submit;
+ addr_conv_t addr_conv[NDISKS];
+ struct completion cmp;
+
+ init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+ addr_conv);
+ tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit)
- tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
- ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
- tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
- ASYNC_TX_DEP_ACK, tx, NULL, NULL);
- tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
- ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
- tx, complete_xor_copy_xor, NULL);
+ submit->depend_tx = tx;
+ tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+ init_completion(&cmp);
+ init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+ callback, &cmp, addr_conv);
+ tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
async_tx_issue_pending_all();
+
+ wait_for_completion(&cmp);
}
See include/linux/async_tx.h for more information on the flags. See the
---------------------------
-What: fscher and fscpos drivers
-When: June 2009
-Why: Deprecated by the new fschmd driver.
-Who: Hans de Goede <hdegoede@redhat.com>
- Jean Delvare <khali@linux-fr.org>
-
----------------------------
-
What: sysfs ui for changing p4-clockmod parameters
When: September 2009
Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
Other applications are described in the following papers:
* XCPU & Clustering
- http://www.xcpu.org/xcpu-talk.pdf
+ http://xcpu.org/papers/xcpu-talk.pdf
* KVMFS: control file system for KVM
- http://www.xcpu.org/kvmfs.pdf
- * CellFS: A New ProgrammingModel for the Cell BE
- http://www.xcpu.org/cellfs-talk.pdf
+ http://xcpu.org/papers/kvmfs.pdf
+ * CellFS: A New Programming Model for the Cell BE
+ http://xcpu.org/papers/cellfs-talk.pdf
* PROSE I/O: Using 9p to enable Application Partitions
http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
(see rfdno and wfdno)
virtio - connect to the next virtio channel available
(from lguest or KVM with trans_virtio module)
+ rdma - connect to a specified RDMA channel
uname=name user name to attempt mount as on the remote server. The
server may override or ignore this value. Certain user
cache=mode specifies a caching policy. By default, no caches are used.
loose = no attempts are made at consistency,
intended for exclusive, read-only mounts
+ fscache = use FS-Cache for a persistent, read-only
+ cache backend.
debug=n specifies debug level. The debug level is a bitmask.
- 0x01 = display verbose error messages
- 0x02 = developer debug (DEBUG_CURRENT)
- 0x04 = display 9p trace
- 0x08 = display VFS trace
- 0x10 = display Marshalling debug
- 0x20 = display RPC debug
- 0x40 = display transport debug
- 0x80 = display allocation debug
+ 0x01 = display verbose error messages
+ 0x02 = developer debug (DEBUG_CURRENT)
+ 0x04 = display 9p trace
+ 0x08 = display VFS trace
+ 0x10 = display Marshalling debug
+ 0x20 = display RPC debug
+ 0x40 = display transport debug
+ 0x80 = display allocation debug
+ 0x100 = display protocol message debug
+ 0x200 = display Fid debug
+ 0x400 = display packet debug
+ 0x800 = display fscache tracing debug
rfdno=n the file descriptor for reading with trans=fd
any = v9fs does single attach and performs all
operations as one user
+ cachetag cache tag to use the specified persistent cache.
+ cache tags for existing cache sessions can be listed at
+ /sys/fs/9p/caches. (applies only to cache=fscache)
+
RESOURCES
=========
A Linux version of the 9p server is now maintained under the npfs project
on sourceforge (http://sourceforge.net/projects/npfs). The currently
maintained version is the single-threaded version of the server (named spfs)
-available from the same CVS repository.
+available from the same SVN repository.
There are user and developer mailing lists available through the v9fs project
on sourceforge (http://sourceforge.net/projects/v9fs).
A stand-alone version of the module (which should build for any 2.6 kernel)
is available via (http://github.com/ericvh/9p-sac/tree/master)
-News and other information is maintained on SWiK (http://swik.net/v9fs).
+News and other information is maintained on SWiK (http://swik.net/v9fs)
+and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php).
Bug reports may be issued through the kernel.org bugzilla
(http://bugzilla.kernel.org)
Contents:
1) Overview
2) Features
- 3) smount command
+ 3) Setting mount states
4) Use-case
5) Detailed semantics
6) Quiz
Here is an example:
- Lets say /mnt has a mount that is shared.
+ Let's say /mnt has a mount that is shared.
mount --make-shared /mnt
- note: mount command does not yet support the --make-shared flag.
- I have included a small C program which does the same by executing
- 'smount /mnt shared'
+ Note: mount(8) command now supports the --make-shared flag,
+ so the sample 'smount' program is no longer needed and has been
+ removed.
- #mount --bind /mnt /tmp
+ # mount --bind /mnt /tmp
The above command replicates the mount at /mnt to the mountpoint /tmp
and the contents of both the mounts remain identical.
#ls /tmp
a b c
- Now lets say we mount a device at /tmp/a
- #mount /dev/sd0 /tmp/a
+ Now let's say we mount a device at /tmp/a
+ # mount /dev/sd0 /tmp/a
#ls /tmp/a
t1 t2 t2
Here is an example:
- Lets say /mnt has a mount which is shared.
- #mount --make-shared /mnt
+ Let's say /mnt has a mount which is shared.
+ # mount --make-shared /mnt
- Lets bind mount /mnt to /tmp
- #mount --bind /mnt /tmp
+ Let's bind mount /mnt to /tmp
+ # mount --bind /mnt /tmp
the new mount at /tmp becomes a shared mount and it is a replica of
the mount at /mnt.
- Now lets make the mount at /tmp; a slave of /mnt
- #mount --make-slave /tmp
- [or smount /tmp slave]
+ Now let's make the mount at /tmp; a slave of /mnt
+ # mount --make-slave /tmp
- lets mount /dev/sd0 on /mnt/a
- #mount /dev/sd0 /mnt/a
+ let's mount /dev/sd0 on /mnt/a
+ # mount /dev/sd0 /mnt/a
#ls /mnt/a
t1 t2 t3
Note the mount event has propagated to the mount at /tmp
- However lets see what happens if we mount something on the mount at /tmp
+ However let's see what happens if we mount something on the mount at /tmp
- #mount /dev/sd1 /tmp/b
+ # mount /dev/sd1 /tmp/b
#ls /tmp/b
s1 s2 s3
2d) A unbindable mount is a unbindable private mount
- lets say we have a mount at /mnt and we make is unbindable
+ let's say we have a mount at /mnt and we make is unbindable
- #mount --make-unbindable /mnt
- [ smount /mnt unbindable ]
+ # mount --make-unbindable /mnt
- Lets try to bind mount this mount somewhere else.
+ Let's try to bind mount this mount somewhere else.
# mount --bind /mnt /tmp
mount: wrong fs type, bad option, bad superblock on /mnt,
or too many mounted file systems
Binding a unbindable mount is a invalid operation.
-3) smount command
+3) Setting mount states
- Currently the mount command is not aware of shared subtree features.
- Work is in progress to add the support in mount ( util-linux package ).
- Till then use the following program.
+ The mount command (util-linux package) can be used to set mount
+ states:
- ------------------------------------------------------------------------
- //
- //this code was developed my Miklos Szeredi <miklos@szeredi.hu>
- //and modified by Ram Pai <linuxram@us.ibm.com>
- // sample usage:
- // smount /tmp shared
- //
- #include <stdio.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <string.h>
- #include <sys/mount.h>
- #include <sys/fsuid.h>
-
- #ifndef MS_REC
- #define MS_REC 0x4000 /* 16384: Recursive loopback */
- #endif
-
- #ifndef MS_SHARED
- #define MS_SHARED 1<<20 /* Shared */
- #endif
-
- #ifndef MS_PRIVATE
- #define MS_PRIVATE 1<<18 /* Private */
- #endif
-
- #ifndef MS_SLAVE
- #define MS_SLAVE 1<<19 /* Slave */
- #endif
-
- #ifndef MS_UNBINDABLE
- #define MS_UNBINDABLE 1<<17 /* Unbindable */
- #endif
-
- int main(int argc, char *argv[])
- {
- int type;
- if(argc != 3) {
- fprintf(stderr, "usage: %s dir "
- "<rshared|rslave|rprivate|runbindable|shared|slave"
- "|private|unbindable>\n" , argv[0]);
- return 1;
- }
-
- fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
-
- if (strcmp(argv[2],"rshared")==0)
- type=(MS_SHARED|MS_REC);
- else if (strcmp(argv[2],"rslave")==0)
- type=(MS_SLAVE|MS_REC);
- else if (strcmp(argv[2],"rprivate")==0)
- type=(MS_PRIVATE|MS_REC);
- else if (strcmp(argv[2],"runbindable")==0)
- type=(MS_UNBINDABLE|MS_REC);
- else if (strcmp(argv[2],"shared")==0)
- type=MS_SHARED;
- else if (strcmp(argv[2],"slave")==0)
- type=MS_SLAVE;
- else if (strcmp(argv[2],"private")==0)
- type=MS_PRIVATE;
- else if (strcmp(argv[2],"unbindable")==0)
- type=MS_UNBINDABLE;
- else {
- fprintf(stderr, "invalid operation: %s\n", argv[2]);
- return 1;
- }
- setfsuid(getuid());
-
- if(mount("", argv[1], "dontcare", type, "") == -1) {
- perror("mount");
- return 1;
- }
- return 0;
- }
- -----------------------------------------------------------------------
-
- Copy the above code snippet into smount.c
- gcc -o smount smount.c
-
-
- (i) To mark all the mounts under /mnt as shared execute the following
- command:
-
- smount /mnt rshared
- the corresponding syntax planned for mount command is
- mount --make-rshared /mnt
-
- just to mark a mount /mnt as shared, execute the following
- command:
- smount /mnt shared
- the corresponding syntax planned for mount command is
- mount --make-shared /mnt
-
- (ii) To mark all the shared mounts under /mnt as slave execute the
- following
-
- command:
- smount /mnt rslave
- the corresponding syntax planned for mount command is
- mount --make-rslave /mnt
-
- just to mark a mount /mnt as slave, execute the following
- command:
- smount /mnt slave
- the corresponding syntax planned for mount command is
- mount --make-slave /mnt
-
- (iii) To mark all the mounts under /mnt as private execute the
- following command:
-
- smount /mnt rprivate
- the corresponding syntax planned for mount command is
- mount --make-rprivate /mnt
-
- just to mark a mount /mnt as private, execute the following
- command:
- smount /mnt private
- the corresponding syntax planned for mount command is
- mount --make-private /mnt
-
- NOTE: by default all the mounts are created as private. But if
- you want to change some shared/slave/unbindable mount as
- private at a later point in time, this command can help.
-
- (iv) To mark all the mounts under /mnt as unbindable execute the
- following
-
- command:
- smount /mnt runbindable
- the corresponding syntax planned for mount command is
- mount --make-runbindable /mnt
-
- just to mark a mount /mnt as unbindable, execute the following
- command:
- smount /mnt unbindable
- the corresponding syntax planned for mount command is
- mount --make-unbindable /mnt
+ mount --make-shared mountpoint
+ mount --make-slave mountpoint
+ mount --make-private mountpoint
+ mount --make-unbindable mountpoint
4) Use cases
mount --rbind / /view/v3
mount --rbind / /view/v4
- and if /usr has a versioning filesystem mounted, than that
+ and if /usr has a versioning filesystem mounted, then that
mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
/view/v4/usr too
For example:
mount --make-shared /mnt
- mount --bin /mnt /tmp
+ mount --bind /mnt /tmp
The mount at /mnt and that at /tmp are both shared and belong
to the same peer group. Anything mounted or unmounted under
then the subtree under the unbindable mount is pruned in the new
location.
- eg: lets say we have the following mount tree.
+ eg: let's say we have the following mount tree.
A
/ \
/ \ / \
D E F G
- Lets say all the mount except the mount C in the tree are
+ Let's say all the mount except the mount C in the tree are
of a type other than unbindable.
If this tree is rbound to say Z
'b' on mounts that receive propagation from mount 'B' and does not have
sub-mounts within them are unmounted.
- Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to
+ Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
each other.
- lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
+ let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
'B1', 'B2' and 'B3' respectively.
- lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
+ let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
mount 'B1', 'B2' and 'B3' respectively.
if 'C1' is unmounted, all the mounts that are most-recently-mounted on
A cloned namespace contains all the mounts as that of the parent
namespace.
- Lets say 'A' and 'B' are the corresponding mounts in the parent and the
+ Let's say 'A' and 'B' are the corresponding mounts in the parent and the
child namespace.
If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
mount --make-slave /mnt
At this point we have the first mount at /tmp and
- its root dentry is 1. Lets call this mount 'A'
+ its root dentry is 1. Let's call this mount 'A'
And then we have a second mount at /tmp1 with root
- dentry 2. Lets call this mount 'B'
+ dentry 2. Let's call this mount 'B'
Next we have a third mount at /mnt with root dentry
- mnt. Lets call this mount 'C'
+ mnt. Let's call this mount 'C'
'B' is the slave of 'A' and 'C' is a slave of 'B'
A -> B -> C
Q3 Why is unbindable mount needed?
- Lets say we want to replicate the mount tree at multiple
+ Let's say we want to replicate the mount tree at multiple
locations within the same subtree.
if one rbind mounts a tree within the same subtree 'n' times
mounts. Here is a example.
step 1:
- lets say the root tree has just two directories with
+ let's say the root tree has just two directories with
one vfsmount.
root
/ \
Unclonable mounts come in handy here.
step 1:
- lets say the root tree has just two directories with
+ let's say the root tree has just two directories with
one vfsmount.
root
/ \
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
int (*launder_page) (struct page *);
+ int (*error_remove_page) (struct mapping *mapping, struct page *page);
};
writepage: called by the VM to write a dirty page to backing store.
prevent redirtying the page, it is kept locked during the whole
operation.
+ error_remove_page: normally set to generic_error_remove_page if truncation
+ is ok for this address space. Used for memory failure handling.
+ Setting this implies you deal with pages going away under you,
+ unless you have them locked or reference counts increased.
+
+
The File Object
===============
Supported chips:
* All Intel Core family
Prefix: 'coretemp'
- CPUID: family 0x6, models 0xe, 0xf, 0x16, 0x17
+ CPUID: family 0x6, models 0xe (Pentium M DC), 0xf (Core 2 DC 65nm),
+ 0x16 (Core 2 SC 65nm), 0x17 (Penryn 45nm),
+ 0x1a (Nehalem), 0x1c (Atom), 0x1e (Lynnfield)
Datasheet: Intel 64 and IA-32 Architectures Software Developer's Manual
Volume 3A: System Programming Guide
http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
+++ /dev/null
-Kernel driver fscher
-====================
-
-Supported chips:
- * Fujitsu-Siemens Hermes chip
- Prefix: 'fscher'
- Addresses scanned: I2C 0x73
-
-Authors:
- Reinhard Nissl <rnissl@gmx.de> based on work
- from Hermann Jung <hej@odn.de>,
- Frodo Looijaard <frodol@dds.nl>,
- Philip Edelbrock <phil@netroedge.com>
-
-Description
------------
-
-This driver implements support for the Fujitsu-Siemens Hermes chip. It is
-described in the 'Register Set Specification BMC Hermes based Systemboard'
-from Fujitsu-Siemens.
-
-The Hermes chip implements a hardware-based system management, e.g. for
-controlling fan speed and core voltage. There is also a watchdog counter on
-the chip which can trigger an alarm and even shut the system down.
-
-The chip provides three temperature values (CPU, motherboard and
-auxiliary), three voltage values (+12V, +5V and battery) and three fans
-(power supply, CPU and auxiliary).
-
-Temperatures are measured in degrees Celsius. The resolution is 1 degree.
-
-Fan rotation speeds are reported in RPM (rotations per minute). The value
-can be divided by a programmable divider (1, 2 or 4) which is stored on
-the chip.
-
-Voltage sensors (also known as "in" sensors) report their values in volts.
-
-All values are reported as final values from the driver. There is no need
-for further calculations.
-
-
-Detailed description
---------------------
-
-Below you'll find a single line description of all the bit values. With
-this information, you're able to decode e. g. alarms, wdog, etc. To make
-use of the watchdog, you'll need to set the watchdog time and enable the
-watchdog. After that it is necessary to restart the watchdog time within
-the specified period of time, or a system reset will occur.
-
-* revision
- READING & 0xff = 0x??: HERMES revision identification
-
-* alarms
- READING & 0x80 = 0x80: CPU throttling active
- READING & 0x80 = 0x00: CPU running at full speed
-
- READING & 0x10 = 0x10: software event (see control:1)
- READING & 0x10 = 0x00: no software event
-
- READING & 0x08 = 0x08: watchdog event (see wdog:2)
- READING & 0x08 = 0x00: no watchdog event
-
- READING & 0x02 = 0x02: thermal event (see temp*:1)
- READING & 0x02 = 0x00: no thermal event
-
- READING & 0x01 = 0x01: fan event (see fan*:1)
- READING & 0x01 = 0x00: no fan event
-
- READING & 0x13 ! 0x00: ALERT LED is flashing
-
-* control
- READING & 0x01 = 0x01: software event
- READING & 0x01 = 0x00: no software event
-
- WRITING & 0x01 = 0x01: set software event
- WRITING & 0x01 = 0x00: clear software event
-
-* watchdog_control
- READING & 0x80 = 0x80: power off on watchdog event while thermal event
- READING & 0x80 = 0x00: watchdog power off disabled (just system reset enabled)
-
- READING & 0x40 = 0x40: watchdog timebase 60 seconds (see also wdog:1)
- READING & 0x40 = 0x00: watchdog timebase 2 seconds
-
- READING & 0x10 = 0x10: watchdog enabled
- READING & 0x10 = 0x00: watchdog disabled
-
- WRITING & 0x80 = 0x80: enable "power off on watchdog event while thermal event"
- WRITING & 0x80 = 0x00: disable "power off on watchdog event while thermal event"
-
- WRITING & 0x40 = 0x40: set watchdog timebase to 60 seconds
- WRITING & 0x40 = 0x00: set watchdog timebase to 2 seconds
-
- WRITING & 0x20 = 0x20: disable watchdog
-
- WRITING & 0x10 = 0x10: enable watchdog / restart watchdog time
-
-* watchdog_state
- READING & 0x02 = 0x02: watchdog system reset occurred
- READING & 0x02 = 0x00: no watchdog system reset occurred
-
- WRITING & 0x02 = 0x02: clear watchdog event
-
-* watchdog_preset
- READING & 0xff = 0x??: configured watch dog time in units (see wdog:3 0x40)
-
- WRITING & 0xff = 0x??: configure watch dog time in units
-
-* in* (0: +5V, 1: +12V, 2: onboard 3V battery)
- READING: actual voltage value
-
-* temp*_status (1: CPU sensor, 2: onboard sensor, 3: auxiliary sensor)
- READING & 0x02 = 0x02: thermal event (overtemperature)
- READING & 0x02 = 0x00: no thermal event
-
- READING & 0x01 = 0x01: sensor is working
- READING & 0x01 = 0x00: sensor is faulty
-
- WRITING & 0x02 = 0x02: clear thermal event
-
-* temp*_input (1: CPU sensor, 2: onboard sensor, 3: auxiliary sensor)
- READING: actual temperature value
-
-* fan*_status (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
- READING & 0x04 = 0x04: fan event (fan fault)
- READING & 0x04 = 0x00: no fan event
-
- WRITING & 0x04 = 0x04: clear fan event
-
-* fan*_div (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
- Divisors 2,4 and 8 are supported, both for reading and writing
-
-* fan*_pwm (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
- READING & 0xff = 0x00: fan may be switched off
- READING & 0xff = 0x01: fan must run at least at minimum speed (supply: 6V)
- READING & 0xff = 0xff: fan must run at maximum speed (supply: 12V)
- READING & 0xff = 0x??: fan must run at least at given speed (supply: 6V..12V)
-
- WRITING & 0xff = 0x00: fan may be switched off
- WRITING & 0xff = 0x01: fan must run at least at minimum speed (supply: 6V)
- WRITING & 0xff = 0xff: fan must run at maximum speed (supply: 12V)
- WRITING & 0xff = 0x??: fan must run at least at given speed (supply: 6V..12V)
-
-* fan*_input (1: power supply fan, 2: CPU fan, 3: auxiliary fan)
- READING: actual RPM value
-
-
-Limitations
------------
-
-* Measuring fan speed
-It seems that the chip counts "ripples" (typical fans produce 2 ripples per
-rotation while VERAX fans produce 18) in a 9-bit register. This register is
-read out every second, then the ripple prescaler (2, 4 or 8) is applied and
-the result is stored in the 8 bit output register. Due to the limitation of
-the counting register to 9 bits, it is impossible to measure a VERAX fan
-properly (even with a prescaler of 8). At its maximum speed of 3500 RPM the
-fan produces 1080 ripples per second which causes the counting register to
-overflow twice, leading to only 186 RPM.
-
-* Measuring input voltages
-in2 ("battery") reports the voltage of the onboard lithium battery and not
-+3.3V from the power supply.
-
-* Undocumented features
-Fujitsu-Siemens Computers has not documented all features of the chip so
-far. Their software, System Guard, shows that there are a still some
-features which cannot be controlled by this implementation.
<http://mikonos.dia.unisa.it/tcfs>
'l' 40-7F linux/udf_fs_i.h in development:
<http://sourceforge.net/projects/linux-udf/>
+'m' 00-09 linux/mmtimer.h
'm' all linux/mtio.h conflict!
'm' all linux/soundcard.h conflict!
'm' all linux/synclink.h conflict!
INSTALL_PATH specifies where to place the updated kernel and system map
images. Default is /boot, but you can set it to other values.
+INSTALLKERNEL
+--------------------------------------------------
+Install script called when using "make install".
+The default name is "installkernel".
+
+The script will be called with the following arguments:
+ $1 - kernel version
+ $2 - kernel image file
+ $3 - kernel map file
+ $4 - default install path (use root directory if blank)
+
+The implmentation of "make install" is architecture specific
+and it may differ from the above.
+
+INSTALLKERNEL is provided to enable the possibility to
+specify a custom installer when cross compiling a kernel.
MODLIB
--------------------------------------------------
--- 3.9 Dependency tracking
--- 3.10 Special Rules
--- 3.11 $(CC) support functions
+ --- 3.12 $(LD) support functions
=== 4 Host Program support
--- 4.1 Simple Host Program
The second argument is optional, and if supplied will be used
if first argument is not supported.
- ld-option
- ld-option is used to check if $(CC) when used to link object files
+ cc-ldoption
+ cc-ldoption is used to check if $(CC) when used to link object files
supports the given option. An optional second option may be
specified if first option are not supported.
Example:
#arch/i386/kernel/Makefile
- vsyscall-flags += $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
In the above example, vsyscall-flags will be assigned the option
-Wl$(comma)--hash-style=sysv if it is supported by $(CC).
endif
endif
+--- 3.12 $(LD) support functions
+
+ ld-option
+ ld-option is used to check if $(LD) supports the supplied option.
+ ld-option takes two options as arguments.
+ The second argument is an optional option that can be used if the
+ first option is not supported by $(LD).
+
+ Example:
+ #Makefile
+ LDFLAGS_vmlinux += $(call really-ld-option, -X)
+
+
=== 4 Host Program support
Kbuild supports building executables on the host for use during the
of error messages about running out of file handles, you might
want to increase this limit.
-The three values in file-nr denote the number of allocated
-file handles, the number of unused file handles and the maximum
-number of file handles. When the allocated file handles come
-close to the maximum, but the number of unused file handles is
-significantly greater than 0, you've encountered a peak in your
-usage of file handles and you don't need to increase the maximum.
-
+Historically, the three values in file-nr denoted the number of
+allocated file handles, the number of allocated but unused file
+handles, and the maximum number of file handles. Linux 2.6 always
+reports 0 as the number of free file handles -- this is not an
+error, it just means that the number of allocated file handles
+exactly matches the number of used file handles.
+
+Attempts to allocate more file descriptors than file-max are
+reported with printk, look for "VFS: file-max limit <number>
+reached".
==============================================================
nr_open:
- callhome [ S390 only ]
- auto_msgmni
- core_pattern
+- core_pipe_limit
- core_uses_pid
- ctrl-alt-del
- dentry-state
==============================================================
+core_pipe_limit:
+
+This sysctl is only applicable when core_pattern is configured to pipe core
+files to user space helper a (when the first character of core_pattern is a '|',
+see above). When collecting cores via a pipe to an application, it is
+occasionally usefull for the collecting application to gather data about the
+crashing process from its /proc/pid directory. In order to do this safely, the
+kernel must wait for the collecting process to exit, so as not to remove the
+crashing processes proc files prematurely. This in turn creates the possibility
+that a misbehaving userspace collecting process can block the reaping of a
+crashed process simply by never exiting. This sysctl defends against that. It
+defines how many concurrent crashing processes may be piped to user space
+applications in parallel. If this value is exceeded, then those crashing
+processes above that value are noted via the kernel log and their cores are
+skipped. 0 is a special value, indicating that unlimited processes may be
+captured in parallel, but that no waiting will take place (i.e. the collecting
+process is not guaranteed access to /proc/<crahing pid>/). This value defaults
+to 0.
+
+==============================================================
+
core_uses_pid:
The default coredump filename is "core". By setting
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
+- memory_failure_early_kill
+- memory_failure_recovery
- min_free_kbytes
- min_slab_ratio
- min_unmapped_ratio
- vfs_cache_pressure
- zone_reclaim_mode
-
==============================================================
block_dump
The default value is 65536.
+=============================================================
+
+memory_failure_early_kill:
+
+Control how to kill processes when uncorrected memory error (typically
+a 2bit error in a memory module) is detected in the background by hardware
+that cannot be handled by the kernel. In some cases (like the page
+still having a valid copy on disk) the kernel will handle the failure
+transparently without affecting any applications. But if there is
+no other uptodate copy of the data it will kill to prevent any data
+corruptions from propagating.
+
+1: Kill all processes that have the corrupted and not reloadable page mapped
+as soon as the corruption is detected. Note this is not supported
+for a few types of pages, like kernel internally allocated data or
+the swap cache, but works for the majority of user pages.
+
+0: Only unmap the corrupted page from all processes and only kill a process
+who tries to access it.
+
+The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
+handle this if they want to.
+
+This is only active on architectures/platforms with advanced machine
+check handling and depends on the hardware capabilities.
+
+Applications can override this setting individually with the PR_MCE_KILL prctl
+
+==============================================================
+
+memory_failure_recovery
+
+Enable memory failure recovery (when supported by the platform)
+
+1: Attempt recovery.
+
+0: Always panic on a memory failure.
+
==============================================================
min_free_kbytes:
mm start up ... this is a loose form of stability on mm_users. For
example, it is used in copy_mm to protect against a racing tlb_gather_mmu
single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might
+truncate) does not lose sending ipi's to cloned threads that might
be spawned underneath it and go to user mode to drag in pte's into tlbs.
swap_lock
* Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
*/
+#define _LARGEFILE64_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <getopt.h>
#include <limits.h>
+#include <assert.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
+/*
+ * pagemap kernel ABI bits
+ */
+
+#define PM_ENTRY_BYTES sizeof(uint64_t)
+#define PM_STATUS_BITS 3
+#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
+#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
+#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
+#define PM_PSHIFT_BITS 6
+#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
+#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
+#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
+#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+
+#define PM_PRESENT PM_STATUS(4LL)
+#define PM_SWAP PM_STATUS(2LL)
+
+
/*
* kernel page flags
*/
static unsigned long opt_offset[MAX_ADDR_RANGES];
static unsigned long opt_size[MAX_ADDR_RANGES];
+#define MAX_VMAS 10240
+static int nr_vmas;
+static unsigned long pg_start[MAX_VMAS];
+static unsigned long pg_end[MAX_VMAS];
+static unsigned long voffset;
+
+static int pagemap_fd;
+
#define MAX_BIT_FILTERS 64
static int nr_bit_filters;
static uint64_t opt_mask[MAX_BIT_FILTERS];
#define PAGES_BATCH (64 << 10) /* 64k pages */
static int kpageflags_fd;
-static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
#define HASH_SHIFT 13
#define HASH_SIZE (1 << HASH_SHIFT)
type __min2 = (y); \
__min1 < __min2 ? __min1 : __min2; })
+#define max_t(type, x, y) ({ \
+ type __max1 = (x); \
+ type __max2 = (y); \
+ __max1 > __max2 ? __max1 : __max2; })
+
static unsigned long pages2mb(unsigned long pages)
{
return (pages * page_size) >> 20;
static void show_page_range(unsigned long offset, uint64_t flags)
{
static uint64_t flags0;
+ static unsigned long voff;
static unsigned long index;
static unsigned long count;
- if (flags == flags0 && offset == index + count) {
+ if (flags == flags0 && offset == index + count &&
+ (!opt_pid || voffset == voff + count)) {
count++;
return;
}
- if (count)
- printf("%lu\t%lu\t%s\n",
+ if (count) {
+ if (opt_pid)
+ printf("%lx\t", voff);
+ printf("%lx\t%lx\t%s\n",
index, count, page_flag_name(flags0));
+ }
flags0 = flags;
index = offset;
+ voff = voffset;
count = 1;
}
static void show_page(unsigned long offset, uint64_t flags)
{
- printf("%lu\t%s\n", offset, page_flag_name(flags));
+ if (opt_pid)
+ printf("%lx\t", voffset);
+ printf("%lx\t%s\n", offset, page_flag_name(flags));
}
static void show_summary(void)
lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
while (count) {
+ uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
+
batch = min_t(unsigned long, count, PAGES_BATCH);
n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
if (n == 0)
}
}
+
+#define PAGEMAP_BATCH 4096
+static unsigned long task_pfn(unsigned long pgoff)
+{
+ static uint64_t buf[PAGEMAP_BATCH];
+ static unsigned long start;
+ static long count;
+ uint64_t pfn;
+
+ if (pgoff < start || pgoff >= start + count) {
+ if (lseek64(pagemap_fd,
+ (uint64_t)pgoff * PM_ENTRY_BYTES,
+ SEEK_SET) < 0) {
+ perror("pagemap seek");
+ exit(EXIT_FAILURE);
+ }
+ count = read(pagemap_fd, buf, sizeof(buf));
+ if (count == 0)
+ return 0;
+ if (count < 0) {
+ perror("pagemap read");
+ exit(EXIT_FAILURE);
+ }
+ if (count % PM_ENTRY_BYTES) {
+ fatal("pagemap read not aligned.\n");
+ exit(EXIT_FAILURE);
+ }
+ count /= PM_ENTRY_BYTES;
+ start = pgoff;
+ }
+
+ pfn = buf[pgoff - start];
+ if (pfn & PM_PRESENT)
+ pfn = PM_PFRAME(pfn);
+ else
+ pfn = 0;
+
+ return pfn;
+}
+
+static void walk_task(unsigned long index, unsigned long count)
+{
+ int i = 0;
+ const unsigned long end = index + count;
+
+ while (index < end) {
+
+ while (pg_end[i] <= index)
+ if (++i >= nr_vmas)
+ return;
+ if (pg_start[i] >= end)
+ return;
+
+ voffset = max_t(unsigned long, pg_start[i], index);
+ index = min_t(unsigned long, pg_end[i], end);
+
+ assert(voffset < index);
+ for (; voffset < index; voffset++) {
+ unsigned long pfn = task_pfn(voffset);
+ if (pfn)
+ walk_pfn(pfn, 1);
+ }
+ }
+}
+
+static void add_addr_range(unsigned long offset, unsigned long size)
+{
+ if (nr_addr_ranges >= MAX_ADDR_RANGES)
+ fatal("too many addr ranges\n");
+
+ opt_offset[nr_addr_ranges] = offset;
+ opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
+ nr_addr_ranges++;
+}
+
static void walk_addr_ranges(void)
{
int i;
}
if (!nr_addr_ranges)
- walk_pfn(0, ULONG_MAX);
+ add_addr_range(0, ULONG_MAX);
for (i = 0; i < nr_addr_ranges; i++)
- walk_pfn(opt_offset[i], opt_size[i]);
+ if (!opt_pid)
+ walk_pfn(opt_offset[i], opt_size[i]);
+ else
+ walk_task(opt_offset[i], opt_size[i]);
close(kpageflags_fd);
}
" -r|--raw Raw mode, for kernel developers\n"
" -a|--addr addr-spec Walk a range of pages\n"
" -b|--bits bits-spec Walk pages with specified bits\n"
-#if 0 /* planned features */
" -p|--pid pid Walk process address space\n"
+#if 0 /* planned features */
" -f|--file filename Walk file address space\n"
#endif
" -l|--list Show page details in ranges\n"
" N+M pages range from N to N+M-1\n"
" N,M pages range from N to M-1\n"
" N, pages range from N to end\n"
-" ,M pages range from 0 to M\n"
+" ,M pages range from 0 to M-1\n"
"bits-spec:\n"
" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
static void parse_pid(const char *str)
{
+ FILE *file;
+ char buf[5000];
+
opt_pid = parse_number(str);
-}
-static void parse_file(const char *name)
-{
+ sprintf(buf, "/proc/%d/pagemap", opt_pid);
+ pagemap_fd = open(buf, O_RDONLY);
+ if (pagemap_fd < 0) {
+ perror(buf);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(buf, "/proc/%d/maps", opt_pid);
+ file = fopen(buf, "r");
+ if (!file) {
+ perror(buf);
+ exit(EXIT_FAILURE);
+ }
+
+ while (fgets(buf, sizeof(buf), file) != NULL) {
+ unsigned long vm_start;
+ unsigned long vm_end;
+ unsigned long long pgoff;
+ int major, minor;
+ char r, w, x, s;
+ unsigned long ino;
+ int n;
+
+ n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
+ &vm_start,
+ &vm_end,
+ &r, &w, &x, &s,
+ &pgoff,
+ &major, &minor,
+ &ino);
+ if (n < 10) {
+ fprintf(stderr, "unexpected line: %s\n", buf);
+ continue;
+ }
+ pg_start[nr_vmas] = vm_start / page_size;
+ pg_end[nr_vmas] = vm_end / page_size;
+ if (++nr_vmas >= MAX_VMAS) {
+ fprintf(stderr, "too many VMAs\n");
+ break;
+ }
+ }
+ fclose(file);
}
-static void add_addr_range(unsigned long offset, unsigned long size)
+static void parse_file(const char *name)
{
- if (nr_addr_ranges >= MAX_ADDR_RANGES)
- fatal("too much addr ranges\n");
-
- opt_offset[nr_addr_ranges] = offset;
- opt_size[nr_addr_ranges] = size;
- nr_addr_ranges++;
}
static void parse_addr_range(const char *optarg)
}
}
+ if (opt_list && opt_pid)
+ printf("voffset\t");
if (opt_list == 1)
- printf("offset\tcount\tflags\n");
+ printf("offset\tlen\tflags\n");
if (opt_list == 2)
printf("offset\tflags\n");
S: Supported
F: drivers/acpi/fan.c
-ACPI PCI HOTPLUG DRIVER
-M: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
-L: linux-pci@vger.kernel.org
-S: Supported
-F: drivers/pci/hotplug/acpi*
-
ACPI THERMAL DRIVER
M: Zhang Rui <rui.zhang@intel.com>
L: linux-acpi@vger.kernel.org
F: drivers/hwmon/
HARDWARE RANDOM NUMBER GENERATOR CORE
-S: Orphan
+M: Matt Mackall <mpm@selenic.com>
+M: Herbert Xu <herbert@gondor.apana.org.au>
+S: Odd fixes
F: Documentation/hw_random.txt
F: drivers/char/hw_random/
F: include/linux/hw_random.h
F: drivers/pci/
F: include/linux/pci*
-PCIE HOTPLUG DRIVER
-M: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
+PCI HOTPLUG
+M: Jesse Barnes <jbarnes@virtuousgeek.org>
L: linux-pci@vger.kernel.org
S: Supported
-F: drivers/pci/pcie/
+F: drivers/pci/hotplug
PCMCIA SUBSYSTEM
P: Linux PCMCIA Team
F: drivers/usb/gadget/lh7a40*
F: drivers/usb/host/ohci-lh7a40*
-SHPC HOTPLUG DRIVER
-M: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
-L: linux-pci@vger.kernel.org
-S: Supported
-F: drivers/pci/hotplug/shpchp*
-
SIMPLE FIRMWARE INTERFACE (SFI)
P: Len Brown
M: lenb@kernel.org
F: drivers/sfi/
F: include/linux/sfi*.h
-
SIMTEC EB110ATX (Chalice CATS)
P: Ben Dooks
M: Vincent Sanders <support@simtec.co.uk>
# Alternatively CROSS_COMPILE can be set in the environment.
# Default value for CROSS_COMPILE is not to prefix executables
# Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
+#
+# To force ARCH and CROSS_COMPILE settings include kernel.* files
+# in the kernel tree - do not patch this file.
export KBUILD_BUILDHOST := $(SUBARCH)
-ARCH ?= $(SUBARCH)
-CROSS_COMPILE ?=
+
+# Kbuild save the ARCH and CROSS_COMPILE setting in kernel.* files.
+# Restore these settings and check that user did not specify
+# conflicting values.
+
+saved_arch := $(shell cat include/generated/kernel.arch 2> /dev/null)
+saved_cross := $(shell cat include/generated/kernel.cross 2> /dev/null)
+
+ifneq ($(CROSS_COMPILE),)
+ ifneq ($(saved_cross),)
+ ifneq ($(CROSS_COMPILE),$(saved_cross))
+ $(error CROSS_COMPILE changed from \
+ "$(saved_cross)" to \
+ to "$(CROSS_COMPILE)". \
+ Use "make mrproper" to fix it up)
+ endif
+ endif
+else
+ CROSS_COMPILE := $(saved_cross)
+endif
+
+ifneq ($(ARCH),)
+ ifneq ($(saved_arch),)
+ ifneq ($(saved_arch),$(ARCH))
+ $(error ARCH changed from \
+ "$(saved_arch)" to "$(ARCH)". \
+ Use "make mrproper" to fix it up)
+ endif
+ endif
+else
+ ifneq ($(saved_arch),)
+ ARCH := $(saved_arch)
+ else
+ ARCH := $(SUBARCH)
+ endif
+endif
# Architecture as present in compile.h
UTS_MACHINE := $(ARCH)
OBJDUMP = $(CROSS_COMPILE)objdump
AWK = awk
GENKSYMS = scripts/genksyms/genksyms
+INSTALLKERNEL := installkernel
DEPMOD = /sbin/depmod
KALLSYMS = scripts/kallsyms
PERL = perl
export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE
+export CPP AR NM STRIP OBJCOPY OBJDUMP
+export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
include $(srctree)/arch/$(SRCARCH)/Makefile
export KBUILD_DEFCONFIG KBUILD_KCONFIG
+# save ARCH & CROSS_COMPILE settings
+$(shell mkdir -p include/generated && \
+ echo $(ARCH) > include/generated/kernel.arch && \
+ echo $(CROSS_COMPILE) > include/generated/kernel.cross)
+
config: scripts_basic outputmakefile FORCE
$(Q)mkdir -p include/linux include/config
$(Q)$(MAKE) $(build)=scripts/kconfig $@
# revert to pre-gcc-4.4 behaviour of .eh_frame
KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm)
+# conserve stack if available
+KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
+
# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
# But warn user when we do so
warn-assign = \
# Use --build-id when available.
LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
- $(call ld-option, -Wl$(comma)--build-id,))
+ $(call cc-ldoption, -Wl$(comma)--build-id,))
LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
-LDFLAGS_vmlinux += -X
+LDFLAGS_vmlinux += $(call ld-option, -X,)
endif
# Default kernel image to build when no specific target is given.
# All the preparing..
prepare: prepare0
-# Leave this as default for preprocessing vmlinux.lds.S, which is now
-# done in arch/$(ARCH)/kernel/Makefile
-
-export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
-
# The asm symlink changes when $(ARCH) changes.
# Detect this and ask user to run make mrproper
# If asm is a stale symlink (point to dir that does not exist) remove it
#define F_GETOWN 6 /* for sockets. */
#define F_SETSIG 10 /* for sockets. */
#define F_GETSIG 11 /* for sockets. */
+#define F_SETOWN_EX 12
+#define F_GETOWN_EX 13
/* for posix fcntl() and lockf() */
#define F_RDLCK 1
extern int smp_num_cpus;
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#else /* CONFIG_SMP */
return node;
}
-static inline cpumask_t node_to_cpumask(int node)
-{
- cpumask_t node_cpu_mask = CPU_MASK_NONE;
- int cpu;
-
- for_each_online_cpu(cpu) {
- if (cpu_to_node(cpu) == node)
- cpu_set(cpu, node_cpu_mask);
- }
-
-#ifdef DEBUG_NUMA
- printk("node %d: cpu_mask: %016lx\n", node, node_cpu_mask);
-#endif
-
- return node_cpu_mask;
-}
-
extern struct cpumask node_to_cpumask_map[];
/* FIXME: This is dumb, recalculating every time. But simple. */
static const struct cpumask *cpumask_of_node(int node)
return &node_to_cpumask_map[node];
}
-#define pcibus_to_cpumask(bus) (cpu_online_map)
#define cpumask_of_pcibus(bus) (cpu_online_mask)
#endif /* !CONFIG_NUMA */
{
struct marvel_agp_aperture *aper = agp->aperture.sysdata;
return iommu_bind(aper->arena, aper->pg_start + pg_start,
- mem->page_count, mem->memory);
+ mem->page_count, mem->pages);
}
static int
{
struct titan_agp_aperture *aper = agp->aperture.sysdata;
return iommu_bind(aper->arena, aper->pg_start + pg_start,
- mem->page_count, mem->memory);
+ mem->page_count, mem->pages);
}
static int
extern int iommu_reserve(struct pci_iommu_arena *, long, long);
extern int iommu_release(struct pci_iommu_arena *, long, long);
-extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *);
+extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **);
extern int iommu_unbind(struct pci_iommu_arena *, long, long);
int
iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count,
- unsigned long *physaddrs)
+ struct page **pages)
{
unsigned long flags;
unsigned long *ptes;
}
for(i = 0, j = pg_start; i < pg_count; i++, j++)
- ptes[j] = mk_iommu_pte(physaddrs[i]);
+ ptes[j] = mk_iommu_pte(page_to_phys(pages[i]));
spin_unlock_irqrestore(&arena->lock, flags);
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/user.h>
-#include <linux/utsname.h>
#include <linux/time.h>
#include <linux/major.h>
#include <linux/stat.h>
\f
static void
-send_ipi_message(cpumask_t to_whom, enum ipi_message_type operation)
+send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
{
int i;
mb();
- for_each_cpu_mask(i, to_whom)
+ for_each_cpu(i, to_whom)
set_bit(operation, &ipi_data[i].bits);
mb();
- for_each_cpu_mask(i, to_whom)
+ for_each_cpu(i, to_whom)
wripir(i);
}
printk(KERN_WARNING
"smp_send_reschedule: Sending IPI to self.\n");
#endif
- send_ipi_message(cpumask_of_cpu(cpu), IPI_RESCHEDULE);
+ send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
}
void
if (hard_smp_processor_id() != boot_cpu_id)
printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n");
#endif
- send_ipi_message(to_whom, IPI_CPU_STOP);
+ send_ipi_message(&to_whom, IPI_CPU_STOP);
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
send_ipi_message(mask, IPI_CALL_FUNC);
}
void arch_send_call_function_single_ipi(int cpu)
{
- send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE);
+ send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
}
static void
ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
LDFLAGS_vmlinux += --be8
endif
-CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
+
OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
GZFLAGS :=-9
#KBUILD_CFLAGS +=-pipe
echo ' (supply initrd image via make variable INITRD=<path>)'
echo ' install - Install uncompressed kernel'
echo ' zinstall - Install compressed kernel'
- echo ' Install using (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
+ echo ' Install using (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH) and run lilo'
endef
#
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
if [ "$(basename $2)" = "zImage" ]; then
# Compressed install
#ifndef CONFIG_CPU_CACHE_VIPT
static inline void flush_cache_mm(struct mm_struct *mm)
{
- if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
__cpuc_flush_user_all();
}
static inline void
flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
vma->vm_flags);
}
static inline void
flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
{
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
unsigned long addr = user_addr & PAGE_MASK;
__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
}
unsigned long uaddr, void *kaddr,
unsigned long len, int write)
{
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
unsigned long addr = (unsigned long)kaddr;
__cpuc_coherent_kern_range(addr, addr + len);
}
void *ptr;
};
+/* No support for p+q operations */
+static inline int
+iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+ BUG();
+ return 0;
+}
+
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+ dma_addr_t addr, unsigned char coef)
+{
+ BUG();
+}
+
+static inline int
+iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+ BUG();
+ return 0;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+ BUG();
+}
+
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+ dma_addr_t *src)
+{
+ BUG();
+}
+
static inline int iop_adma_get_max_xor(void)
{
return 32;
}
+static inline int iop_adma_get_max_pq(void)
+{
+ BUG();
+ return 0;
+}
+
static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
{
int id = chan->device->id;
return slot_cnt;
}
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+ return 0;
+}
+
static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
return 0;
}
+
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan)
+{
+ BUG();
+ return 0;
+}
+
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
hw_desc->src[0] = val;
}
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
- return desc_ctrl.zero_result_err;
+ return desc_ctrl.zero_result_err << SUM_CHECK_P;
}
static inline void iop_chan_append(struct iop_adma_chan *chan)
* @idx: pool index
* @unmap_src_cnt: number of xor sources
* @unmap_len: transaction bytecount
+ * @tx_list: list of descriptors that are associated with one operation
* @async_tx: support for the async_tx api
* @group_list: list of slots that make up a multi-descriptor transaction
* for example transfer lengths larger than the supported hw max
u16 idx;
u16 unmap_src_cnt;
size_t unmap_len;
+ struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
union {
u32 *xor_check_result;
u32 *crc32_result;
+ u32 *pq_check_result;
};
};
#ifdef CONFIG_SMP
/* check for possible thread migration */
- if (!cpus_empty(next->cpu_vm_mask) && !cpu_isset(cpu, next->cpu_vm_mask))
+ if (!cpumask_empty(mm_cpumask(next)) &&
+ !cpumask_test_cpu(cpu, mm_cpumask(next)))
__flush_icache_all();
#endif
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) {
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
check_context(next);
cpu_switch_mm(next->pgd, next);
if (cache_is_vivt())
- cpu_clear(cpu, prev->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
}
#endif
}
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
/*
* show local interrupt info
if (tlb_flag(TLB_WB))
dsb();
- if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
if (tlb_flag(TLB_V3_FULL))
asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc");
if (tlb_flag(TLB_V4_U_FULL))
if (tlb_flag(TLB_WB))
dsb();
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
if (tlb_flag(TLB_V3_PAGE))
asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc");
if (tlb_flag(TLB_V4_U_PAGE))
# Makefile for the linux kernel.
#
-AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
+AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
ifdef CONFIG_DYNAMIC_FTRACE
CFLAGS_REMOVE_ftrace.o = -pg
*
* The things we do for performance..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
read_lock(&tasklist_lock);
for_each_process(p) {
if (p->mm)
- cpu_clear(cpu, p->mm->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
}
read_unlock(&tasklist_lock);
atomic_inc(&mm->mm_users);
atomic_inc(&mm->mm_count);
current->active_mm = mm;
- cpu_set(cpu, mm->cpu_vm_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(mm));
cpu_switch_mm(mm->pgd, mm);
enter_lazy_tlb(mm, current);
local_flush_tlb_all();
void flush_tlb_mm(struct mm_struct *mm)
{
if (tlb_ops_need_broadcast())
- on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask);
+ on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
else
local_flush_tlb_mm(mm);
}
struct tlb_args ta;
ta.ta_vma = vma;
ta.ta_start = uaddr;
- on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+ on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
} else
local_flush_tlb_page(vma, uaddr);
}
ta.ta_vma = vma;
ta.ta_start = start;
ta.ta_end = end;
- on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+ on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
} else
local_flush_tlb_range(vma, start, end);
}
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/ipc.h>
#include <linux/uaccess.h>
void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
#endif
+/* --------------------------------------------------------------------
+ * CAN Controller
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_CAN_AT91) || defined(CONFIG_CAN_AT91_MODULE)
+static struct resource can_resources[] = {
+ [0] = {
+ .start = AT91SAM9263_BASE_CAN,
+ .end = AT91SAM9263_BASE_CAN + SZ_16K - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = AT91SAM9263_ID_CAN,
+ .end = AT91SAM9263_ID_CAN,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device at91sam9263_can_device = {
+ .name = "at91_can",
+ .id = -1,
+ .resource = can_resources,
+ .num_resources = ARRAY_SIZE(can_resources),
+};
+
+void __init at91_add_device_can(struct at91_can_data *data)
+{
+ at91_set_A_periph(AT91_PIN_PA13, 0); /* CANTX */
+ at91_set_A_periph(AT91_PIN_PA14, 0); /* CANRX */
+ at91sam9263_can_device.dev.platform_data = data;
+
+ platform_device_register(&at91sam9263_can_device);
+}
+#else
+void __init at91_add_device_can(struct at91_can_data *data) {}
+#endif
/* --------------------------------------------------------------------
* LCD Controller
}
};
+/*
+ * CAN
+ */
+static void sam9263ek_transceiver_switch(int on)
+{
+ if (on) {
+ at91_set_gpio_output(AT91_PIN_PA18, 1); /* CANRXEN */
+ at91_set_gpio_output(AT91_PIN_PA19, 0); /* CANRS */
+ } else {
+ at91_set_gpio_output(AT91_PIN_PA18, 0); /* CANRXEN */
+ at91_set_gpio_output(AT91_PIN_PA19, 1); /* CANRS */
+ }
+}
+
+static struct at91_can_data ek_can_data = {
+ .transceiver_switch = sam9263ek_transceiver_switch,
+};
static void __init ek_board_init(void)
{
/* LEDs */
at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds));
at91_pwm_leds(ek_pwm_led, ARRAY_SIZE(ek_pwm_led));
+ /* CAN */
+ at91_add_device_can(&ek_can_data);
}
MACHINE_START(AT91SAM9263EK, "Atmel AT91SAM9263-EK")
/* Touchscreen Controller */
extern void __init at91_add_device_tsadcc(void);
+/* CAN */
+struct at91_can_data {
+ void (*transceiver_switch)(int on);
+};
+extern void __init at91_add_device_can(struct at91_can_data *data);
+
/* LEDs */
extern void __init at91_init_leds(u8 cpu_led, u8 timer_led);
extern void __init at91_gpio_leds(struct gpio_led *leds, int nr);
return 16;
}
+#define iop_adma_get_max_pq iop_adma_get_max_xor
+
static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
{
return __raw_readl(ADMA_ADAR(chan));
#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
+#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
+#define iop_chan_pq_slot_count iop_chan_xor_slot_count
+#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
return hw_desc->dest_addr;
}
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+ return hw_desc->q_dest_addr;
+}
+
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
return 1;
}
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop13xx_adma_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = 0;
+ u_desc_ctrl.field.src_select = src_cnt - 1;
+ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+ u_desc_ctrl.field.pq_xfer_en = 1;
+ u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop13xx_adma_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = hw_desc->desc_ctrl;
+ return u_desc_ctrl.field.pq_xfer_en;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop13xx_adma_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = 0;
+ u_desc_ctrl.field.src_select = src_cnt - 1;
+ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+ u_desc_ctrl.field.zero_result = 1;
+ u_desc_ctrl.field.status_write_back_en = 1;
+ u_desc_ctrl.field.pq_xfer_en = 1;
+ u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
u32 byte_count)
}
}
+#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count
static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
hw_desc->upper_dest_addr = 0;
}
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+
+ hw_desc->dest_addr = addr[0];
+ hw_desc->q_dest_addr = addr[1];
+ hw_desc->upper_dest_addr = 0;
+}
+
static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
dma_addr_t addr)
{
} while (slot_cnt);
}
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+ dma_addr_t addr, unsigned char coef)
+{
+ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
+ struct iop13xx_adma_src *src;
+ int i = 0;
+
+ do {
+ iter = iop_hw_desc_slot_idx(hw_desc, i);
+ src = &iter->src[src_idx];
+ src->src_addr = addr;
+ src->pq_upper_src_addr = 0;
+ src->pq_dmlt = coef;
+ slot_cnt -= slots_per_op;
+ if (slot_cnt) {
+ i += slots_per_op;
+ addr += IOP_ADMA_PQ_MAX_BYTE_COUNT;
+ }
+ } while (slot_cnt);
+}
+
static inline void
iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
}
#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+ dma_addr_t *src)
+{
+ iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]);
+ iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]);
+}
static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
u32 next_desc_addr)
hw_desc->block_fill_data = val;
}
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
+ enum sum_check_flags flags;
BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
- if (desc_ctrl.pq_xfer_en)
- return byte_count.zero_result_err_q;
- else
- return byte_count.zero_result_err;
+ flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
+ flags |= byte_count.zero_result_err << SUM_CHECK_P;
+
+ return flags;
}
static inline void iop_chan_append(struct iop_adma_chan *chan)
plat_data = &iop13xx_adma_0_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+ dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
break;
case IOP13XX_INIT_ADMA_1:
plat_data = &iop13xx_adma_1_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+ dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
break;
case IOP13XX_INIT_ADMA_2:
plat_data = &iop13xx_adma_2_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+ dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
- dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
- dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
- dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
+ dma_cap_set(DMA_PQ, plat_data->cap_mask);
+ dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
break;
}
}
}
spin_unlock(&cpu_asid_lock);
- mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id());
+ cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id()));
mm->context.id = asid;
}
void flush_cache_mm(struct mm_struct *mm)
{
if (cache_is_vivt()) {
- if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
__cpuc_flush_user_all();
return;
}
void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
if (cache_is_vivt()) {
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
vma->vm_flags);
return;
void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
{
if (cache_is_vivt()) {
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
unsigned long addr = user_addr & PAGE_MASK;
__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
}
unsigned long len, int write)
{
if (cache_is_vivt()) {
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
unsigned long addr = (unsigned long)kaddr;
__cpuc_coherent_kern_range(addr, addr + len);
}
}
/* VIPT non-aliasing cache */
- if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask) &&
+ if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)) &&
vma->vm_flags & VM_EXEC) {
unsigned long addr = (unsigned long)kaddr;
/* only flushing the kernel mapping on non-aliasing VIPT */
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
#else
dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
#endif
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
#else
dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
- dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
#endif
dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
#else
dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
- dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
+ dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
#endif
/*
* Initial thread structure. Must be aligned on an 8192-byte boundary.
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
#include <asm/setup.h>
#include <asm/sections.h>
-#define __page_aligned __attribute__((section(".data.page_aligned")))
-
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned;
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data;
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);
echo '* vmImage.gz - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.gz)'
echo ' vmImage.lzma - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.lzma)'
echo ' install - Install kernel using'
- echo ' (your) ~/bin/$(CROSS_COMPILE)installkernel or'
- echo ' (distribution) PATH: $(CROSS_COMPILE)installkernel or'
+ echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) PATH: $(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH)'
endef
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if which ${CROSS_COMPILE}installkernel >/dev/null 2>&1; then
- exec ${CROSS_COMPILE}installkernel "$@"
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if which ${INSTALLKERNEL} >/dev/null 2>&1; then
+ exec ${INSTALLKERNEL} "$@"
fi
# Default install - same as make zlilo
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
-CPPFLAGS_vmlinux.lds = -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
-
KBUILD_AFLAGS += -mlinux -march=$(arch-y) $(inc)
KBUILD_CFLAGS += -mlinux -march=$(arch-y) -pipe $(inc)
KBUILD_CPPFLAGS += $(inc)
# Makefile for the linux kernel.
#
+CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
extra-y := vmlinux.lds
obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
/*
* Send us to sleep.
*/
-static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp,
+static int sysctl_pm_do_suspend(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int retval, mode;
}
-static int cmode_procctl(ctl_table *ctl, int write, struct file *filp,
+static int cmode_procctl(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int new_cmode;
if (!write)
- return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+ return proc_dointvec(ctl, write, buffer, lenp, fpos);
new_cmode = user_atoi(buffer, *lenp);
return 0;
}
-static int p0_procctl(ctl_table *ctl, int write, struct file *filp,
+static int p0_procctl(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int new_p0;
if (!write)
- return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+ return proc_dointvec(ctl, write, buffer, lenp, fpos);
new_p0 = user_atoi(buffer, *lenp);
return 1;
}
-static int cm_procctl(ctl_table *ctl, int write, struct file *filp,
+static int cm_procctl(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int new_cm;
if (!write)
- return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+ return proc_dointvec(ctl, write, buffer, lenp, fpos);
new_cm = user_atoi(buffer, *lenp);
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/syscalls.h>
#include <linux/ipc.h>
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/fs.h>
#include <linux/ipc.h>
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
#else /* CONFIG_SMP */
/*
* Returns a bitmask of CPUs on Node 'node'.
*/
-#define node_to_cpumask(node) (node_to_cpu_mask[node])
#define cpumask_of_node(node) (&node_to_cpu_mask[node])
/*
#ifdef CONFIG_SMP
#define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id)
#define topology_core_id(cpu) (cpu_data(cpu)->core_id)
-#define topology_core_siblings(cpu) (cpu_core_map[cpu])
-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define smt_capable() (smp_num_siblings > 1)
# User may have a custom install script
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
$(call if_changed,gate)
struct thread_info thread_info;
} s;
unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
-} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
+} init_task_mem asm ("init_task") __init_task_data =
+ {{
.task = INIT_TASK(init_task_mem.s.task),
.thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
}};
return;
}
- smp_call_function_mask(mm->cpu_vm_mask,
+ smp_call_function_many(mm_cpumask(mm),
(void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
local_irq_disable();
local_finish_flush_tlb_mm(mm);
# User may have a custom install script
-if [ -x /sbin/installkernel ]; then
- exec /sbin/installkernel "$@"
+if [ -x /sbin/${INSTALLKERNEL} ]; then
+ exec /sbin/${INSTALLKERNEL} "$@"
fi
if [ "$2" = "zImage" ]; then
if (prev != next) {
#ifdef CONFIG_SMP
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
#endif /* CONFIG_SMP */
/* Set MPTB = next->pgd */
*(volatile unsigned long *)MPTB = (unsigned long)next->pgd;
}
#ifdef CONFIG_SMP
else
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask))
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)))
activate_context(next);
#endif /* CONFIG_SMP */
}
extern unsigned long send_IPI_mask_phys(cpumask_t, int, int);
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#endif /* not __ASSEMBLY__ */
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
void smp_local_timer_interrupt(void);
static void send_IPI_allbutself(int, int);
-static void send_IPI_mask(cpumask_t, int, int);
+static void send_IPI_mask(const struct cpumask *, int, int);
unsigned long send_IPI_mask_phys(cpumask_t, int, int);
/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
void smp_send_reschedule(int cpu_id)
{
WARN_ON(cpu_is_offline(cpu_id));
- send_IPI_mask(cpumask_of_cpu(cpu_id), RESCHEDULE_IPI, 1);
+ send_IPI_mask(cpumask_of(cpu_id), RESCHEDULE_IPI, 1);
}
/*==========================================================================*
spin_lock(&flushcache_lock);
mask=cpus_addr(cpumask);
atomic_set_mask(*mask, (atomic_t *)&flushcache_cpumask);
- send_IPI_mask(cpumask, INVALIDATE_CACHE_IPI, 0);
+ send_IPI_mask(&cpumask, INVALIDATE_CACHE_IPI, 0);
_flush_cache_copyback_all();
while (flushcache_cpumask)
mb();
preempt_disable();
cpu_id = smp_processor_id();
mmc = &mm->context[cpu_id];
- cpu_mask = mm->cpu_vm_mask;
+ cpu_mask = *mm_cpumask(mm);
cpu_clear(cpu_id, cpu_mask);
if (*mmc != NO_CONTEXT) {
if (mm == current->mm)
activate_context(mm);
else
- cpu_clear(cpu_id, mm->cpu_vm_mask);
+ cpumask_clear_cpu(cpu_id, mm_cpumask(mm));
local_irq_restore(flags);
}
if (!cpus_empty(cpu_mask))
preempt_disable();
cpu_id = smp_processor_id();
mmc = &mm->context[cpu_id];
- cpu_mask = mm->cpu_vm_mask;
+ cpu_mask = *mm_cpumask(mm);
cpu_clear(cpu_id, cpu_mask);
#ifdef DEBUG_SMP
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(cpumask, INVALIDATE_TLB_IPI, 0);
+ send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0);
while (!cpus_empty(flush_cpumask)) {
/* nothing. lockup detection does not belong here */
if (flush_mm == current->active_mm)
activate_context(flush_mm);
else
- cpu_clear(cpu_id, flush_mm->cpu_vm_mask);
+ cpumask_clear_cpu(cpu_id, mm_cpumask(flush_mm));
} else {
unsigned long va = flush_va;
for ( ; ; );
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
send_IPI_mask(mask, CALL_FUNCTION_IPI, 0);
}
void arch_send_call_function_single_ipi(int cpu)
{
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_IPI, 0);
+ send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_IPI, 0);
}
/*==========================================================================*
cpumask = cpu_online_map;
cpu_clear(smp_processor_id(), cpumask);
- send_IPI_mask(cpumask, ipi_num, try);
+ send_IPI_mask(&cpumask, ipi_num, try);
}
/*==========================================================================*
* ---------- --- --------------------------------------------------------
*
*==========================================================================*/
-static void send_IPI_mask(cpumask_t cpumask, int ipi_num, int try)
+static void send_IPI_mask(const struct cpumask *cpumask, int ipi_num, int try)
{
cpumask_t physid_mask, tmp;
int cpu_id, phys_id;
if (num_cpus <= 1) /* NO MP */
return;
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(cpumask, tmp));
+ cpumask_and(&tmp, cpumask, cpu_online_mask);
+ BUG_ON(!cpumask_equal(cpumask, &tmp));
physid_mask = CPU_MASK_NONE;
- for_each_cpu_mask(cpu_id, cpumask){
+ for_each_cpu(cpu_id, cpumask) {
if ((phys_id = cpu_to_physid(cpu_id)) != -1)
cpu_set(phys_id, physid_mask);
}
for (phys_id = 0 ; phys_id < nr_cpu ; phys_id++)
physid_set(phys_id, phys_cpu_present_map);
#ifndef CONFIG_HOTPLUG_CPU
- cpu_present_map = cpu_possible_map;
+ init_cpu_present(&cpu_possible_map);
#endif
show_mp_info(nr_cpu);
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
*/
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union
-__attribute__((section(".data.init_task"), aligned(THREAD_SIZE)))
- = { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data
+ __attribute__((aligned(THREAD_SIZE))) =
+ { INIT_THREAD_INFO(init_task) };
/* initial task structure */
struct task_struct init_task = INIT_TASK(init_task);
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/ipc.h>
#include <asm/setup.h>
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/ipc.h>
#include <linux/fs.h>
config MICROBLAZE
def_bool y
select HAVE_LMB
+ select USB_ARCH_HAS_EHCI
select ARCH_WANT_OPTIONAL_GPIOLIB
config SWAP
CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
# r31 holds current when in kernel mode
-KBUILD_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
+KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
LDFLAGS :=
LDFLAGS_vmlinux :=
-LIBGCC := $(shell $(CC) $(KBUILD_KERNEL) -print-libgcc-file-name)
+LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
head-y := arch/microblaze/kernel/head.o
libs-y += arch/microblaze/lib/
boot := arch/microblaze/boot
+# Are we making a simpleImage.<boardname> target? If so, crack out the boardname
+DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
+
+ifneq ($(DTB),)
+ core-y += $(boot)/
+endif
+
# defines filename extension depending memory management type
ifeq ($(CONFIG_MMU),)
MMU := -nommu
endif
-export MMU
+export MMU DTB
all: linux.bin
+BOOT_TARGETS = linux.bin linux.bin.gz simpleImage.%
+
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
-linux.bin linux.bin.gz: vmlinux
+$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
define archhelp
- echo '* linux.bin - Create raw binary'
- echo ' linux.bin.gz - Create compressed raw binary'
+ echo '* linux.bin - Create raw binary'
+ echo ' linux.bin.gz - Create compressed raw binary'
+ echo ' simpleImage.<dt> - ELF image with $(arch)/boot/dts/<dt>.dts linked in'
+ echo ' - stripped elf with fdt blob
+ echo ' simpleImage.<dt>.unstrip - full ELF image with fdt blob'
+ echo ' *_defconfig - Select default config from arch/microblaze/configs'
+ echo ''
+ echo ' Targets with <dt> embed a device tree blob inside the image'
+ echo ' These targets support board with firmware that does not'
+ echo ' support passing a device tree directly. Replace <dt> with the'
+ echo ' name of a dts file from the arch/microblaze/boot/dts/ directory'
+ echo ' (minus the .dts extension).'
endef
# arch/microblaze/boot/Makefile
#
-targets := linux.bin linux.bin.gz
+obj-y += linked_dtb.o
+
+targets := linux.bin linux.bin.gz simpleImage.%
OBJCOPYFLAGS_linux.bin := -O binary
+# Where the DTS files live
+dtstree := $(srctree)/$(src)/dts
+
+# Ensure system.dtb exists
+$(obj)/linked_dtb.o: $(obj)/system.dtb
+
+# Generate system.dtb from $(DTB).dtb
+ifneq ($(DTB),system)
+$(obj)/system.dtb: $(obj)/$(DTB).dtb
+ $(call if_changed,cp)
+endif
+
$(obj)/linux.bin: vmlinux FORCE
[ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \
touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image"
$(call if_changed,gzip)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
-clean-kernel += linux.bin linux.bin.gz
+quiet_cmd_cp = CP $< $@$2
+ cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
+
+quiet_cmd_strip = STRIP $@
+ cmd_strip = $(STRIP) -K _start -K _end -K __log_buf -K _fdt_start vmlinux -o $@
+
+$(obj)/simpleImage.%: vmlinux FORCE
+ $(call if_changed,cp,.unstrip)
+ $(call if_changed,strip)
+ @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+
+# Rule to build device tree blobs
+DTC = $(objtree)/scripts/dtc/dtc
+
+# Rule to build device tree blobs
+quiet_cmd_dtc = DTC $@
+ cmd_dtc = $(DTC) -O dtb -o $(obj)/$*.dtb -b 0 -p 1024 $(dtstree)/$*.dts
+
+$(obj)/%.dtb: $(dtstree)/%.dts FORCE
+ $(call if_changed,dtc)
+
+clean-kernel += linux.bin linux.bin.gz simpleImage.*
+
+clean-files += *.dtb
--- /dev/null
+../../platform/generic/system.dts
\ No newline at end of file
--- /dev/null
+.section __fdt_blob,"a"
+.incbin "arch/microblaze/boot/system.dtb"
+
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31-rc6
-# Tue Aug 18 11:00:02 2009
+# Linux kernel version: 2.6.31
+# Thu Sep 24 10:28:50 2009
#
CONFIG_MICROBLAZE=y
# CONFIG_SWAP is not set
#
# RCU Subsystem
#
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=17
# CONFIG_NETFILTER is not set
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
# CONFIG_KS8842 is not set
+CONFIG_XILINX_EMACLITE=y
CONFIG_NETDEV_1000=y
CONFIG_NETDEV_10000=y
-
-#
-# Wireless LAN
-#
+CONFIG_WLAN=y
# CONFIG_WLAN_PRE80211 is not set
# CONFIG_WLAN_80211 is not set
# CONFIG_DISPLAY_SUPPORT is not set
# CONFIG_SOUND is not set
# CONFIG_USB_SUPPORT is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
# CONFIG_NEW_LEDS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
CONFIG_FILE_LOCKING=y
CONFIG_FSNOTIFY=y
# CONFIG_DNOTIFY is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_SG is not set
# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_SYSCTL_SYSCALL_CHECK is not set
# CONFIG_PAGE_POISONING is not set
# CONFIG_SAMPLES is not set
# CONFIG_KMEMCHECK is not set
CONFIG_EARLY_PRINTK=y
-CONFIG_HEART_BEAT=y
+# CONFIG_HEART_BEAT is not set
CONFIG_DEBUG_BOOTMEM=y
#
#
# Crypto core or helper
#
-# CONFIG_CRYPTO_FIPS is not set
# CONFIG_CRYPTO_MANAGER is not set
# CONFIG_CRYPTO_MANAGER2 is not set
# CONFIG_CRYPTO_GF128MUL is not set
#
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
#
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
# CONFIG_CRYPTO_MD4 is not set
# CONFIG_CRYPTO_MD5 is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31-rc6
-# Tue Aug 18 10:35:30 2009
+# Linux kernel version: 2.6.31
+# Thu Sep 24 10:29:43 2009
#
CONFIG_MICROBLAZE=y
# CONFIG_SWAP is not set
#
# RCU Subsystem
#
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=17
# CONFIG_NETFILTER is not set
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_AF_RXRPC is not set
CONFIG_WIRELESS=y
# CONFIG_CFG80211 is not set
+CONFIG_CFG80211_DEFAULT_PS_VALUE=0
CONFIG_WIRELESS_OLD_REGULATORY=y
# CONFIG_WIRELESS_EXT is not set
# CONFIG_LIB80211 is not set
#
# CFG80211 needs to be enabled for MAC80211
#
-CONFIG_MAC80211_DEFAULT_PS_VALUE=0
# CONFIG_WIMAX is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set
# CONFIG_MTD_TESTS is not set
# CONFIG_MTD_REDBOOT_PARTS is not set
CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_OF_PARTS is not set
# CONFIG_MTD_AR7_PARTS is not set
#
#
# CONFIG_MTD_COMPLEX_MAPPINGS is not set
# CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_PHYSMAP_OF is not set
CONFIG_MTD_UCLINUX=y
# CONFIG_MTD_PLATRAM is not set
# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
# CONFIG_KS8842 is not set
+# CONFIG_XILINX_EMACLITE is not set
CONFIG_NETDEV_1000=y
CONFIG_NETDEV_10000=y
-
-#
-# Wireless LAN
-#
+CONFIG_WLAN=y
# CONFIG_WLAN_PRE80211 is not set
# CONFIG_WLAN_80211 is not set
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
# CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB_ARCH_HAS_EHCI is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
# CONFIG_USB is not set
# CONFIG_USB_OTG_WHITELIST is not set
# CONFIG_USB_OTG_BLACKLIST_HUB is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
CONFIG_FILE_LOCKING=y
CONFIG_FSNOTIFY=y
# CONFIG_DNOTIFY is not set
CONFIG_ROMFS_ON_BLOCK=y
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
# CONFIG_FAULT_INJECTION is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_PAGE_POISONING is not set
# CONFIG_DYNAMIC_DEBUG is not set
# CONFIG_SAMPLES is not set
CONFIG_EARLY_PRINTK=y
-CONFIG_HEART_BEAT=y
+# CONFIG_HEART_BEAT is not set
# CONFIG_DEBUG_BOOTMEM is not set
#
#
# Crypto core or helper
#
-# CONFIG_CRYPTO_FIPS is not set
# CONFIG_CRYPTO_MANAGER is not set
# CONFIG_CRYPTO_MANAGER2 is not set
# CONFIG_CRYPTO_GF128MUL is not set
#
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
#
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
# CONFIG_CRYPTO_MD4 is not set
# CONFIG_CRYPTO_MD5 is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
--- /dev/null
+#ifndef _ASM_MICROBLAZE_ASM_COMPAT_H
+#define _ASM_MICROBLAZE_ASM_COMPAT_H
+
+#include <asm/types.h>
+
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...) __VA_ARGS__
+# define ASM_CONST(x) x
+#else
+/* This version of stringify will deal with commas... */
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+# define __ASM_CONST(x) x##UL
+# define ASM_CONST(x) __ASM_CONST(x)
+#endif
+
+#endif /* _ASM_MICROBLAZE_ASM_COMPAT_H */
#define in_be32(a) __raw_readl((const void __iomem __force *)(a))
#define in_be16(a) __raw_readw(a)
+#define writel_be(v, a) out_be32((__force unsigned *)a, v)
+#define readl_be(a) in_be32((__force unsigned *)a)
+
/*
* Little endian
*/
+++ /dev/null
-#include <asm-generic/ipc.h>
#include <linux/pfn.h>
#include <asm/setup.h>
+#include <asm/asm-compat.h>
#include <linux/const.h>
#ifdef __KERNEL__
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
+#define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR))
+
#ifndef __ASSEMBLY__
#define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
void time_init(void);
void init_IRQ(void);
void machine_early_init(const char *cmdline, unsigned int ram,
- unsigned int fdt);
+ unsigned int fdt, unsigned int msr);
void machine_restart(char *cmd);
void machine_shutdown(void);
--- /dev/null
+#ifndef __ASM_MICROBLAZE_SYSCALL_H
+#define __ASM_MICROBLAZE_SYSCALL_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+/* The system call number is given by the user in R12 */
+static inline long syscall_get_nr(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->r12;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ /* TODO. */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return IS_ERR_VALUE(regs->r3) ? regs->r3 : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->r3;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+ struct pt_regs *regs,
+ int error, long val)
+{
+ if (error)
+ regs->r3 = -error;
+ else
+ regs->r3 = val;
+}
+
+static inline microblaze_reg_t microblaze_get_syscall_arg(struct pt_regs *regs,
+ unsigned int n)
+{
+ switch (n) {
+ case 5: return regs->r10;
+ case 4: return regs->r9;
+ case 3: return regs->r8;
+ case 2: return regs->r7;
+ case 1: return regs->r6;
+ case 0: return regs->r5;
+ default:
+ BUG();
+ }
+ return ~0;
+}
+
+static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
+ unsigned int n,
+ unsigned long val)
+{
+ switch (n) {
+ case 5:
+ regs->r10 = val;
+ case 4:
+ regs->r9 = val;
+ case 3:
+ regs->r8 = val;
+ case 2:
+ regs->r7 = val;
+ case 1:
+ regs->r6 = val;
+ case 0:
+ regs->r5 = val;
+ default:
+ BUG();
+ }
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned int i, unsigned int n,
+ unsigned long *args)
+{
+ while (n--)
+ *args++ = microblaze_get_syscall_arg(regs, i++);
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned int i, unsigned int n,
+ const unsigned long *args)
+{
+ while (n--)
+ microblaze_set_syscall_arg(regs, i++, *args++);
+}
+
+#endif /* __ASM_MICROBLAZE_SYSCALL_H */
{"7.10.d", 0x0b},
{"7.20.a", 0x0c},
{"7.20.b", 0x0d},
+ {"7.20.c", 0x0e},
/* FIXME There is no keycode defined in MBV for these versions */
{"2.10.a", 0x10},
{"3.00.a", 0x20},
{"spartan3a", 0xa},
{"spartan3an", 0xb},
{"spartan3adsp", 0xc},
+ {"spartan6", 0xd},
+ {"virtex6", 0xe},
/* FIXME There is no key code defined for spartan2 */
{"spartan2", 0xf0},
{NULL, 0},
swi r12, r1, PTO+PT_R0;
tovirt(r1,r1)
- la r15, r0, ret_from_trap-8
/* where the trap should return need -8 to adjust for rtsd r15, 8*/
/* Jump to the appropriate function for the system call number in r12
* (r12 is not preserved), or return an error if r12 is not valid. The LP
* register should point to the location where
* the called function should return. [note that MAKE_SYS_CALL uses label 1] */
- /* See if the system call number is valid. */
+
+ # Step into virtual mode.
+ set_vms;
+ addik r11, r0, 3f
+ rtid r11, 0
+ nop
+3:
+ add r11, r0, CURRENT_TASK /* Get current task ptr into r11 */
+ lwi r11, r11, TS_THREAD_INFO /* get thread info */
+ lwi r11, r11, TI_FLAGS /* get flags in thread info */
+ andi r11, r11, _TIF_WORK_SYSCALL_MASK
+ beqi r11, 4f
+
+ addik r3, r0, -ENOSYS
+ swi r3, r1, PTO + PT_R3
+ brlid r15, do_syscall_trace_enter
+ addik r5, r1, PTO + PT_R0
+
+ # do_syscall_trace_enter returns the new syscall nr.
+ addk r12, r0, r3
+ lwi r5, r1, PTO+PT_R5;
+ lwi r6, r1, PTO+PT_R6;
+ lwi r7, r1, PTO+PT_R7;
+ lwi r8, r1, PTO+PT_R8;
+ lwi r9, r1, PTO+PT_R9;
+ lwi r10, r1, PTO+PT_R10;
+4:
+/* Jump to the appropriate function for the system call number in r12
+ * (r12 is not preserved), or return an error if r12 is not valid.
+ * The LP register should point to the location where the called function
+ * should return. [note that MAKE_SYS_CALL uses label 1] */
+ /* See if the system call number is valid */
addi r11, r12, -__NR_syscalls;
- bgei r11,1f;
+ bgei r11,5f;
/* Figure out which function to use for this system call. */
/* Note Microblaze barrel shift is optional, so don't rely on it */
add r12, r12, r12; /* convert num -> ptr */
add r12, r12, r12;
/* Trac syscalls and stored them to r0_ram */
- lwi r3, r12, 0x400 + TOPHYS(r0_ram)
+ lwi r3, r12, 0x400 + r0_ram
addi r3, r3, 1
- swi r3, r12, 0x400 + TOPHYS(r0_ram)
+ swi r3, r12, 0x400 + r0_ram
+
+ # Find and jump into the syscall handler.
+ lwi r12, r12, sys_call_table
+ /* where the trap should return need -8 to adjust for rtsd r15, 8 */
+ la r15, r0, ret_from_trap-8
+ bra r12
- lwi r12, r12, TOPHYS(sys_call_table); /* Function ptr */
- /* Make the system call. to r12*/
- set_vms;
- rtid r12, 0;
- nop;
/* The syscall number is invalid, return an error. */
-1: VM_ON; /* RETURN() expects virtual mode*/
+5:
addi r3, r0, -ENOSYS;
rtsd r15,8; /* looks like a normal subroutine return */
or r0, r0, r0
-/* Entry point used to return from a syscall/trap. */
+/* Entry point used to return from a syscall/trap */
/* We re-enable BIP bit before state restore */
C_ENTRY(ret_from_trap):
set_bip; /* Ints masked for state restore*/
/* See if returning to kernel mode, if so, skip resched &c. */
bnei r11, 2f;
+ /* We're returning to user mode, so check for various conditions that
+ * trigger rescheduling. */
+ # FIXME: Restructure all these flag checks.
+ add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+ lwi r11, r11, TS_THREAD_INFO; /* get thread info */
+ lwi r11, r11, TI_FLAGS; /* get flags in thread info */
+ andi r11, r11, _TIF_WORK_SYSCALL_MASK
+ beqi r11, 1f
+
+ swi r3, r1, PTO + PT_R3
+ swi r4, r1, PTO + PT_R4
+ brlid r15, do_syscall_trace_leave
+ addik r5, r1, PTO + PT_R0
+ lwi r3, r1, PTO + PT_R3
+ lwi r4, r1, PTO + PT_R4
+1:
+
/* We're returning to user mode, so check for various conditions that
* trigger rescheduling. */
/* Get current task ptr into r11 */
#endif
#if 0
- printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n",
+ printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x " \
+ "ESR=%08x\n",
type, user_mode(regs) ? "user" : "kernel", fsr,
(unsigned int) regs->pc, (unsigned int) regs->esr);
#endif
switch (type & 0x1F) {
case MICROBLAZE_ILL_OPCODE_EXCEPTION:
if (user_mode(regs)) {
- printk(KERN_WARNING "Illegal opcode exception in user mode.\n");
+ pr_debug(KERN_WARNING "Illegal opcode exception " \
+ "in user mode.\n");
_exception(SIGILL, regs, ILL_ILLOPC, addr);
return;
}
- printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n");
+ printk(KERN_WARNING "Illegal opcode exception " \
+ "in kernel mode.\n");
die("opcode exception", regs, SIGBUS);
break;
case MICROBLAZE_IBUS_EXCEPTION:
if (user_mode(regs)) {
- printk(KERN_WARNING "Instruction bus error exception in user mode.\n");
+ pr_debug(KERN_WARNING "Instruction bus error " \
+ "exception in user mode.\n");
_exception(SIGBUS, regs, BUS_ADRERR, addr);
return;
}
- printk(KERN_WARNING "Instruction bus error exception in kernel mode.\n");
+ printk(KERN_WARNING "Instruction bus error exception " \
+ "in kernel mode.\n");
die("bus exception", regs, SIGBUS);
break;
case MICROBLAZE_DBUS_EXCEPTION:
if (user_mode(regs)) {
- printk(KERN_WARNING "Data bus error exception in user mode.\n");
+ pr_debug(KERN_WARNING "Data bus error exception " \
+ "in user mode.\n");
_exception(SIGBUS, regs, BUS_ADRERR, addr);
return;
}
- printk(KERN_WARNING "Data bus error exception in kernel mode.\n");
+ printk(KERN_WARNING "Data bus error exception " \
+ "in kernel mode.\n");
die("bus exception", regs, SIGBUS);
break;
case MICROBLAZE_DIV_ZERO_EXCEPTION:
if (user_mode(regs)) {
- printk(KERN_WARNING "Divide by zero exception in user mode\n");
- _exception(SIGILL, regs, ILL_ILLOPC, addr);
+ pr_debug(KERN_WARNING "Divide by zero exception " \
+ "in user mode\n");
+ _exception(SIGILL, regs, FPE_INTDIV, addr);
return;
}
- printk(KERN_WARNING "Divide by zero exception in kernel mode.\n");
+ printk(KERN_WARNING "Divide by zero exception " \
+ "in kernel mode.\n");
die("Divide by exception", regs, SIGBUS);
break;
case MICROBLAZE_FPU_EXCEPTION:
- printk(KERN_WARNING "FPU exception\n");
+ pr_debug(KERN_WARNING "FPU exception\n");
/* IEEE FP exception */
/* I removed fsr variable and use code var for storing fsr */
if (fsr & FSR_IO)
#ifdef CONFIG_MMU
case MICROBLAZE_PRIVILEGED_EXCEPTION:
- printk(KERN_WARNING "Privileged exception\n");
+ pr_debug(KERN_WARNING "Privileged exception\n");
/* "brk r0,r0" - used as debug breakpoint */
if (get_user(code, (unsigned long *)regs->pc) == 0
&& code == 0x980c0000) {
mfs r1, rmsr
andi r1, r1, ~2
mts rmsr, r1
+/*
+ * Here is checking mechanism which check if Microblaze has msr instructions
+ * We load msr and compare it with previous r1 value - if is the same,
+ * msr instructions works if not - cpu don't have them.
+ */
+ /* r8=0 - I have msr instr, 1 - I don't have them */
+ rsubi r0, r0, 1 /* set the carry bit */
+ msrclr r0, 0x4 /* try to clear it */
+ /* read the carry bit, r8 will be '0' if msrclr exists */
+ addik r8, r0, 0
/* r7 may point to an FDT, or there may be one linked in.
if it's in r7, we've got to save it away ASAP.
* Please see $(ARCH)/mach-$(SUBARCH)/setup.c for
* the function.
*/
- la r8, r0, machine_early_init
- brald r15, r8
+ la r9, r0, machine_early_init
+ brald r15, r9
nop
#ifndef CONFIG_MMU
#define NUM_TO_REG(num) r ## num
#ifdef CONFIG_MMU
-/* FIXME you can't change first load of MSR because there is
- * hardcoded jump bri 4 */
#define RESTORE_STATE \
+ lwi r5, r1, 0; \
+ mts rmsr, r5; \
+ nop; \
lwi r3, r1, PT_R3; \
lwi r4, r1, PT_R4; \
lwi r5, r1, PT_R5; \
lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */
#endif
+ mfs r5, rmsr;
+ nop
+ swi r5, r1, 0;
mfs r3, resr
nop
mfs r4, rear;
addk r8, r17, r0; /* Load exception address */
bralid r15, full_exception; /* Branch to the handler */
nop;
+ mts r0, rfsr; /* Clear sticky fsr */
+ nop
/*
* Trigger execution of the signal handler by enabling
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
-{ INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
struct task_struct init_task = INIT_TASK(init_task);
EXPORT_SYMBOL(init_task);
regs->pc = pc;
regs->r1 = usp;
regs->pt_mode = 0;
+ regs->msr |= MSR_UMS;
}
#ifdef CONFIG_MMU
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/audit.h>
+#include <linux/seccomp.h>
+#include <linux/tracehook.h>
#include <linux/errno.h>
#include <asm/processor.h>
return rval;
}
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ long ret = 0;
+
+ secure_computing(regs->r12);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+ tracehook_report_syscall_entry(regs))
+ /*
+ * Tracing decided this syscall should not happen.
+ * We'll return a bogus call number to get an ENOSYS
+ * error, but leave the original number in regs->regs[0].
+ */
+ ret = -1L;
+
+ if (unlikely(current->audit_context))
+ audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
+ regs->r5, regs->r6,
+ regs->r7, regs->r8);
+
+ return ret ?: regs->r12;
+}
+
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
+{
+ int step;
+
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
+
+ step = test_thread_flag(TIF_SINGLESTEP);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, step);
+}
+
+#if 0
+static asmlinkage void syscall_trace(void)
+{
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ return;
+ if (!(current->ptrace & PT_PTRACED))
+ return;
+ /* The 0x80 provides a way for the tracing parent to distinguish
+ between a syscall stop and SIGTRAP delivery */
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+ ? 0x80 : 0));
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
+#endif
+
void ptrace_disable(struct task_struct *child)
{
/* nothing to do */
#endif /* CONFIG_MTD_UCLINUX_EBSS */
void __init machine_early_init(const char *cmdline, unsigned int ram,
- unsigned int fdt)
+ unsigned int fdt, unsigned int msr)
{
unsigned long *src, *dst = (unsigned long *)0x0;
early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
#endif
+#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
+ if (msr)
+ early_printk("!!!Your kernel has setup MSR instruction but "
+ "CPU don't have it %d\n", msr);
+#else
+ if (!msr)
+ early_printk("!!!Your kernel not setup MSR instruction but "
+ "CPU have it %d\n", msr);
+#endif
+
for (src = __ivt_start; src < __ivt_end; src++, dst++)
*dst = *src;
#include <linux/mman.h>
#include <linux/sys.h>
#include <linux/ipc.h>
-#include <linux/utsname.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/err.h>
OUTPUT_ARCH(microblaze)
ENTRY(_start)
+#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
jiffies = jiffies_64 + 4;
SECTIONS {
. = CONFIG_KERNEL_START;
- .text : {
+ _start = CONFIG_KERNEL_BASE_ADDR;
+ .text : AT(ADDR(.text) - LOAD_OFFSET) {
_text = . ;
_stext = . ;
*(.text .text.*)
}
. = ALIGN (4) ;
- _fdt_start = . ; /* place for fdt blob */
- . = . + 0x4000;
- _fdt_end = . ;
+ __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) {
+ _fdt_start = . ; /* place for fdt blob */
+ *(__fdt_blob) ; /* Any link-placed DTB */
+ . = _fdt_start + 0x4000; /* Pad up to 16kbyte */
+ _fdt_end = . ;
+ }
. = ALIGN(16);
RODATA
- . = ALIGN(16);
- __ex_table : {
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
- }
+ EXCEPTION_TABLE(16)
/*
* sdata2 section can go anywhere, but must be word aligned
* and SDA2_BASE must point to the middle of it
*/
- .sdata2 : {
+ .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
_ssrw = .;
. = ALIGN(4096); /* page aligned when MMU used - origin 0x8 */
*(.sdata2)
}
_sdata = . ;
- .data ALIGN (4096) : { /* page aligned when MMU used - origin 0x4 */
- DATA_DATA
- CONSTRUCTORS
- }
- . = ALIGN(32);
- .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+ RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
_edata = . ;
/* Reserve some low RAM for r0 based memory references */
r0_ram = . ;
. = . + 4096; /* a page should be enough */
- /* The initial task */
- . = ALIGN(8192);
- .data.init_task : { *(.data.init_task) }
-
/* Under the microblaze ABI, .sdata and .sbss must be contiguous */
. = ALIGN(8);
- .sdata : {
+ .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
_ssro = .;
*(.sdata)
}
- .sbss : {
+ .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {
_ssbss = .;
*(.sbss)
_esbss = .;
__init_begin = .;
- . = ALIGN(4096);
- .init.text : {
- _sinittext = . ;
- INIT_TEXT
- _einittext = .;
- }
+ INIT_TEXT_SECTION(PAGE_SIZE)
- .init.data : {
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA
}
. = ALIGN(4);
- .init.ivt : {
+ .init.ivt : AT(ADDR(.init.ivt) - LOAD_OFFSET) {
__ivt_start = .;
*(.init.ivt)
__ivt_end = .;
}
- .init.setup : {
- __setup_start = .;
- *(.init.setup)
- __setup_end = .;
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+ INIT_SETUP(0)
}
- .initcall.init : {
- __initcall_start = .;
- INITCALLS
- __initcall_end = .;
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET ) {
+ INIT_CALLS
}
- .con_initcall.init : {
- __con_initcall_start = .;
- *(.con_initcall.init)
- __con_initcall_end = .;
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+ CON_INITCALL
}
SECURITY_INIT
__init_end_before_initramfs = .;
- .init.ramfs ALIGN(4096) : {
+ .init.ramfs ALIGN(4096) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
__initramfs_start = .;
*(.init.ramfs)
__initramfs_end = .;
}
__init_end = .;
- .bss ALIGN (4096) : { /* page aligned when MMU used */
+ .bss ALIGN (4096) : AT(ADDR(.bss) - LOAD_OFFSET) {
+ /* page aligned when MMU used */
__bss_start = . ;
*(.bss*)
*(COMMON)
totalram_pages++;
pages++;
}
- printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages);
+ printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n",
+ (int)(pages * (PAGE_SIZE / 1024)));
}
#endif
cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic
drivers-$(CONFIG_PCI) += arch/mips/pci/
-ifdef CONFIG_32BIT
-ifdef CONFIG_CPU_LITTLE_ENDIAN
-JIFFIES = jiffies_64
-else
-JIFFIES = jiffies_64 + 4
-endif
-else
-JIFFIES = jiffies_64
-endif
-
#
# Automatically detect the build format. By default we choose
# the elf format according to the load address.
endif
KBUILD_AFLAGS += $(cflags-y)
-KBUILD_CFLAGS += $(cflags-y) \
- -D"VMLINUX_LOAD_ADDRESS=$(load-y)"
+KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CPPFLAGS += -D"VMLINUX_LOAD_ADDRESS=$(load-y)"
+KBUILD_CPPFLAGS += -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)"
LDFLAGS += -m $(ld-emul)
OBJCOPYFLAGS += --remove-section=.reginfo
-#
-# Choosing incompatible machines durings configuration will result in
-# error messages during linking. Select a default linkscript if
-# none has been choosen above.
-#
-
-CPPFLAGS_vmlinux.lds := \
- $(KBUILD_CFLAGS) \
- -D"LOADADDR=$(load-y)" \
- -D"JIFFIES=$(JIFFIES)" \
- -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)"
-
head-y := arch/mips/kernel/head.o arch/mips/kernel/init_task.o
libs-y += arch/mips/lib/
.irq = AU1000_RTC_MATCH2_INT,
.set_next_event = au1x_rtcmatch2_set_next_event,
.set_mode = au1x_rtcmatch2_set_mode,
- .cpumask = CPU_MASK_ALL_PTR,
+ .cpumask = cpu_all_mask,
};
static struct irqaction au1x_rtcmatch2_irqaction = {
#define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid)
#define parent_node(node) (node)
-#define node_to_cpumask(node) (hub_data(node)->h_cpus)
#define cpumask_of_node(node) (&hub_data(node)->h_cpus)
struct pci_bus;
extern int pcibus_to_node(struct pci_bus *);
-#define pcibus_to_cpumask(bus) (cpu_online_map)
#define cpumask_of_pcibus(bus) (cpu_online_mask)
extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
* Mark current->active_mm as not "active" anymore.
* We don't want to mislead possible IPI tlb flush routines.
*/
- cpu_clear(cpu, prev->cpu_vm_mask);
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
local_irq_restore(flags);
}
TLBMISS_HANDLER_SETUP_PGD(next->pgd);
/* mark mmu ownership change */
- cpu_clear(cpu, prev->cpu_vm_mask);
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
local_irq_restore(flags);
}
local_irq_save(flags);
- if (cpu_isset(cpu, mm->cpu_vm_mask)) {
+ if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
get_new_mmu_context(mm, cpu);
#ifdef CONFIG_MIPS_MT_SMTC
/* See comments for similar code above */
struct plat_smp_ops {
void (*send_ipi_single)(int cpu, unsigned int action);
- void (*send_ipi_mask)(cpumask_t mask, unsigned int action);
+ void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
void (*init_secondary)(void);
void (*smp_finish)(void);
void (*cpus_done)(void);
extern asmlinkage void smp_call_function_interrupt(void);
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#endif /* __ASM_SMP_H */
*
* The things we do for performance..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"),
- __aligned__(THREAD_SIZE))) =
+union thread_union init_thread_union __init_task_data
+ __attribute__((__aligned__(THREAD_SIZE))) =
{ INIT_THREAD_INFO(init_task) };
/*
local_irq_restore(flags);
}
-static void cmp_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void cmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
cmp_send_ipi_single(i, action);
}
for (i = 1; i < NR_CPUS; i++) {
if (amon_cpu_avail(i)) {
- cpu_set(i, cpu_possible_map);
+ set_cpu_possible(i, true);
__cpu_number_map[i] = ++ncpu;
__cpu_logical_map[ncpu] = i;
}
write_vpe_c0_vpeconf0(tmp);
/* Record this as available CPU */
- cpu_set(tc, cpu_possible_map);
+ set_cpu_possible(tc, true);
__cpu_number_map[tc] = ++ncpu;
__cpu_logical_map[ncpu] = tc;
}
local_irq_restore(flags);
}
-static void vsmp_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void vsmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
vsmp_send_ipi_single(i, action);
}
panic(KERN_ERR "%s called", __func__);
}
-static inline void up_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void up_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
{
panic(KERN_ERR "%s called", __func__);
}
cpu_idle();
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION);
}
mp_ops->prepare_cpus(max_cpus);
set_cpu_sibling_map(0);
#ifndef CONFIG_HOTPLUG_CPU
- cpu_present_map = cpu_possible_map;
+ init_cpu_present(&cpu_possible_map);
#endif
}
/* preload SMP state for boot cpu */
void __devinit smp_prepare_boot_cpu(void)
{
- cpu_set(0, cpu_possible_map);
- cpu_set(0, cpu_online_map);
+ set_cpu_possible(0, true);
+ set_cpu_online(0, true);
cpu_set(0, cpu_callin_map);
}
*/
ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
- cpu_set(i, cpu_possible_map);
+ set_cpu_possible(i, true);
__cpu_number_map[i] = i;
__cpu_logical_map[i] = i;
}
* Pull any physically present but unused TCs out of circulation.
*/
while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
- cpu_clear(tc, cpu_possible_map);
- cpu_clear(tc, cpu_present_map);
+ set_cpu_possible(tc, false);
+ set_cpu_present(tc, false);
tc++;
}
text PT_LOAD FLAGS(7); /* RWX */
note PT_NOTE FLAGS(4); /* R__ */
}
-jiffies = JIFFIES;
+
+ifdef CONFIG_32BIT
+ ifdef CONFIG_CPU_LITTLE_ENDIAN
+ jiffies = jiffies_64;
+ else
+ jiffies = jiffies_64 + 4;
+ endif
+else
+ jiffies = jiffies_64;
+endif
SECTIONS
{
/* . = 0xa800000000300000; */
. = 0xffffffff80300000;
#endif
- . = LOADADDR;
+ . = VMLINUX_LOAD_ADDRESS;
/* read-only */
_text = .; /* Text and read-only data */
.text : {
/* And the same for proc */
-int proc_dolasatstring(ctl_table *table, int write, struct file *filp,
+int proc_dolasatstring(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int r;
- r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+ r = proc_dostring(table, write, buffer, lenp, ppos);
if ((!write) || r)
return r;
}
/* proc function to write EEPROM after changing int entry */
-int proc_dolasatint(ctl_table *table, int write, struct file *filp,
+int proc_dolasatint(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int r;
- r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ r = proc_dointvec(table, write, buffer, lenp, ppos);
if ((!write) || r)
return r;
static int rtctmp;
/* proc function to read/write RealTime Clock */
-int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
+int proc_dolasatrtc(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct timespec ts;
if (rtctmp < 0)
rtctmp = 0;
}
- r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ r = proc_dointvec(table, write, buffer, lenp, ppos);
if (r)
return r;
#endif
#ifdef CONFIG_INET
-int proc_lasat_ip(ctl_table *table, int write, struct file *filp,
+int proc_lasat_ip(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned int ip;
return 0;
}
-int proc_lasat_prid(ctl_table *table, int write, struct file *filp,
+int proc_lasat_prid(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int r;
- r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ r = proc_dointvec(table, write, buffer, lenp, ppos);
if (r < 0)
return r;
if (write) {
/* "CPU" may be TC of same VPE, VPE of same CPU, or different CPU */
}
-static inline void ssmtc_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void ssmtc_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
ssmtc_send_ipi_single(i, action);
}
* cores it has been used on
*/
if (vma)
- mask = vma->vm_mm->cpu_vm_mask;
+ mask = *mm_cpumask(vma->vm_mm);
else
mask = cpu_online_map;
cpu_clear(cpu, mask);
smtc_send_ipi(cpu, LINUX_SMP_IPI, action);
}
-static void msmtc_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void msmtc_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
msmtc_send_ipi_single(i, action);
}
}
}
-static void yos_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
yos_send_ipi_single(i, action);
}
/*
* A node with nothing. We use it to avoid any special casing in
- * node_to_cpumask
+ * cpumask_of_node
*/
static struct node_data null_node = {
.hub = {
REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq);
}
-static void ip27_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void ip27_send_ipi(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
ip27_send_ipi_single(i, action);
}
__raw_writeq((((u64)action)<< 48), mailbox_0_set_regs[cpu]);
}
-static void bcm1480_send_ipi_mask(cpumask_t mask, unsigned int action)
+static void bcm1480_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
bcm1480_send_ipi_single(i, action);
}
__raw_writeq((((u64)action) << 48), mailbox_set_regs[cpu]);
}
-static inline void sb1250_send_ipi_mask(cpumask_t mask, unsigned int action)
+static inline void sb1250_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
{
unsigned int i;
- for_each_cpu_mask(i, mask)
+ for_each_cpu(i, mask)
sb1250_send_ipi_single(i, action);
}
#define enter_lazy_tlb(mm, tsk) do {} while (0)
#ifdef CONFIG_SMP
-#define cpu_ran_vm(cpu, task) \
- cpu_set((cpu), (task)->cpu_vm_mask)
-#define cpu_maybe_ran_vm(cpu, task) \
- cpu_test_and_set((cpu), (task)->cpu_vm_mask)
+#define cpu_ran_vm(cpu, mm) \
+ cpumask_set_cpu((cpu), mm_cpumask(mm))
+#define cpu_maybe_ran_vm(cpu, mm) \
+ cpumask_test_and_set_cpu((cpu), mm_cpumask(mm))
#else
-#define cpu_ran_vm(cpu, task) do {} while (0)
-#define cpu_maybe_ran_vm(cpu, task) true
+#define cpu_ran_vm(cpu, mm) do {} while (0)
+#define cpu_maybe_ran_vm(cpu, mm) true
#endif /* CONFIG_SMP */
/*
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/tty.h>
#include <asm/uaccess.h>
@echo '* vmlinux - Uncompressed kernel image (./vmlinux)'
@echo ' palo - Bootable image (./lifimage)'
@echo ' install - Install kernel using'
- @echo ' (your) ~/bin/installkernel or'
- @echo ' (distribution) /sbin/installkernel or'
+ @echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ @echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
@echo ' copy to $$(INSTALL_PATH)'
endef
#define F_SETOWN 12 /* for sockets. */
#define F_SETSIG 13 /* for sockets. */
#define F_GETSIG 14 /* for sockets. */
+#define F_GETOWN_EX 15
+#define F_SETOWN_EX 16
/* for posix fcntl() and lockf() */
#define F_RDLCK 01
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
#endif /* !ASSEMBLY */
# User may have a custom install script
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((aligned(128))) __attribute__((__section__(".data.init_task"))) =
+union thread_union init_thread_union __init_task_data
+ __attribute__((aligned(128))) =
{ INIT_THREAD_INFO(init_task) };
#if PT_NLEVELS == 3
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/time.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
# Default to zImage, override when needed
all: zImage
-CPPFLAGS_vmlinux.lds := -Upowerpc
-
BOOT_TARGETS = zImage zImage.initrd uImage zImage% dtbImage% treeImage.% cuImage.% simpleImage.%
PHONY += $(BOOT_TARGETS)
@echo ' simpleImage.<dt> - Firmware independent image.'
@echo ' treeImage.<dt> - Support for older IBM 4xx firmware (not U-Boot)'
@echo ' install - Install kernel using'
- @echo ' (your) ~/bin/installkernel or'
- @echo ' (distribution) /sbin/installkernel or'
+ @echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ @echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
@echo ' install to $$(INSTALL_PATH) and run lilo'
@echo ' *_defconfig - Select default config from arch/$(ARCH)/configs'
@echo ''
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install
--- /dev/null
+/*
+ * Freescale MPC83XX / MPC85XX DMA Controller
+ *
+ * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
+#define __ARCH_POWERPC_ASM_FSLDMA_H__
+
+#include <linux/dmaengine.h>
+
+/*
+ * Definitions for the Freescale DMA controller's DMA_SLAVE implemention
+ *
+ * The Freescale DMA_SLAVE implementation was designed to handle many-to-many
+ * transfers. An example usage would be an accelerated copy between two
+ * scatterlists. Another example use would be an accelerated copy from
+ * multiple non-contiguous device buffers into a single scatterlist.
+ *
+ * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
+ * structure contains a list of hardware addresses that should be copied
+ * to/from the scatterlist passed into device_prep_slave_sg(). The structure
+ * also has some fields to enable hardware-specific features.
+ */
+
+/**
+ * struct fsl_dma_hw_addr
+ * @entry: linked list entry
+ * @address: the hardware address
+ * @length: length to transfer
+ *
+ * Holds a single physical hardware address / length pair for use
+ * with the DMAEngine DMA_SLAVE API.
+ */
+struct fsl_dma_hw_addr {
+ struct list_head entry;
+
+ dma_addr_t address;
+ size_t length;
+};
+
+/**
+ * struct fsl_dma_slave
+ * @addresses: a linked list of struct fsl_dma_hw_addr structures
+ * @request_count: value for DMA request count
+ * @src_loop_size: setup and enable constant source-address DMA transfers
+ * @dst_loop_size: setup and enable constant destination address DMA transfers
+ * @external_start: enable externally started DMA transfers
+ * @external_pause: enable externally paused DMA transfers
+ *
+ * Holds a list of address / length pairs for use with the DMAEngine
+ * DMA_SLAVE API implementation for the Freescale DMA controller.
+ */
+struct fsl_dma_slave {
+
+ /* List of hardware address/length pairs */
+ struct list_head addresses;
+
+ /* Support for extra controller features */
+ unsigned int request_count;
+ unsigned int src_loop_size;
+ unsigned int dst_loop_size;
+ bool external_start;
+ bool external_pause;
+};
+
+/**
+ * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
+ * @slave: the &struct fsl_dma_slave to add to
+ * @address: the hardware address to add
+ * @length: the length of bytes to transfer from @address
+ *
+ * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
+ * success, -ERRNO otherwise.
+ */
+static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
+ dma_addr_t address, size_t length)
+{
+ struct fsl_dma_hw_addr *addr;
+
+ addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
+ if (!addr)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&addr->entry);
+ addr->address = address;
+ addr->length = length;
+
+ list_add_tail(&addr->entry, &slave->addresses);
+ return 0;
+}
+
+/**
+ * fsl_dma_slave_free - free a struct fsl_dma_slave
+ * @slave: the struct fsl_dma_slave to free
+ *
+ * Free a struct fsl_dma_slave and all associated address/length pairs
+ */
+static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
+{
+ struct fsl_dma_hw_addr *addr, *tmp;
+
+ if (slave) {
+ list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
+ list_del(&addr->entry);
+ kfree(addr);
+ }
+
+ kfree(slave);
+ }
+}
+
+/**
+ * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
+ * @gfp: the flags to pass to kmalloc when allocating this structure
+ *
+ * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
+ * struct fsl_dma_slave on success, or NULL on failure.
+ */
+static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
+{
+ struct fsl_dma_slave *slave;
+
+ slave = kzalloc(sizeof(*slave), gfp);
+ if (!slave)
+ return NULL;
+
+ INIT_LIST_HEAD(&slave->addresses);
+ return slave;
+}
+
+#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */
extern struct smp_ops_t *smp_ops;
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
/* Definitions relative to the secondary CPU spin loop
* and entry point. Not all of them exist on both 32 and
#define parent_node(node) (node)
-static inline cpumask_t node_to_cpumask(int node)
-{
- return numa_cpumask_lookup_table[node];
-}
-
#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
int of_node_to_nid(struct device_node *device);
}
#endif
-#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \
- CPU_MASK_ALL : \
- node_to_cpumask(pcibus_to_node(bus)) \
- )
-
#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
#ifdef CONFIG_PPC64
#include <asm/smp.h>
-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
-#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
#include <linux/kexec.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
+#include <linux/init_task.h>
#include <linux/errno.h>
#include <asm/page.h>
* We could use a smaller stack if we don't care about anything using
* current, but that audit has not been performed.
*/
-static union thread_union kexec_stack
- __attribute__((__section__(".data.init_task"))) = { };
+static union thread_union kexec_stack __init_task_data =
+ { };
/* Our assembly helper, in kexec_stub.S */
extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
#include <linux/seq_file.h>
#include <linux/ioport.h>
#include <linux/console.h>
-#include <linux/utsname.h>
#include <linux/screen_info.h>
#include <linux/root_dev.h>
#include <linux/notifier.h>
for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
DBG(" thread %d -> cpu %d (hard id %d)\n",
j, cpu, intserv[j]);
- cpu_set(cpu, cpu_present_map);
+ set_cpu_present(cpu, true);
set_hard_smp_processor_id(cpu, intserv[j]);
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_possible(cpu, true);
cpu++;
}
}
maxcpus);
for (cpu = 0; cpu < maxcpus; cpu++)
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_possible(cpu, true);
out:
of_node_put(dn);
}
smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
unsigned int cpu;
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, mask)
smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
{
BUG_ON(smp_processor_id() != boot_cpuid);
- cpu_set(boot_cpuid, cpu_online_map);
+ set_cpu_online(boot_cpuid, true);
cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid));
cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid));
#ifdef CONFIG_PPC64
if (cpu == boot_cpuid)
return -EBUSY;
- cpu_clear(cpu, cpu_online_map);
+ set_cpu_online(cpu, false);
#ifdef CONFIG_PPC64
vdso_data->processorCount--;
fixup_irqs(cpu_online_map);
smp_wmb();
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
cpu_relax();
- cpu_set(cpu, cpu_online_map);
+ set_cpu_online(cpu, true);
local_irq_enable();
}
#endif
ipi_call_lock();
notify_cpu_starting(cpu);
- cpu_set(cpu, cpu_online_map);
+ set_cpu_online(cpu, true);
/* Update sibling maps */
base = cpu_first_thread_in_core(cpu);
for (i = 0; i < threads_per_core; i++) {
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/sem.h>
+
/*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
-} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
/* Format of the patch table */
EXTRA_CFLAGS := -shared -fno-common -fno-builtin
EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
EXTRA_AFLAGS := -D__VDSO32__ -s
obj-y += vdso32_wrapper.o
#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/page.h>
- .section ".data.page_aligned"
+ __PAGE_ALIGNED_DATA
.globl vdso32_start, vdso32_end
.balign PAGE_SIZE
EXTRA_CFLAGS := -shared -fno-common -fno-builtin
EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
EXTRA_AFLAGS := -D__VDSO64__ -s
obj-y += vdso64_wrapper.o
#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/page.h>
- .section ".data.page_aligned"
+ __PAGE_ALIGNED_DATA
.globl vdso64_start, vdso64_end
.balign PAGE_SIZE
if (ncpus > NR_CPUS)
ncpus = NR_CPUS;
for (i = 1; i < ncpus ; ++i)
- cpu_set(i, cpu_present_map);
+ set_cpu_present(i, true);
if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
int smp_core99_cpu_disable(void)
{
- cpu_clear(smp_processor_id(), cpu_online_map);
+ set_cpu_online(smp_processor_id(), false);
/* XXX reset cpu affinity here */
mpic_cpu_set_priority(0xf);
int cpu;
for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu)
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_possible(cpu, true);
smp_ops = &psurge_smp_ops;
}
#endif /* CONFIG_PPC32 */
{
int cpu = smp_processor_id();
- cpu_clear(cpu, cpu_online_map);
+ set_cpu_online(cpu, false);
vdso_data->processorCount--;
/*fix boot_cpuid here*/
for_each_cpu_mask(cpu, tmp) {
BUG_ON(cpu_isset(cpu, cpu_present_map));
- cpu_set(cpu, cpu_present_map);
+ set_cpu_present(cpu, true);
set_hard_smp_processor_id(cpu, *intserv++);
}
err = 0;
if (get_hard_smp_processor_id(cpu) != intserv[i])
continue;
BUG_ON(cpu_online(cpu));
- cpu_clear(cpu, cpu_present_map);
+ set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
break;
}
* /proc entries (sysctl)
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+static int appldata_timer_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
static int appldata_interval_handler(ctl_table *ctl, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos);
* Start/Stop timer, show status of timer (0 = not active, 1 = active)
*/
static int
-appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_timer_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int len;
* current timer interval.
*/
static int
-appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_interval_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int len, interval;
* monitoring (0 = not in process, 1 = in process)
*/
static int
-appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_generic_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
extern int smp_cpu_polarization[];
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#endif
extern cpumask_t cpu_core_map[NR_CPUS];
-#define topology_core_siblings(cpu) (cpu_core_map[cpu])
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
int topology_set_cpu_management(int fc);
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/sem.h>
* if debug_active is already off
*/
static int
-s390dbf_procactive(ctl_table *table, int write, struct file *filp,
+s390dbf_procactive(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
- return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(table, write, buffer, lenp, ppos);
else
return 0;
}
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
#include <linux/init.h>
#include <linux/module.h>
#include <linux/notifier.h>
-#include <linux/utsname.h>
#include <linux/tick.h>
#include <linux/elfcore.h>
#include <linux/kernel_stat.h>
udelay(10);
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
int cpu;
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, mask)
smp_ext_bitcall(cpu, ec_call_function);
}
static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
-} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
/*
KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/page.h>
- .section ".data.page_aligned"
+ __PAGE_ALIGNED_DATA
.globl vdso32_start, vdso32_end
.balign PAGE_SIZE
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/page.h>
- .section ".data.page_aligned"
+ __PAGE_ALIGNED_DATA
.globl vdso64_start, vdso64_end
.balign PAGE_SIZE
static struct ctl_table cmm_table[];
static int
-cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
+cmm_pages_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[16], *p;
}
static int
-cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
+cmm_timeout_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"), __aligned__(THREAD_SIZE))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
# User may have a custom install script
-if [ -x /sbin/installkernel ]; then
- exec /sbin/installkernel "$@"
+if [ -x /sbin/${INSTALLKERNEL} ]; then
+ exec /sbin/${INSTALLKERNEL} "$@"
fi
if [ "$2" = "zImage" ]; then
menu "DMA support"
-config SH_DMA_API
- bool
config SH_DMA
bool "SuperH on-chip DMA controller (DMAC) support"
depends on CPU_SH3 || CPU_SH4
- select SH_DMA_API
default n
config SH_DMA_IRQ_MULTI
CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
CPU_SUBTYPE_SH7760
+config SH_DMA_API
+ depends on SH_DMA
+ bool "SuperH DMA API support"
+ default n
+ help
+ SH_DMA_API always enabled DMA API of used SuperH.
+ If you want to use DMA ENGINE, you must not enable this.
+ Please enable DMA_ENGINE and SH_DMAE.
+
config NR_ONCHIP_DMA_CHANNELS
int
depends on SH_DMA
# Makefile for the SuperH DMA specific kernel interface routines under Linux.
#
-obj-$(CONFIG_SH_DMA_API) += dma-api.o dma-sysfs.o
-obj-$(CONFIG_SH_DMA) += dma-sh.o
+obj-$(CONFIG_SH_DMA_API) += dma-sh.o dma-api.o dma-sysfs.o
obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o
obj-$(CONFIG_G2_DMA) += dma-g2.o
obj-$(CONFIG_SH_DMABRG) += dmabrg.o
#define CHCR 0x0C
#define DMAOR 0x40
+/*
+ * for dma engine
+ *
+ * SuperH DMA mode
+ */
+#define SHDMA_MIX_IRQ (1 << 1)
+#define SHDMA_DMAOR1 (1 << 2)
+#define SHDMA_DMAE1 (1 << 3)
+
+struct sh_dmae_pdata {
+ unsigned int mode;
+};
+
#endif /* __DMA_SH_H */
void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
#else
#define cpu_to_node(cpu) ((void)(cpu),0)
#define parent_node(node) ((void)(node),0)
-#define node_to_cpumask(node) ((void)node, cpu_online_map)
#define cpumask_of_node(node) ((void)node, cpu_online_mask)
#define pcibus_to_node(bus) ((void)(bus), -1)
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
}
#ifdef CONFIG_IRQSTACKS
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__section__(".bss.page_aligned")));
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__section__(".bss.page_aligned")));
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
/*
* allocate per-cpu stacks for hardirq and for softirq processing
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/ipc.h>
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/syscalls.h>
#include <linux/ipc.h>
#include <asm/uaccess.h>
export CPPFLAGS_vsyscall.lds += -P -C -Ush
vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
SYSCFLAGS_vsyscall-trapa.so = $(vsyscall-flags)
#KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7
KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
KBUILD_AFLAGS += -m32
-CPPFLAGS_vmlinux.lds += -m32
#LDFLAGS_vmlinux = -N -Ttext 0xf0004000
# Since 2.5.40, the first stage is left not btfix-ed.
CHECKFLAGS += -D__sparc__ -D__sparc_v9__ -D__arch64__ -m64
-# Undefine sparc when processing vmlinux.lds - it is used
-# And teach CPP we are doing 64 bit builds (for this case)
-CPPFLAGS_vmlinux.lds += -m64 -Usparc
LDFLAGS := -m elf64_sparc
export BITS := 64
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
/*
* General functions that each host system must provide.
#define parent_node(node) (node)
-static inline cpumask_t node_to_cpumask(int node)
-{
- return numa_cpumask_lookup_table[node];
-}
#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
-/*
- * Returns a pointer to the cpumask of CPUs on Node 'node'.
- * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
- */
-#define node_to_cpumask_ptr(v, node) \
- cpumask_t *v = &(numa_cpumask_lookup_table[node])
-
-#define node_to_cpumask_ptr_next(v, node) \
- v = &(numa_cpumask_lookup_table[node])
-
struct pci_bus;
#ifdef CONFIG_PCI
extern int pcibus_to_node(struct pci_bus *pbus);
#ifdef CONFIG_SMP
#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).core_id)
-#define topology_core_siblings(cpu) (cpu_core_map[cpu])
-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define mc_capable() (sparc64_multi_core)
extra-y := head_$(BITS).o
extra-y += init_task.o
-extra-y += vmlinux.lds
+
+# Undefine sparc when processing vmlinux.lds - it is used
+# And teach CPP we are doing $(BITS) builds (for this case)
+CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS)
+extra-y += vmlinux.lds
obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o
obj-$(CONFIG_SPARC32) += etrap_32.o
* If this is not aligned on a 8k boundry, then you should change code
* in etrap.S which assumes it.
*/
-union thread_union init_thread_union
- __attribute__((section (".data.init_task")))
- = { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/sem.h>
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/utsname.h>
#include <asm/utrap.h>
#include <asm/signal.h>
+struct new_utsname;
+
extern asmlinkage unsigned long sys_getpagesize(void);
extern asmlinkage unsigned long sparc_brk(unsigned long brk);
extern asmlinkage long sparc_pipe(struct pt_regs *regs);
$(call cc-option, -fno-stack-protector,) \
$(call cc-option, -fno-stack-protector-all,)
-CONFIG_KERNEL_STACK_ORDER ?= 2
-STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
-
-CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
- -DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE)
+# Options used by linker script
+export LDS_START := $(START)
+export LDS_ELF_ARCH := $(ELF_ARCH)
+export LDS_ELF_FORMAT := $(ELF_FORMAT)
# The wrappers will select whether using "malloc" or the kernel allocator.
LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
unsigned cpu = smp_processor_id();
if(prev != next){
- cpu_clear(cpu, prev->cpu_vm_mask);
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
if(next != &init_mm)
__switch_mm(&next->context.id);
}
# Licensed under the GPL
#
+CPPFLAGS_vmlinux.lds := -U$(SUBARCH) -DSTART=$(LDS_START) \
+ -DELF_ARCH=$(LDS_ELF_ARCH) \
+ -DELF_FORMAT=$(LDS_ELF_FORMAT)
extra-y := vmlinux.lds
clean-files :=
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
union thread_union cpu0_irqstack
__attribute__((__section__(".data.init_irqstack"))) =
int i;
for (i = 0; i < ncpus; ++i)
- cpu_set(i, cpu_possible_map);
+ set_cpu_possible(i, true);
cpu_clear(me, cpu_online_map);
cpu_set(me, cpu_online_map);
+
+KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER);
+
#ifdef CONFIG_LD_SCRIPT_STATIC
#include "uml.lds.S"
#else
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
echo ' install - Install kernel using'
- echo ' (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
+ echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH) and run lilo'
echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
#ifndef _ASM_X86_CACHE_H
#define _ASM_X86_CACHE_H
+#include <linux/linkage.h>
+
/* L1 cache line size */
#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
#ifdef CONFIG_SMP
#define __cacheline_aligned_in_smp \
__attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \
- __attribute__((__section__(".data.page_aligned")))
+ __page_aligned_data
#endif
#endif
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
- cpu_clear(cpu, prev->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
/* Re-load page tables */
load_cr3(next->pgd);
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
#define NMI_INVALID 3
struct ctl_table;
-struct file;
-extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
+extern int proc_nmi_enabled(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic;
static inline const struct cpumask *
cpumask_of_pcibus(const struct pci_bus *bus)
{
- return cpumask_of_node(__pcibus_to_node(bus));
+ int node;
+
+ node = __pcibus_to_node(bus);
+ return (node == -1) ? cpu_online_mask :
+ cpumask_of_node(node);
}
#endif
smp_ops.send_call_func_single_ipi(cpu);
}
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
smp_ops.send_call_func_ipi(mask);
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
- if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+ if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
kfree(cfg);
cfg = NULL;
- } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+ } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
GFP_ATOMIC, node)) {
free_cpumask_var(cfg->domain);
kfree(cfg);
cfg = NULL;
- } else {
- cpumask_clear(cfg->domain);
- cpumask_clear(cfg->old_domain);
}
}
/*
* proc handler for /proc/sys/kernel/nmi
*/
-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int old_state;
nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
old_state = nmi_watchdog_enabled;
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
if (!!old_state == !!nmi_watchdog_enabled)
return 0;
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
/*
* BSS section
*/
-.section ".bss.page_aligned","wa"
+__PAGE_ALIGNED_BSS
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
swapper_pg_pmd:
* This starts the data section.
*/
#ifdef CONFIG_X86_PAE
-.section ".data.page_aligned","wa"
+__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
ENTRY(idt_table)
.skip IDT_ENTRIES * 16
- .section .bss.page_aligned, "aw", @nobits
+ __PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
.skip PAGE_SIZE
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
#ifdef CONFIG_SMP
preempt_disable();
load_LDT(pc);
- if (!cpus_equal(current->mm->cpu_vm_mask,
- cpumask_of_cpu(smp_processor_id())))
+ if (!cpumask_equal(mm_cpumask(current->mm),
+ cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
void __init init_c1e_mask(void)
{
/* If we're using c1e_idle, we need to allocate c1e_mask. */
- if (pm_idle == c1e_idle) {
- alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
- cpumask_clear(c1e_mask);
- }
+ if (pm_idle == c1e_idle)
+ zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
}
static int __init idle_setup(char *str)
#endif
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
- alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
- alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
- cpumask_clear(per_cpu(cpu_core_map, i));
- cpumask_clear(per_cpu(cpu_sibling_map, i));
- cpumask_clear(cpu_data(i).llc_shared_map);
+ zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
}
set_cpu_sibling_map(0);
void __init setup_default_timer_irq(void)
{
- irq0.mask = cpumask_of_cpu(0);
setup_irq(0, &irq0);
}
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
}
#ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = vsyscall_sysctl_change },
+ .proc_handler = proc_dointvec },
{}
};
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void __user *)address;
+ info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
force_sig_info(si_signo, &info, tsk);
}
}
static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+ unsigned int fault)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ int code = BUS_ADRERR;
up_read(&mm->mmap_sem);
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+ if (fault & VM_FAULT_HWPOISON) {
+ printk(KERN_ERR
+ "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+ tsk->comm, tsk->pid, address);
+ code = BUS_MCEERR_AR;
+ }
+#endif
+ force_sig_info_fault(SIGBUS, code, address, tsk);
}
static noinline void
if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
} else {
- if (fault & VM_FAULT_SIGBUS)
- do_sigbus(regs, error_code, address);
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+ do_sigbus(regs, error_code, address, fault);
else
BUG();
}
mb();
}
+EXPORT_SYMBOL_GPL(clflush_cache_range);
static void __cpa_flush_all(void *arg)
{
{
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
- cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
+ cpumask_clear_cpu(cpu,
+ mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
preempt_disable();
local_flush_tlb();
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
preempt_enable();
}
else
leave_mm(smp_processor_id());
}
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
preempt_enable();
}
leave_mm(smp_processor_id());
}
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, va);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, va);
preempt_enable();
}
#else /* CONFIG_X86_32 */
-static unsigned char mp_bus_to_node[BUS_NR] = {
+static int mp_bus_to_node[BUS_NR] = {
[0 ... BUS_NR - 1] = -1
};
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
-VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
GCOV_PROFILE := n
#
/* Get the "official" set of cpus referring to our pagetable. */
if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+ if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
&& per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
continue;
smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
}
return;
}
- cpumask_copy(mask, &mm->cpu_vm_mask);
+ cpumask_copy(mask, mm_cpumask(mm));
/* It's possible that a vcpu may have a stale reference to our
cr3, because its in lazy mode, and it hasn't yet flushed
-e 's/(\(\.text\.[a-z]*\))/(\1.literal \1)/g'
quiet_cmd__cpp_lds_S = LDS $@
- cmd__cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ $< | sed $(sed-y) >$@
+ cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $< \
+ | sed $(sed-y) >$@
$(obj)/vmlinux.lds: $(src)/vmlinux.lds.S FORCE
$(call if_changed_dep,_cpp_lds_S)
* BSS section
*/
-.section ".bss.page_aligned", "w"
+__PAGE_ALIGNED_BSS
#ifdef CONFIG_MMU
ENTRY(swapper_pg_dir)
.fill PAGE_SIZE, 1, 0
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
-{ INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
struct task_struct init_task = INIT_TASK(init_task);
tristate
select ASYNC_CORE
+config ASYNC_PQ
+ tristate
+ select ASYNC_CORE
+
+config ASYNC_RAID6_RECOV
+ tristate
+ select ASYNC_CORE
+ select ASYNC_PQ
+
obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+obj-$(CONFIG_ASYNC_PQ) += async_pq.o
+obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
+obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o
* async_memcpy - attempt to copy memory with a dma engine.
* @dest: destination page
* @src: src page
- * @offset: offset in pages to start transaction
+ * @dest_offset: offset into 'dest' to start transaction
+ * @src_offset: offset into 'src' to start transaction
* @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
- * @depend_tx: memcpy depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
*/
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
- unsigned int src_offset, size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+ unsigned int src_offset, size_t len,
+ struct async_submit_ctl *submit)
{
- struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
&dest, 1, &src, 1, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
- if (device) {
+ if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
dma_addr_t dma_dest, dma_src;
- unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+ unsigned long dma_prep_flags = 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
DMA_FROM_DEVICE);
if (tx) {
pr_debug("%s: (async) len: %zu\n", __func__, len);
- async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ async_tx_submit(chan, tx, submit);
} else {
void *dest_buf, *src_buf;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
/* wait for any prerequisite operations */
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
src_buf = kmap_atomic(src, KM_USER1) + src_offset;
kunmap_atomic(dest_buf, KM_USER0);
kunmap_atomic(src_buf, KM_USER1);
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_memcpy);
-static int __init async_memcpy_init(void)
-{
- return 0;
-}
-
-static void __exit async_memcpy_exit(void)
-{
- do { } while (0);
-}
-
-module_init(async_memcpy_init);
-module_exit(async_memcpy_exit);
-
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous memcpy api");
MODULE_LICENSE("GPL");
* @val: fill value
* @offset: offset in pages to start transaction
* @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: memset depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ *
+ * honored flags: ASYNC_TX_ACK
*/
struct dma_async_tx_descriptor *
-async_memset(struct page *dest, int val, unsigned int offset,
- size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+async_memset(struct page *dest, int val, unsigned int offset, size_t len,
+ struct async_submit_ctl *submit)
{
- struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
&dest, 1, NULL, 0, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
- if (device) {
+ if (device && is_dma_fill_aligned(device, offset, 0, len)) {
dma_addr_t dma_dest;
- unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+ unsigned long dma_prep_flags = 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, offset, len,
DMA_FROM_DEVICE);
if (tx) {
pr_debug("%s: (async) len: %zu\n", __func__, len);
- async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ async_tx_submit(chan, tx, submit);
} else { /* run the memset synchronously */
void *dest_buf;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
- dest_buf = (void *) (((char *) page_address(dest)) + offset);
+ dest_buf = page_address(dest) + offset;
/* wait for any prerequisite operations */
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
memset(dest_buf, val, len);
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_memset);
-static int __init async_memset_init(void)
-{
- return 0;
-}
-
-static void __exit async_memset_exit(void)
-{
- do { } while (0);
-}
-
-module_init(async_memset_init);
-module_exit(async_memset_exit);
-
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous memset api");
MODULE_LICENSE("GPL");
--- /dev/null
+/*
+ * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+/**
+ * scribble - space to hold throwaway P buffer for synchronous gen_syndrome
+ */
+static struct page *scribble;
+
+static bool is_raid6_zero_block(struct page *p)
+{
+ return p == (void *) raid6_empty_zero_page;
+}
+
+/* the struct page *blocks[] parameter passed to async_gen_syndrome()
+ * and async_syndrome_val() contains the 'P' destination address at
+ * blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
+ *
+ * note: these are macros as they are used as lvalues
+ */
+#define P(b, d) (b[d-2])
+#define Q(b, d) (b[d-1])
+
+/**
+ * do_async_gen_syndrome - asynchronously calculate P and/or Q
+ */
+static __async_inline struct dma_async_tx_descriptor *
+do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
+ const unsigned char *scfs, unsigned int offset, int disks,
+ size_t len, dma_addr_t *dma_src,
+ struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct dma_device *dma = chan->device;
+ enum dma_ctrl_flags dma_flags = 0;
+ enum async_tx_flags flags_orig = submit->flags;
+ dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+ dma_async_tx_callback cb_param_orig = submit->cb_param;
+ int src_cnt = disks - 2;
+ unsigned char coefs[src_cnt];
+ unsigned short pq_src_cnt;
+ dma_addr_t dma_dest[2];
+ int src_off = 0;
+ int idx;
+ int i;
+
+ /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
+ if (P(blocks, disks))
+ dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
+ len, DMA_BIDIRECTIONAL);
+ else
+ dma_flags |= DMA_PREP_PQ_DISABLE_P;
+ if (Q(blocks, disks))
+ dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
+ len, DMA_BIDIRECTIONAL);
+ else
+ dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+
+ /* convert source addresses being careful to collapse 'empty'
+ * sources and update the coefficients accordingly
+ */
+ for (i = 0, idx = 0; i < src_cnt; i++) {
+ if (is_raid6_zero_block(blocks[i]))
+ continue;
+ dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
+ DMA_TO_DEVICE);
+ coefs[idx] = scfs[i];
+ idx++;
+ }
+ src_cnt = idx;
+
+ while (src_cnt > 0) {
+ submit->flags = flags_orig;
+ pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
+ /* if we are submitting additional pqs, leave the chain open,
+ * clear the callback parameters, and leave the destination
+ * buffers mapped
+ */
+ if (src_cnt > pq_src_cnt) {
+ submit->flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_FENCE;
+ dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
+ submit->cb_fn = NULL;
+ submit->cb_param = NULL;
+ } else {
+ dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
+ submit->cb_fn = cb_fn_orig;
+ submit->cb_param = cb_param_orig;
+ if (cb_fn_orig)
+ dma_flags |= DMA_PREP_INTERRUPT;
+ }
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
+
+ /* Since we have clobbered the src_list we are committed
+ * to doing this asynchronously. Drivers force forward
+ * progress in case they can not provide a descriptor
+ */
+ for (;;) {
+ tx = dma->device_prep_dma_pq(chan, dma_dest,
+ &dma_src[src_off],
+ pq_src_cnt,
+ &coefs[src_off], len,
+ dma_flags);
+ if (likely(tx))
+ break;
+ async_tx_quiesce(&submit->depend_tx);
+ dma_async_issue_pending(chan);
+ }
+
+ async_tx_submit(chan, tx, submit);
+ submit->depend_tx = tx;
+
+ /* drop completed sources */
+ src_cnt -= pq_src_cnt;
+ src_off += pq_src_cnt;
+
+ dma_flags |= DMA_PREP_CONTINUE;
+ }
+
+ return tx;
+}
+
+/**
+ * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
+ */
+static void
+do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+ size_t len, struct async_submit_ctl *submit)
+{
+ void **srcs;
+ int i;
+
+ if (submit->scribble)
+ srcs = submit->scribble;
+ else
+ srcs = (void **) blocks;
+
+ for (i = 0; i < disks; i++) {
+ if (is_raid6_zero_block(blocks[i])) {
+ BUG_ON(i > disks - 3); /* P or Q can't be zero */
+ srcs[i] = blocks[i];
+ } else
+ srcs[i] = page_address(blocks[i]) + offset;
+ }
+ raid6_call.gen_syndrome(disks, len, srcs);
+ async_tx_sync_epilog(submit);
+}
+
+/**
+ * async_gen_syndrome - asynchronously calculate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @submit: submission/completion modifiers
+ *
+ * General note: This routine assumes a field of GF(2^8) with a
+ * primitive polynomial of 0x11d and a generator of {02}.
+ *
+ * 'disks' note: callers can optionally omit either P or Q (but not
+ * both) from the calculation by setting blocks[disks-2] or
+ * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <=
+ * PAGE_SIZE as a temporary buffer of this size is used in the
+ * synchronous path. 'disks' always accounts for both destination
+ * buffers.
+ *
+ * 'blocks' note: if submit->scribble is NULL then the contents of
+ * 'blocks' may be overridden
+ */
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+ size_t len, struct async_submit_ctl *submit)
+{
+ int src_cnt = disks - 2;
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+ &P(blocks, disks), 2,
+ blocks, src_cnt, len);
+ struct dma_device *device = chan ? chan->device : NULL;
+ dma_addr_t *dma_src = NULL;
+
+ BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
+
+ if (submit->scribble)
+ dma_src = submit->scribble;
+ else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+ dma_src = (dma_addr_t *) blocks;
+
+ if (dma_src && device &&
+ (src_cnt <= dma_maxpq(device, 0) ||
+ dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
+ is_dma_pq_aligned(device, offset, 0, len)) {
+ /* run the p+q asynchronously */
+ pr_debug("%s: (async) disks: %d len: %zu\n",
+ __func__, disks, len);
+ return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
+ disks, len, dma_src, submit);
+ }
+
+ /* run the pq synchronously */
+ pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
+
+ /* wait for any prerequisite operations */
+ async_tx_quiesce(&submit->depend_tx);
+
+ if (!P(blocks, disks)) {
+ P(blocks, disks) = scribble;
+ BUG_ON(len + offset > PAGE_SIZE);
+ }
+ if (!Q(blocks, disks)) {
+ Q(blocks, disks) = scribble;
+ BUG_ON(len + offset > PAGE_SIZE);
+ }
+ do_sync_gen_syndrome(blocks, offset, disks, len, submit);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(async_gen_syndrome);
+
+/**
+ * async_syndrome_val - asynchronously validate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
+ * @spare: temporary result buffer for the synchronous case
+ * @submit: submission / completion modifiers
+ *
+ * The same notes from async_gen_syndrome apply to the 'blocks',
+ * and 'disks' parameters of this routine. The synchronous path
+ * requires a temporary result buffer and submit->scribble to be
+ * specified.
+ */
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
+ size_t len, enum sum_check_flags *pqres, struct page *spare,
+ struct async_submit_ctl *submit)
+{
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
+ NULL, 0, blocks, disks,
+ len);
+ struct dma_device *device = chan ? chan->device : NULL;
+ struct dma_async_tx_descriptor *tx;
+ enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+ dma_addr_t *dma_src = NULL;
+
+ BUG_ON(disks < 4);
+
+ if (submit->scribble)
+ dma_src = submit->scribble;
+ else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+ dma_src = (dma_addr_t *) blocks;
+
+ if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+ is_dma_pq_aligned(device, offset, 0, len)) {
+ struct device *dev = device->dev;
+ dma_addr_t *pq = &dma_src[disks-2];
+ int i;
+
+ pr_debug("%s: (async) disks: %d len: %zu\n",
+ __func__, disks, len);
+ if (!P(blocks, disks))
+ dma_flags |= DMA_PREP_PQ_DISABLE_P;
+ if (!Q(blocks, disks))
+ dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
+ for (i = 0; i < disks; i++)
+ if (likely(blocks[i])) {
+ BUG_ON(is_raid6_zero_block(blocks[i]));
+ dma_src[i] = dma_map_page(dev, blocks[i],
+ offset, len,
+ DMA_TO_DEVICE);
+ }
+
+ for (;;) {
+ tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+ disks - 2,
+ raid6_gfexp,
+ len, pqres,
+ dma_flags);
+ if (likely(tx))
+ break;
+ async_tx_quiesce(&submit->depend_tx);
+ dma_async_issue_pending(chan);
+ }
+ async_tx_submit(chan, tx, submit);
+
+ return tx;
+ } else {
+ struct page *p_src = P(blocks, disks);
+ struct page *q_src = Q(blocks, disks);
+ enum async_tx_flags flags_orig = submit->flags;
+ dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+ void *scribble = submit->scribble;
+ void *cb_param_orig = submit->cb_param;
+ void *p, *q, *s;
+
+ pr_debug("%s: (sync) disks: %d len: %zu\n",
+ __func__, disks, len);
+
+ /* caller must provide a temporary result buffer and
+ * allow the input parameters to be preserved
+ */
+ BUG_ON(!spare || !scribble);
+
+ /* wait for any prerequisite operations */
+ async_tx_quiesce(&submit->depend_tx);
+
+ /* recompute p and/or q into the temporary buffer and then
+ * check to see the result matches the current value
+ */
+ tx = NULL;
+ *pqres = 0;
+ if (p_src) {
+ init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+ NULL, NULL, scribble);
+ tx = async_xor(spare, blocks, offset, disks-2, len, submit);
+ async_tx_quiesce(&tx);
+ p = page_address(p_src) + offset;
+ s = page_address(spare) + offset;
+ *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
+ }
+
+ if (q_src) {
+ P(blocks, disks) = NULL;
+ Q(blocks, disks) = spare;
+ init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
+ tx = async_gen_syndrome(blocks, offset, disks, len, submit);
+ async_tx_quiesce(&tx);
+ q = page_address(q_src) + offset;
+ s = page_address(spare) + offset;
+ *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
+ }
+
+ /* restore P, Q and submit */
+ P(blocks, disks) = p_src;
+ Q(blocks, disks) = q_src;
+
+ submit->cb_fn = cb_fn_orig;
+ submit->cb_param = cb_param_orig;
+ submit->flags = flags_orig;
+ async_tx_sync_epilog(submit);
+
+ return NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(async_syndrome_val);
+
+static int __init async_pq_init(void)
+{
+ scribble = alloc_page(GFP_KERNEL);
+
+ if (scribble)
+ return 0;
+
+ pr_err("%s: failed to allocate required spare page\n", __func__);
+
+ return -ENOMEM;
+}
+
+static void __exit async_pq_exit(void)
+{
+ put_page(scribble);
+}
+
+module_init(async_pq_init);
+module_exit(async_pq_exit);
+
+MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
+MODULE_LICENSE("GPL");
--- /dev/null
+/*
+ * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * based on raid6recov.c:
+ * Copyright 2002 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+static struct dma_async_tx_descriptor *
+async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
+ size_t len, struct async_submit_ctl *submit)
+{
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+ &dest, 1, srcs, 2, len);
+ struct dma_device *dma = chan ? chan->device : NULL;
+ const u8 *amul, *bmul;
+ u8 ax, bx;
+ u8 *a, *b, *c;
+
+ if (dma) {
+ dma_addr_t dma_dest[2];
+ dma_addr_t dma_src[2];
+ struct device *dev = dma->dev;
+ struct dma_async_tx_descriptor *tx;
+ enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
+ dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+ dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+ dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+ len, dma_flags);
+ if (tx) {
+ async_tx_submit(chan, tx, submit);
+ return tx;
+ }
+
+ /* could not get a descriptor, unmap and fall through to
+ * the synchronous path
+ */
+ dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+ dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+ }
+
+ /* run the operation synchronously */
+ async_tx_quiesce(&submit->depend_tx);
+ amul = raid6_gfmul[coef[0]];
+ bmul = raid6_gfmul[coef[1]];
+ a = page_address(srcs[0]);
+ b = page_address(srcs[1]);
+ c = page_address(dest);
+
+ while (len--) {
+ ax = amul[*a++];
+ bx = bmul[*b++];
+ *c++ = ax ^ bx;
+ }
+
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
+ struct async_submit_ctl *submit)
+{
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+ &dest, 1, &src, 1, len);
+ struct dma_device *dma = chan ? chan->device : NULL;
+ const u8 *qmul; /* Q multiplier table */
+ u8 *d, *s;
+
+ if (dma) {
+ dma_addr_t dma_dest[2];
+ dma_addr_t dma_src[1];
+ struct device *dev = dma->dev;
+ struct dma_async_tx_descriptor *tx;
+ enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
+ dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+ dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
+ len, dma_flags);
+ if (tx) {
+ async_tx_submit(chan, tx, submit);
+ return tx;
+ }
+
+ /* could not get a descriptor, unmap and fall through to
+ * the synchronous path
+ */
+ dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+ }
+
+ /* no channel available, or failed to allocate a descriptor, so
+ * perform the operation synchronously
+ */
+ async_tx_quiesce(&submit->depend_tx);
+ qmul = raid6_gfmul[coef];
+ d = page_address(dest);
+ s = page_address(src);
+
+ while (len--)
+ *d++ = qmul[*s++];
+
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
+ struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct page *p, *q, *a, *b;
+ struct page *srcs[2];
+ unsigned char coef[2];
+ enum async_tx_flags flags = submit->flags;
+ dma_async_tx_callback cb_fn = submit->cb_fn;
+ void *cb_param = submit->cb_param;
+ void *scribble = submit->scribble;
+
+ p = blocks[4-2];
+ q = blocks[4-1];
+
+ a = blocks[faila];
+ b = blocks[failb];
+
+ /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
+ /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+ srcs[0] = p;
+ srcs[1] = q;
+ coef[0] = raid6_gfexi[failb-faila];
+ coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_sum_product(b, srcs, coef, bytes, submit);
+
+ /* Dy = P+Pxy+Dx */
+ srcs[0] = p;
+ srcs[1] = b;
+ init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
+ cb_param, scribble);
+ tx = async_xor(a, srcs, 0, 2, bytes, submit);
+
+ return tx;
+
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
+ struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct page *p, *q, *g, *dp, *dq;
+ struct page *srcs[2];
+ unsigned char coef[2];
+ enum async_tx_flags flags = submit->flags;
+ dma_async_tx_callback cb_fn = submit->cb_fn;
+ void *cb_param = submit->cb_param;
+ void *scribble = submit->scribble;
+ int uninitialized_var(good);
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ if (i == faila || i == failb)
+ continue;
+ else {
+ good = i;
+ break;
+ }
+ }
+ BUG_ON(i >= 3);
+
+ p = blocks[5-2];
+ q = blocks[5-1];
+ g = blocks[good];
+
+ /* Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for delta p and
+ * delta q
+ */
+ dp = blocks[faila];
+ dq = blocks[failb];
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_memcpy(dp, g, 0, 0, bytes, submit);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+
+ /* compute P + Pxy */
+ srcs[0] = dp;
+ srcs[1] = p;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+ /* compute Q + Qxy */
+ srcs[0] = dq;
+ srcs[1] = q;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+ /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+ srcs[0] = dp;
+ srcs[1] = dq;
+ coef[0] = raid6_gfexi[failb-faila];
+ coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+ /* Dy = P+Pxy+Dx */
+ srcs[0] = dp;
+ srcs[1] = dq;
+ init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+ cb_param, scribble);
+ tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+ return tx;
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_n(int disks, size_t bytes, int faila, int failb,
+ struct page **blocks, struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct page *p, *q, *dp, *dq;
+ struct page *srcs[2];
+ unsigned char coef[2];
+ enum async_tx_flags flags = submit->flags;
+ dma_async_tx_callback cb_fn = submit->cb_fn;
+ void *cb_param = submit->cb_param;
+ void *scribble = submit->scribble;
+
+ p = blocks[disks-2];
+ q = blocks[disks-1];
+
+ /* Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for
+ * delta p and delta q
+ */
+ dp = blocks[faila];
+ blocks[faila] = (void *)raid6_empty_zero_page;
+ blocks[disks-2] = dp;
+ dq = blocks[failb];
+ blocks[failb] = (void *)raid6_empty_zero_page;
+ blocks[disks-1] = dq;
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+
+ /* Restore pointer table */
+ blocks[faila] = dp;
+ blocks[failb] = dq;
+ blocks[disks-2] = p;
+ blocks[disks-1] = q;
+
+ /* compute P + Pxy */
+ srcs[0] = dp;
+ srcs[1] = p;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+ /* compute Q + Qxy */
+ srcs[0] = dq;
+ srcs[1] = q;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+ /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+ srcs[0] = dp;
+ srcs[1] = dq;
+ coef[0] = raid6_gfexi[failb-faila];
+ coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+ /* Dy = P+Pxy+Dx */
+ srcs[0] = dp;
+ srcs[1] = dq;
+ init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+ cb_param, scribble);
+ tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+ return tx;
+}
+
+/**
+ * async_raid6_2data_recov - asynchronously calculate two missing data blocks
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: first failed drive index
+ * @failb: second failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ */
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+ struct page **blocks, struct async_submit_ctl *submit)
+{
+ BUG_ON(faila == failb);
+ if (failb < faila)
+ swap(faila, failb);
+
+ pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+ /* we need to preserve the contents of 'blocks' for the async
+ * case, so punt to synchronous if a scribble buffer is not available
+ */
+ if (!submit->scribble) {
+ void **ptrs = (void **) blocks;
+ int i;
+
+ async_tx_quiesce(&submit->depend_tx);
+ for (i = 0; i < disks; i++)
+ ptrs[i] = page_address(blocks[i]);
+
+ raid6_2data_recov(disks, bytes, faila, failb, ptrs);
+
+ async_tx_sync_epilog(submit);
+
+ return NULL;
+ }
+
+ switch (disks) {
+ case 4:
+ /* dma devices do not uniformly understand a zero source pq
+ * operation (in contrast to the synchronous case), so
+ * explicitly handle the 4 disk special case
+ */
+ return __2data_recov_4(bytes, faila, failb, blocks, submit);
+ case 5:
+ /* dma devices do not uniformly understand a single
+ * source pq operation (in contrast to the synchronous
+ * case), so explicitly handle the 5 disk special case
+ */
+ return __2data_recov_5(bytes, faila, failb, blocks, submit);
+ default:
+ return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
+ }
+}
+EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
+
+/**
+ * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ */
+struct dma_async_tx_descriptor *
+async_raid6_datap_recov(int disks, size_t bytes, int faila,
+ struct page **blocks, struct async_submit_ctl *submit)
+{
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct page *p, *q, *dq;
+ u8 coef;
+ enum async_tx_flags flags = submit->flags;
+ dma_async_tx_callback cb_fn = submit->cb_fn;
+ void *cb_param = submit->cb_param;
+ void *scribble = submit->scribble;
+ struct page *srcs[2];
+
+ pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+ /* we need to preserve the contents of 'blocks' for the async
+ * case, so punt to synchronous if a scribble buffer is not available
+ */
+ if (!scribble) {
+ void **ptrs = (void **) blocks;
+ int i;
+
+ async_tx_quiesce(&submit->depend_tx);
+ for (i = 0; i < disks; i++)
+ ptrs[i] = page_address(blocks[i]);
+
+ raid6_datap_recov(disks, bytes, faila, ptrs);
+
+ async_tx_sync_epilog(submit);
+
+ return NULL;
+ }
+
+ p = blocks[disks-2];
+ q = blocks[disks-1];
+
+ /* Compute syndrome with zero for the missing data page
+ * Use the dead data page as temporary storage for delta q
+ */
+ dq = blocks[faila];
+ blocks[faila] = (void *)raid6_empty_zero_page;
+ blocks[disks-1] = dq;
+
+ /* in the 4 disk case we only need to perform a single source
+ * multiplication
+ */
+ if (disks == 4) {
+ int good = faila == 0 ? 1 : 0;
+ struct page *g = blocks[good];
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_memcpy(p, g, 0, 0, bytes, submit);
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+ } else {
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+ }
+
+ /* Restore pointer table */
+ blocks[faila] = dq;
+ blocks[disks-1] = q;
+
+ /* calculate g^{-faila} */
+ coef = raid6_gfinv[raid6_gfexp[faila]];
+
+ srcs[0] = dq;
+ srcs[1] = q;
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
+ tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+ tx = async_mult(dq, dq, coef, bytes, submit);
+
+ srcs[0] = p;
+ srcs[1] = dq;
+ init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+ cb_param, scribble);
+ tx = async_xor(p, srcs, 0, 2, bytes, submit);
+
+ return tx;
+}
+EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
+
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
+MODULE_LICENSE("GPL");
async_dmaengine_put();
}
+module_init(async_tx_init);
+module_exit(async_tx_exit);
+
/**
* __async_tx_find_channel - find a channel to carry out the operation or let
* the transaction execute synchronously
- * @depend_tx: transaction dependency
+ * @submit: transaction dependency and submission modifiers
* @tx_type: transaction type
*/
struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
- enum dma_transaction_type tx_type)
+__async_tx_find_channel(struct async_submit_ctl *submit,
+ enum dma_transaction_type tx_type)
{
+ struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
/* see if we can keep the chain on one channel */
if (depend_tx &&
dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
return async_dma_find_channel(tx_type);
}
EXPORT_SYMBOL_GPL(__async_tx_find_channel);
-#else
-static int __init async_tx_init(void)
-{
- printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
- return 0;
-}
-
-static void __exit async_tx_exit(void)
-{
- do { } while (0);
-}
#endif
async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
struct dma_async_tx_descriptor *tx)
{
- struct dma_chan *chan;
- struct dma_device *device;
+ struct dma_chan *chan = depend_tx->chan;
+ struct dma_device *device = chan->device;
struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
+ #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+ BUG();
+ #endif
+
/* first check to see if we can still append to depend_tx */
spin_lock_bh(&depend_tx->lock);
if (depend_tx->parent && depend_tx->chan == tx->chan) {
}
spin_unlock_bh(&depend_tx->lock);
- if (!intr_tx)
+ /* attached dependency, flush the parent channel */
+ if (!intr_tx) {
+ device->device_issue_pending(chan);
return;
-
- chan = depend_tx->chan;
- device = chan->device;
+ }
/* see if we can schedule an interrupt
* otherwise poll for completion
intr_tx->tx_submit(intr_tx);
async_tx_ack(intr_tx);
}
+ device->device_issue_pending(chan);
} else {
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
/**
- * submit_disposition - while holding depend_tx->lock we must avoid submitting
- * new operations to prevent a circular locking dependency with
- * drivers that already hold a channel lock when calling
- * async_tx_run_dependencies.
+ * submit_disposition - flags for routing an incoming operation
* @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
* @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
* @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
+ *
+ * while holding depend_tx->lock we must avoid submitting new operations
+ * to prevent a circular locking dependency with drivers that already
+ * hold a channel lock when calling async_tx_run_dependencies.
*/
enum submit_disposition {
ASYNC_TX_SUBMITTED,
void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
- enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+ struct async_submit_ctl *submit)
{
- tx->callback = cb_fn;
- tx->callback_param = cb_param;
+ struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
+ tx->callback = submit->cb_fn;
+ tx->callback_param = submit->cb_param;
if (depend_tx) {
enum submit_disposition s;
tx->tx_submit(tx);
}
- if (flags & ASYNC_TX_ACK)
+ if (submit->flags & ASYNC_TX_ACK)
async_tx_ack(tx);
- if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
+ if (depend_tx)
async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);
/**
- * async_trigger_callback - schedules the callback function to be run after
- * any dependent operations have been completed.
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: 'callback' requires the completion of this transaction
- * @cb_fn: function to call after depend_tx completes
- * @cb_param: parameter to pass to the callback routine
+ * async_trigger_callback - schedules the callback function to be run
+ * @submit: submission and completion parameters
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * The callback is run after any dependent operations have completed.
*/
struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+async_trigger_callback(struct async_submit_ctl *submit)
{
struct dma_chan *chan;
struct dma_device *device;
struct dma_async_tx_descriptor *tx;
+ struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
if (depend_tx) {
chan = depend_tx->chan;
if (tx) {
pr_debug("%s: (async)\n", __func__);
- async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ async_tx_submit(chan, tx, submit);
} else {
pr_debug("%s: (sync)\n", __func__);
/* wait for any prerequisite operations */
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_tx_quiesce);
-module_init(async_tx_init);
-module_exit(async_tx_exit);
-
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");
/* do_async_xor - dma map the pages and perform the xor with an engine */
static __async_inline struct dma_async_tx_descriptor *
do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
- unsigned int offset, int src_cnt, size_t len,
- enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+ unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+ struct async_submit_ctl *submit)
{
struct dma_device *dma = chan->device;
- dma_addr_t *dma_src = (dma_addr_t *) src_list;
struct dma_async_tx_descriptor *tx = NULL;
int src_off = 0;
int i;
- dma_async_tx_callback _cb_fn;
- void *_cb_param;
- enum async_tx_flags async_flags;
+ dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+ void *cb_param_orig = submit->cb_param;
+ enum async_tx_flags flags_orig = submit->flags;
enum dma_ctrl_flags dma_flags;
int xor_src_cnt;
dma_addr_t dma_dest;
}
while (src_cnt) {
- async_flags = flags;
+ submit->flags = flags_orig;
dma_flags = 0;
- xor_src_cnt = min(src_cnt, dma->max_xor);
+ xor_src_cnt = min(src_cnt, (int)dma->max_xor);
/* if we are submitting additional xors, leave the chain open,
* clear the callback parameters, and leave the destination
* buffer mapped
*/
if (src_cnt > xor_src_cnt) {
- async_flags &= ~ASYNC_TX_ACK;
+ submit->flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_FENCE;
dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
- _cb_fn = NULL;
- _cb_param = NULL;
+ submit->cb_fn = NULL;
+ submit->cb_param = NULL;
} else {
- _cb_fn = cb_fn;
- _cb_param = cb_param;
+ submit->cb_fn = cb_fn_orig;
+ submit->cb_param = cb_param_orig;
}
- if (_cb_fn)
+ if (submit->cb_fn)
dma_flags |= DMA_PREP_INTERRUPT;
-
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward progress
* in case they can not provide a descriptor
xor_src_cnt, len, dma_flags);
if (unlikely(!tx))
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
/* spin wait for the preceeding transactions to complete */
while (unlikely(!tx)) {
dma_flags);
}
- async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
- _cb_param);
-
- depend_tx = tx;
- flags |= ASYNC_TX_DEP_ACK;
+ async_tx_submit(chan, tx, submit);
+ submit->depend_tx = tx;
if (src_cnt > xor_src_cnt) {
/* drop completed sources */
static void
do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
- int src_cnt, size_t len, enum async_tx_flags flags,
- dma_async_tx_callback cb_fn, void *cb_param)
+ int src_cnt, size_t len, struct async_submit_ctl *submit)
{
int i;
int xor_src_cnt;
int src_off = 0;
void *dest_buf;
- void **srcs = (void **) src_list;
+ void **srcs;
+
+ if (submit->scribble)
+ srcs = submit->scribble;
+ else
+ srcs = (void **) src_list;
- /* reuse the 'src_list' array to convert to buffer pointers */
+ /* convert to buffer pointers */
for (i = 0; i < src_cnt; i++)
srcs[i] = page_address(src_list[i]) + offset;
/* set destination address */
dest_buf = page_address(dest) + offset;
- if (flags & ASYNC_TX_XOR_ZERO_DST)
+ if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
memset(dest_buf, 0, len);
while (src_cnt > 0) {
src_off += xor_src_cnt;
}
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
}
/**
* async_xor - attempt to xor a set of blocks with a dma engine.
- * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
- * flag must be set to not include dest data in the calculation. The
- * assumption with dma eninges is that they only use the destination
- * buffer as a source when it is explicity specified in the source list.
* @dest: destination page
- * @src_list: array of source pages (if the dest is also a source it must be
- * at index zero). The contents of this array may be overwritten.
- * @offset: offset in pages to start transaction
+ * @src_list: array of source pages
+ * @offset: common src/dst offset to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
- * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
- * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
+ *
+ * xor_blocks always uses the dest as a source so the
+ * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
+ * the calculation. The assumption with dma eninges is that they only
+ * use the destination buffer as a source when it is explicity specified
+ * in the source list.
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
*/
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
- int src_cnt, size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+ int src_cnt, size_t len, struct async_submit_ctl *submit)
{
- struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
&dest, 1, src_list,
src_cnt, len);
+ dma_addr_t *dma_src = NULL;
+
BUG_ON(src_cnt <= 1);
- if (chan) {
+ if (submit->scribble)
+ dma_src = submit->scribble;
+ else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+ dma_src = (dma_addr_t *) src_list;
+
+ if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
/* run the xor asynchronously */
pr_debug("%s (async): len: %zu\n", __func__, len);
return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
- flags, depend_tx, cb_fn, cb_param);
+ dma_src, submit);
} else {
/* run the xor synchronously */
pr_debug("%s (sync): len: %zu\n", __func__, len);
+ WARN_ONCE(chan, "%s: no space for dma address conversion\n",
+ __func__);
/* in the sync case the dest is an implied source
* (assumes the dest is the first source)
*/
- if (flags & ASYNC_TX_XOR_DROP_DST) {
+ if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
src_cnt--;
src_list++;
}
/* wait for any prerequisite operations */
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
- do_sync_xor(dest, src_list, offset, src_cnt, len,
- flags, cb_fn, cb_param);
+ do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
return NULL;
}
}
/**
- * async_xor_zero_sum - attempt a xor parity check with a dma engine.
+ * async_xor_val - attempt a xor parity check with a dma engine.
* @dest: destination page used if the xor is performed synchronously
- * @src_list: array of source pages. The dest page must be listed as a source
- * at index zero. The contents of this array may be overwritten.
+ * @src_list: array of source pages
* @offset: offset in pages to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
* @result: 0 if sum == 0 else non-zero
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
*/
struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
- unsigned int offset, int src_cnt, size_t len,
- u32 *result, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_param)
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+ int src_cnt, size_t len, enum sum_check_flags *result,
+ struct async_submit_ctl *submit)
{
- struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
+ struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
&dest, 1, src_list,
src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
+ dma_addr_t *dma_src = NULL;
BUG_ON(src_cnt <= 1);
- if (device && src_cnt <= device->max_xor) {
- dma_addr_t *dma_src = (dma_addr_t *) src_list;
- unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+ if (submit->scribble)
+ dma_src = submit->scribble;
+ else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+ dma_src = (dma_addr_t *) src_list;
+
+ if (dma_src && device && src_cnt <= device->max_xor &&
+ is_dma_xor_aligned(device, offset, 0, len)) {
+ unsigned long dma_prep_flags = 0;
int i;
pr_debug("%s: (async) len: %zu\n", __func__, len);
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
- tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
- len, result,
- dma_prep_flags);
+ tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+ len, result,
+ dma_prep_flags);
if (unlikely(!tx)) {
- async_tx_quiesce(&depend_tx);
+ async_tx_quiesce(&submit->depend_tx);
while (!tx) {
dma_async_issue_pending(chan);
- tx = device->device_prep_dma_zero_sum(chan,
+ tx = device->device_prep_dma_xor_val(chan,
dma_src, src_cnt, len, result,
dma_prep_flags);
}
}
- async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ async_tx_submit(chan, tx, submit);
} else {
- unsigned long xor_flags = flags;
+ enum async_tx_flags flags_orig = submit->flags;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
+ WARN_ONCE(device && src_cnt <= device->max_xor,
+ "%s: no space for dma address conversion\n",
+ __func__);
- xor_flags |= ASYNC_TX_XOR_DROP_DST;
- xor_flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_XOR_DROP_DST;
+ submit->flags &= ~ASYNC_TX_ACK;
- tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
- depend_tx, NULL, NULL);
+ tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
async_tx_quiesce(&tx);
- *result = page_is_zero(dest, offset, len) ? 0 : 1;
+ *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
- async_tx_sync_epilog(cb_fn, cb_param);
+ async_tx_sync_epilog(submit);
+ submit->flags = flags_orig;
}
return tx;
}
-EXPORT_SYMBOL_GPL(async_xor_zero_sum);
-
-static int __init async_xor_init(void)
-{
- #ifdef CONFIG_ASYNC_TX_DMA
- /* To conserve stack space the input src_list (array of page pointers)
- * is reused to hold the array of dma addresses passed to the driver.
- * This conversion is only possible when dma_addr_t is less than the
- * the size of a pointer. HIGHMEM64G is known to violate this
- * assumption.
- */
- BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
- #endif
-
- return 0;
-}
-
-static void __exit async_xor_exit(void)
-{
- do { } while (0);
-}
-
-module_init(async_xor_init);
-module_exit(async_xor_exit);
+EXPORT_SYMBOL_GPL(async_xor_val);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
--- /dev/null
+/*
+ * asynchronous raid6 recovery self test
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * based on drivers/md/raid6test/test.c:
+ * Copyright 2002-2007 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/async_tx.h>
+#include <linux/random.h>
+
+#undef pr
+#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
+
+#define NDISKS 16 /* Including P and Q */
+
+static struct page *dataptrs[NDISKS];
+static addr_conv_t addr_conv[NDISKS];
+static struct page *data[NDISKS+3];
+static struct page *spare;
+static struct page *recovi;
+static struct page *recovj;
+
+static void callback(void *param)
+{
+ struct completion *cmp = param;
+
+ complete(cmp);
+}
+
+static void makedata(int disks)
+{
+ int i, j;
+
+ for (i = 0; i < disks; i++) {
+ for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) {
+ u32 *p = page_address(data[i]) + j;
+
+ *p = random32();
+ }
+
+ dataptrs[i] = data[i];
+ }
+}
+
+static char disk_type(int d, int disks)
+{
+ if (d == disks - 2)
+ return 'P';
+ else if (d == disks - 1)
+ return 'Q';
+ else
+ return 'D';
+}
+
+/* Recover two failed blocks. */
+static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
+{
+ struct async_submit_ctl submit;
+ struct completion cmp;
+ struct dma_async_tx_descriptor *tx = NULL;
+ enum sum_check_flags result = ~0;
+
+ if (faila > failb)
+ swap(faila, failb);
+
+ if (failb == disks-1) {
+ if (faila == disks-2) {
+ /* P+Q failure. Just rebuild the syndrome. */
+ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+ tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
+ } else {
+ struct page *blocks[disks];
+ struct page *dest;
+ int count = 0;
+ int i;
+
+ /* data+Q failure. Reconstruct data from P,
+ * then rebuild syndrome
+ */
+ for (i = disks; i-- ; ) {
+ if (i == faila || i == failb)
+ continue;
+ blocks[count++] = ptrs[i];
+ }
+ dest = ptrs[faila];
+ init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+ NULL, NULL, addr_conv);
+ tx = async_xor(dest, blocks, 0, count, bytes, &submit);
+
+ init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
+ tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
+ }
+ } else {
+ if (failb == disks-2) {
+ /* data+P failure. */
+ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+ tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
+ } else {
+ /* data+data failure. */
+ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+ tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
+ }
+ }
+ init_completion(&cmp);
+ init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
+ tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
+ async_tx_issue_pending(tx);
+
+ if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
+ pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
+ __func__, faila, failb, disks);
+
+ if (result != 0)
+ pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
+ __func__, faila, failb, result);
+}
+
+static int test_disks(int i, int j, int disks)
+{
+ int erra, errb;
+
+ memset(page_address(recovi), 0xf0, PAGE_SIZE);
+ memset(page_address(recovj), 0xba, PAGE_SIZE);
+
+ dataptrs[i] = recovi;
+ dataptrs[j] = recovj;
+
+ raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
+
+ erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
+ errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
+
+ pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n",
+ __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
+ (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB");
+
+ dataptrs[i] = data[i];
+ dataptrs[j] = data[j];
+
+ return erra || errb;
+}
+
+static int test(int disks, int *tests)
+{
+ struct dma_async_tx_descriptor *tx;
+ struct async_submit_ctl submit;
+ struct completion cmp;
+ int err = 0;
+ int i, j;
+
+ recovi = data[disks];
+ recovj = data[disks+1];
+ spare = data[disks+2];
+
+ makedata(disks);
+
+ /* Nuke syndromes */
+ memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
+ memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
+
+ /* Generate assumed good syndrome */
+ init_completion(&cmp);
+ init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
+ tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
+ async_tx_issue_pending(tx);
+
+ if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
+ pr("error: initial gen_syndrome(%d) timed out\n", disks);
+ return 1;
+ }
+
+ pr("testing the %d-disk case...\n", disks);
+ for (i = 0; i < disks-1; i++)
+ for (j = i+1; j < disks; j++) {
+ (*tests)++;
+ err += test_disks(i, j, disks);
+ }
+
+ return err;
+}
+
+
+static int raid6_test(void)
+{
+ int err = 0;
+ int tests = 0;
+ int i;
+
+ for (i = 0; i < NDISKS+3; i++) {
+ data[i] = alloc_page(GFP_KERNEL);
+ if (!data[i]) {
+ while (i--)
+ put_page(data[i]);
+ return -ENOMEM;
+ }
+ }
+
+ /* the 4-disk and 5-disk cases are special for the recovery code */
+ if (NDISKS > 4)
+ err += test(4, &tests);
+ if (NDISKS > 5)
+ err += test(5, &tests);
+ err += test(NDISKS, &tests);
+
+ pr("\n");
+ pr("complete (%d tests, %d failure%s)\n",
+ tests, err, err == 1 ? "" : "s");
+
+ for (i = 0; i < NDISKS+3; i++)
+ put_page(data[i]);
+
+ return 0;
+}
+
+static void raid6_test_exit(void)
+{
+}
+
+/* when compiled-in wait for drivers to load first (assumes dma drivers
+ * are also compliled-in)
+ */
+late_initcall(raid6_test);
+module_exit(raid6_test_exit);
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
+MODULE_LICENSE("GPL");
.release = single_release,
};
+static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier);
+static struct acpi_device *lid_device;
+
/* --------------------------------------------------------------------------
FS Interface (/proc)
-------------------------------------------------------------------------- */
/* --------------------------------------------------------------------------
Driver Interface
-------------------------------------------------------------------------- */
+int acpi_lid_notifier_register(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&acpi_lid_notifier, nb);
+}
+EXPORT_SYMBOL(acpi_lid_notifier_register);
+
+int acpi_lid_notifier_unregister(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&acpi_lid_notifier, nb);
+}
+EXPORT_SYMBOL(acpi_lid_notifier_unregister);
+
+int acpi_lid_open(void)
+{
+ acpi_status status;
+ unsigned long long state;
+
+ status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL,
+ &state);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ return !!state;
+}
+EXPORT_SYMBOL(acpi_lid_open);
+
static int acpi_lid_send_state(struct acpi_device *device)
{
struct acpi_button *button = acpi_driver_data(device);
unsigned long long state;
acpi_status status;
+ int ret;
status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state);
if (ACPI_FAILURE(status))
/* input layer checks if event is redundant */
input_report_switch(button->input, SW_LID, !state);
input_sync(button->input);
- return 0;
+
+ ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
+ if (ret == NOTIFY_DONE)
+ ret = blocking_notifier_call_chain(&acpi_lid_notifier, state,
+ device);
+ return ret;
}
static void acpi_button_notify(struct acpi_device *device, u32 event)
error = input_register_device(input);
if (error)
goto err_remove_fs;
- if (button->type == ACPI_BUTTON_TYPE_LID)
+ if (button->type == ACPI_BUTTON_TYPE_LID) {
acpi_lid_send_state(device);
+ /*
+ * This assumes there's only one lid device, or if there are
+ * more we only care about the last one...
+ */
+ lid_device = device;
+ }
if (device->wakeup.flags.valid) {
/* Button's GPE is run-wake GPE */
static void bind_to_cpu0(struct work_struct *work)
{
- set_cpus_allowed(current, cpumask_of_cpu(0));
+ set_cpus_allowed_ptr(current, cpumask_of(0));
kfree(work);
}
struct acpi_processor *match_pr;
struct acpi_psd_package *match_pdomain;
- if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
return -ENOMEM;
mutex_lock(&performance_mutex);
* Now that we have _PSD data from all CPUs, lets setup P-state
* domain info.
*/
- cpumask_clear(covered_cpus);
for_each_possible_cpu(i) {
pr = per_cpu(processors, i);
if (!pr)
struct acpi_tsd_package *pdomain, *match_pdomain;
struct acpi_processor_throttling *pthrottling, *match_pthrottling;
- if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
return -ENOMEM;
/*
if (retval)
goto err_ret;
- cpumask_clear(covered_cpus);
for_each_possible_cpu(i) {
pr = per_cpu(processors, i);
if (!pr)
he_dev->rbps_base = pci_alloc_consistent(he_dev->pci_dev,
CONFIG_RBPS_SIZE * sizeof(struct he_rbp), &he_dev->rbps_phys);
if (he_dev->rbps_base == NULL) {
- hprintk("failed to alloc rbps\n");
- return -ENOMEM;
+ hprintk("failed to alloc rbps_base\n");
+ goto out_destroy_rbps_pool;
}
memset(he_dev->rbps_base, 0, CONFIG_RBPS_SIZE * sizeof(struct he_rbp));
he_dev->rbps_virt = kmalloc(CONFIG_RBPS_SIZE * sizeof(struct he_virt), GFP_KERNEL);
+ if (he_dev->rbps_virt == NULL) {
+ hprintk("failed to alloc rbps_virt\n");
+ goto out_free_rbps_base;
+ }
for (i = 0; i < CONFIG_RBPS_SIZE; ++i) {
dma_addr_t dma_handle;
cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|GFP_DMA, &dma_handle);
if (cpuaddr == NULL)
- return -ENOMEM;
+ goto out_free_rbps_virt;
he_dev->rbps_virt[i].virt = cpuaddr;
he_dev->rbps_base[i].status = RBP_LOANED | RBP_SMALLBUF | (i << RBP_INDEX_OFF);
CONFIG_RBPL_BUFSIZE, 8, 0);
if (he_dev->rbpl_pool == NULL) {
hprintk("unable to create rbpl pool\n");
- return -ENOMEM;
+ goto out_free_rbps_virt;
}
he_dev->rbpl_base = pci_alloc_consistent(he_dev->pci_dev,
CONFIG_RBPL_SIZE * sizeof(struct he_rbp), &he_dev->rbpl_phys);
if (he_dev->rbpl_base == NULL) {
- hprintk("failed to alloc rbpl\n");
- return -ENOMEM;
+ hprintk("failed to alloc rbpl_base\n");
+ goto out_destroy_rbpl_pool;
}
memset(he_dev->rbpl_base, 0, CONFIG_RBPL_SIZE * sizeof(struct he_rbp));
he_dev->rbpl_virt = kmalloc(CONFIG_RBPL_SIZE * sizeof(struct he_virt), GFP_KERNEL);
+ if (he_dev->rbpl_virt == NULL) {
+ hprintk("failed to alloc rbpl_virt\n");
+ goto out_free_rbpl_base;
+ }
for (i = 0; i < CONFIG_RBPL_SIZE; ++i) {
dma_addr_t dma_handle;
cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|GFP_DMA, &dma_handle);
if (cpuaddr == NULL)
- return -ENOMEM;
+ goto out_free_rbpl_virt;
he_dev->rbpl_virt[i].virt = cpuaddr;
he_dev->rbpl_base[i].status = RBP_LOANED | (i << RBP_INDEX_OFF);
CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), &he_dev->rbrq_phys);
if (he_dev->rbrq_base == NULL) {
hprintk("failed to allocate rbrq\n");
- return -ENOMEM;
+ goto out_free_rbpl_virt;
}
memset(he_dev->rbrq_base, 0, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq));
CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), &he_dev->tbrq_phys);
if (he_dev->tbrq_base == NULL) {
hprintk("failed to allocate tbrq\n");
- return -ENOMEM;
+ goto out_free_rbpq_base;
}
memset(he_dev->tbrq_base, 0, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq));
he_writel(he_dev, CONFIG_TBRQ_THRESH, G0_TBRQ_THRESH + (group * 16));
return 0;
+
+out_free_rbpq_base:
+ pci_free_consistent(he_dev->pci_dev, CONFIG_RBRQ_SIZE *
+ sizeof(struct he_rbrq), he_dev->rbrq_base,
+ he_dev->rbrq_phys);
+ i = CONFIG_RBPL_SIZE;
+out_free_rbpl_virt:
+ while (--i)
+ pci_pool_free(he_dev->rbps_pool, he_dev->rbpl_virt[i].virt,
+ he_dev->rbps_base[i].phys);
+ kfree(he_dev->rbpl_virt);
+
+out_free_rbpl_base:
+ pci_free_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE *
+ sizeof(struct he_rbp), he_dev->rbpl_base,
+ he_dev->rbpl_phys);
+out_destroy_rbpl_pool:
+ pci_pool_destroy(he_dev->rbpl_pool);
+
+ i = CONFIG_RBPL_SIZE;
+out_free_rbps_virt:
+ while (--i)
+ pci_pool_free(he_dev->rbpl_pool, he_dev->rbps_virt[i].virt,
+ he_dev->rbpl_base[i].phys);
+ kfree(he_dev->rbps_virt);
+
+out_free_rbps_base:
+ pci_free_consistent(he_dev->pci_dev, CONFIG_RBPS_SIZE *
+ sizeof(struct he_rbp), he_dev->rbps_base,
+ he_dev->rbps_phys);
+out_destroy_rbps_pool:
+ pci_pool_destroy(he_dev->rbps_pool);
+ return -ENOMEM;
}
static int __devinit
SOLOS_ATTR_RO(RSUnCorrectedErrorsUp)
SOLOS_ATTR_RO(InterleaveRDn)
SOLOS_ATTR_RO(InterleaveRUp)
+SOLOS_ATTR_RO(BisRDn)
+SOLOS_ATTR_RO(BisRUp)
+SOLOS_ATTR_RO(INPdown)
+SOLOS_ATTR_RO(INPup)
SOLOS_ATTR_RO(ShowtimeStart)
SOLOS_ATTR_RO(ATURVendor)
SOLOS_ATTR_RO(ATUCCountry)
SOLOS_ATTR_RW(LineMode)
SOLOS_ATTR_RW(Profile)
SOLOS_ATTR_RW(DetectNoise)
+SOLOS_ATTR_RW(BisAForceSNRMarginDn)
+SOLOS_ATTR_RW(BisMForceSNRMarginDn)
+SOLOS_ATTR_RW(BisAMaxMargin)
+SOLOS_ATTR_RW(BisMMaxMargin)
+SOLOS_ATTR_RW(AnnexAForceSNRMarginDn)
+SOLOS_ATTR_RW(AnnexAMaxMargin)
+SOLOS_ATTR_RW(AnnexMMaxMargin)
SOLOS_ATTR_RO(SupportedAnnexes)
SOLOS_ATTR_RO(Status)
SOLOS_ATTR_RO(TotalStart)
#define RX_DMA_ADDR(port) (0x30 + (4 * (port)))
#define DATA_RAM_SIZE 32768
-#define BUF_SIZE 4096
+#define BUF_SIZE 2048
+#define OLD_BUF_SIZE 4096 /* For FPGA versions <= 2*/
#define FPGA_PAGE 528 /* FPGA flash page size*/
#define SOLOS_PAGE 512 /* Solos flash page size*/
#define FPGA_BLOCK (FPGA_PAGE * 8) /* FPGA flash block size*/
#define SOLOS_BLOCK (SOLOS_PAGE * 8) /* Solos flash block size*/
-#define RX_BUF(card, nr) ((card->buffers) + (nr)*BUF_SIZE*2)
-#define TX_BUF(card, nr) ((card->buffers) + (nr)*BUF_SIZE*2 + BUF_SIZE)
+#define RX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2)
+#define TX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2 + (card->buffer_size))
+#define FLASH_BUF ((card->buffers) + 4*(card->buffer_size)*2)
#define RX_DMA_SIZE 2048
+#define FPGA_VERSION(a,b) (((a) << 8) + (b))
+#define LEGACY_BUFFERS 2
+#define DMA_SUPPORTED 4
+
static int reset = 0;
static int atmdebug = 0;
static int firmware_upgrade = 0;
static int fpga_upgrade = 0;
+static int db_firmware_upgrade = 0;
+static int db_fpga_upgrade = 0;
struct pkt_hdr {
__le16 size;
wait_queue_head_t param_wq;
wait_queue_head_t fw_wq;
int using_dma;
+ int fpga_version;
+ int buffer_size;
};
MODULE_PARM_DESC(atmdebug, "Print ATM data");
MODULE_PARM_DESC(firmware_upgrade, "Initiate Solos firmware upgrade");
MODULE_PARM_DESC(fpga_upgrade, "Initiate FPGA upgrade");
+MODULE_PARM_DESC(db_firmware_upgrade, "Initiate daughter board Solos firmware upgrade");
+MODULE_PARM_DESC(db_fpga_upgrade, "Initiate daughter board FPGA upgrade");
module_param(reset, int, 0444);
module_param(atmdebug, int, 0644);
module_param(firmware_upgrade, int, 0444);
module_param(fpga_upgrade, int, 0444);
+module_param(db_firmware_upgrade, int, 0444);
+module_param(db_fpga_upgrade, int, 0444);
static void fpga_queue(struct solos_card *card, int port, struct sk_buff *skb,
struct atm_vcc *vcc);
if (chip == 0) {
fw_name = "solos-FPGA.bin";
blocksize = FPGA_BLOCK;
- } else {
+ }
+
+ if (chip == 1) {
fw_name = "solos-Firmware.bin";
blocksize = SOLOS_BLOCK;
}
+
+ if (chip == 2){
+ if (card->fpga_version > LEGACY_BUFFERS){
+ fw_name = "solos-db-FPGA.bin";
+ blocksize = FPGA_BLOCK;
+ } else {
+ dev_info(&card->dev->dev, "FPGA version doesn't support daughter board upgrades\n");
+ return -EPERM;
+ }
+ }
+
+ if (chip == 3){
+ if (card->fpga_version > LEGACY_BUFFERS){
+ fw_name = "solos-Firmware.bin";
+ blocksize = SOLOS_BLOCK;
+ } else {
+ dev_info(&card->dev->dev, "FPGA version doesn't support daughter board upgrades\n");
+ return -EPERM;
+ }
+ }
if (request_firmware(&fw, fw_name, &card->dev->dev))
return -ENOENT;
data32 = ioread32(card->config_regs + FPGA_MODE);
/* Set mode to Chip Erase */
- dev_info(&card->dev->dev, "Set FPGA Flash mode to %s Chip Erase\n",
- chip?"Solos":"FPGA");
+ if(chip == 0 || chip == 2)
+ dev_info(&card->dev->dev, "Set FPGA Flash mode to FPGA Chip Erase\n");
+ if(chip == 1 || chip == 3)
+ dev_info(&card->dev->dev, "Set FPGA Flash mode to Solos Chip Erase\n");
iowrite32((chip * 2), card->config_regs + FLASH_MODE);
/* Copy block to buffer, swapping each 16 bits */
for(i = 0; i < blocksize; i += 4) {
uint32_t word = swahb32p((uint32_t *)(fw->data + offset + i));
- iowrite32(word, RX_BUF(card, 3) + i);
+ if(card->fpga_version > LEGACY_BUFFERS)
+ iowrite32(word, FLASH_BUF + i);
+ else
+ iowrite32(word, RX_BUF(card, 3) + i);
}
/* Specify block number and then trigger flash write */
memcpy_fromio(header, RX_BUF(card, port), sizeof(*header));
size = le16_to_cpu(header->size);
+ if (size > (card->buffer_size - sizeof(*header))){
+ dev_warn(&card->dev->dev, "Invalid buffer size\n");
+ continue;
+ }
skb = alloc_skb(size + 1, GFP_ATOMIC);
if (!skb) {
fpga_ver = (data32 & 0x0000FFFF);
major_ver = ((data32 & 0xFF000000) >> 24);
minor_ver = ((data32 & 0x00FF0000) >> 16);
+ card->fpga_version = FPGA_VERSION(major_ver,minor_ver);
+ if (card->fpga_version > LEGACY_BUFFERS)
+ card->buffer_size = BUF_SIZE;
+ else
+ card->buffer_size = OLD_BUF_SIZE;
dev_info(&dev->dev, "Solos FPGA Version %d.%02d svn-%d\n",
major_ver, minor_ver, fpga_ver);
- if (0 && fpga_ver > 27)
+ if (card->fpga_version >= DMA_SUPPORTED){
card->using_dma = 1;
- else {
+ } else {
+ card->using_dma = 0;
/* Set RX empty flag for all ports */
iowrite32(0xF0, card->config_regs + FLAGS_ADDR);
}
if (firmware_upgrade)
flash_upgrade(card, 1);
+ if (db_fpga_upgrade)
+ flash_upgrade(card, 2);
+
+ if (db_firmware_upgrade)
+ flash_upgrade(card, 3);
+
err = atm_init(card);
if (err)
goto out_free_irq;
return 0;
}
-static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp,
+static int cdrom_sysctl_info(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int pos;
goto done;
doit:
mutex_unlock(&cdrom_mutex);
- return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ return proc_dostring(ctl, write, buffer, lenp, ppos);
done:
printk(KERN_INFO "cdrom: info buffer too small\n");
goto doit;
mutex_unlock(&cdrom_mutex);
}
-static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp,
+static int cdrom_sysctl_handler(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write) {
The mmtimer device allows direct userspace access to the
Altix system timer.
+config UV_MMTIMER
+ tristate "UV_MMTIMER Memory mapped RTC for SGI UV"
+ depends on X86_UV
+ default m
+ help
+ The uv_mmtimer device allows direct userspace access to the
+ UV system timer.
+
source "drivers/char/tpm/Kconfig"
config TELCLOCK
obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o
obj-$(CONFIG_MSPEC) += mspec.o
obj-$(CONFIG_MMTIMER) += mmtimer.o
+obj-$(CONFIG_UV_MMTIMER) += uv_mmtimer.o
obj-$(CONFIG_VIOTAPE) += viotape.o
obj-$(CONFIG_HVCS) += hvcs.o
obj-$(CONFIG_IBM_BSR) += bsr.o
#define PCI_DEVICE_ID_INTEL_Q35_IG 0x29B2
#define PCI_DEVICE_ID_INTEL_Q33_HB 0x29D0
#define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2
+#define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40
+#define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42
#define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40
#define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42
#define PCI_DEVICE_ID_INTEL_IGD_E_HB 0x2E00
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \
+ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \
agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB)
if (!intel_private.i8xx_page)
return;
- /* make page uncached */
- map_page_into_agp(intel_private.i8xx_page);
-
intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
if (!intel_private.i8xx_flush_page)
intel_i830_fini_flush();
}
+static void
+do_wbinvd(void *null)
+{
+ wbinvd();
+}
+
+/* The chipset_flush interface needs to get data that has already been
+ * flushed out of the CPU all the way out to main memory, because the GPU
+ * doesn't snoop those buffers.
+ *
+ * The 8xx series doesn't have the same lovely interface for flushing the
+ * chipset write buffers that the later chips do. According to the 865
+ * specs, it's 64 octwords, or 1KB. So, to get those previous things in
+ * that buffer out, we just fill 1KB and clflush it out, on the assumption
+ * that it'll push whatever was in there out. It appears to work.
+ */
static void intel_i830_chipset_flush(struct agp_bridge_data *bridge)
{
unsigned int *pg = intel_private.i8xx_flush_page;
- int i;
- for (i = 0; i < 256; i += 2)
- *(pg + i) = i;
+ memset(pg, 0, 1024);
- wmb();
+ if (cpu_has_clflush) {
+ clflush_cache_range(pg, 1024);
+ } else {
+ if (on_each_cpu(do_wbinvd, NULL, 1) != 0)
+ printk(KERN_ERR "Timed out waiting for cache flush.\n");
+ }
}
/* The intel i830 automatically initializes the agp aperture during POST.
case PCI_DEVICE_ID_INTEL_Q45_HB:
case PCI_DEVICE_ID_INTEL_G45_HB:
case PCI_DEVICE_ID_INTEL_G41_HB:
+ case PCI_DEVICE_ID_INTEL_B43_HB:
case PCI_DEVICE_ID_INTEL_IGDNG_D_HB:
case PCI_DEVICE_ID_INTEL_IGDNG_M_HB:
case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB:
"Q45/Q43", NULL, &intel_i965_driver },
{ PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0,
"G45/G43", NULL, &intel_i965_driver },
+ { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, 0,
+ "B43", NULL, &intel_i965_driver },
{ PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0,
"G41", NULL, &intel_i965_driver },
{ PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0,
ID(PCI_DEVICE_ID_INTEL_Q45_HB),
ID(PCI_DEVICE_ID_INTEL_G45_HB),
ID(PCI_DEVICE_ID_INTEL_G41_HB),
+ ID(PCI_DEVICE_ID_INTEL_B43_HB),
ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB),
ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB),
ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB),
/*
* Blackfin On-Chip OTP Memory Interface
- * Supports BF52x/BF54x
*
- * Copyright 2007-2008 Analog Devices Inc.
+ * Copyright 2007-2009 Analog Devices Inc.
*
* Enter bugs at http://blackfin.uclinux.org/
*
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
+#include <mtd/mtd-abi.h>
#include <asm/blackfin.h>
+#include <asm/bfrom.h>
#include <asm/uaccess.h>
#define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args)
static DEFINE_MUTEX(bfin_otp_lock);
-/* OTP Boot ROM functions */
-#define _BOOTROM_OTP_COMMAND 0xEF000018
-#define _BOOTROM_OTP_READ 0xEF00001A
-#define _BOOTROM_OTP_WRITE 0xEF00001C
-
-static u32 (* const otp_command)(u32 command, u32 value) = (void *)_BOOTROM_OTP_COMMAND;
-static u32 (* const otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_READ;
-static u32 (* const otp_write)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_WRITE;
-
-/* otp_command(): defines for "command" */
-#define OTP_INIT 0x00000001
-#define OTP_CLOSE 0x00000002
-
-/* otp_{read,write}(): defines for "flags" */
-#define OTP_LOWER_HALF 0x00000000 /* select upper/lower 64-bit half (bit 0) */
-#define OTP_UPPER_HALF 0x00000001
-#define OTP_NO_ECC 0x00000010 /* do not use ECC */
-#define OTP_LOCK 0x00000020 /* sets page protection bit for page */
-#define OTP_ACCESS_READ 0x00001000
-#define OTP_ACCESS_READWRITE 0x00002000
-
-/* Return values for all functions */
-#define OTP_SUCCESS 0x00000000
-#define OTP_MASTER_ERROR 0x001
-#define OTP_WRITE_ERROR 0x003
-#define OTP_READ_ERROR 0x005
-#define OTP_ACC_VIO_ERROR 0x009
-#define OTP_DATA_MULT_ERROR 0x011
-#define OTP_ECC_MULT_ERROR 0x021
-#define OTP_PREV_WR_ERROR 0x041
-#define OTP_DATA_SB_WARN 0x100
-#define OTP_ECC_SB_WARN 0x200
-
/**
* bfin_otp_read - Read OTP pages
*
page = *pos / (sizeof(u64) * 2);
while (bytes_done < count) {
flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
- stamp("processing page %i (%s)", page, (flags == OTP_UPPER_HALF ? "upper" : "lower"));
- ret = otp_read(page, flags, &content);
+ stamp("processing page %i (0x%x:%s)", page, flags,
+ (flags & OTP_UPPER_HALF ? "upper" : "lower"));
+ ret = bfrom_OtpRead(page, flags, &content);
if (ret & OTP_MASTER_ERROR) {
+ stamp("error from otp: 0x%x", ret);
bytes_done = -EIO;
break;
}
bytes_done = -EFAULT;
break;
}
- if (flags == OTP_UPPER_HALF)
+ if (flags & OTP_UPPER_HALF)
++page;
bytes_done += sizeof(content);
*pos += sizeof(content);
}
#ifdef CONFIG_BFIN_OTP_WRITE_ENABLE
+static bool allow_writes;
+
+/**
+ * bfin_otp_init_timing - setup OTP timing parameters
+ *
+ * Required before doing any write operation. Algorithms from HRM.
+ */
+static u32 bfin_otp_init_timing(void)
+{
+ u32 tp1, tp2, tp3, timing;
+
+ tp1 = get_sclk() / 1000000;
+ tp2 = (2 * get_sclk() / 10000000) << 8;
+ tp3 = (0x1401) << 15;
+ timing = tp1 | tp2 | tp3;
+ if (bfrom_OtpCommand(OTP_INIT, timing))
+ return 0;
+
+ return timing;
+}
+
+/**
+ * bfin_otp_deinit_timing - set timings to only allow reads
+ *
+ * Should be called after all writes are done.
+ */
+static void bfin_otp_deinit_timing(u32 timing)
+{
+ /* mask bits [31:15] so that any attempts to write fail */
+ bfrom_OtpCommand(OTP_CLOSE, 0);
+ bfrom_OtpCommand(OTP_INIT, timing & ~(-1 << 15));
+ bfrom_OtpCommand(OTP_CLOSE, 0);
+}
+
/**
- * bfin_otp_write - Write OTP pages
+ * bfin_otp_write - write OTP pages
*
* All writes must be in half page chunks (half page == 64 bits).
*/
static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos)
{
- stampit();
+ ssize_t bytes_done;
+ u32 timing, page, base_flags, flags, ret;
+ u64 content;
+
+ if (!allow_writes)
+ return -EACCES;
if (count % sizeof(u64))
return -EMSGSIZE;
if (mutex_lock_interruptible(&bfin_otp_lock))
return -ERESTARTSYS;
- /* need otp_init() documentation before this can be implemented */
+ stampit();
+
+ timing = bfin_otp_init_timing();
+ if (timing == 0) {
+ mutex_unlock(&bfin_otp_lock);
+ return -EIO;
+ }
+
+ base_flags = OTP_CHECK_FOR_PREV_WRITE;
+
+ bytes_done = 0;
+ page = *pos / (sizeof(u64) * 2);
+ while (bytes_done < count) {
+ flags = base_flags | (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
+ stamp("processing page %i (0x%x:%s) from %p", page, flags,
+ (flags & OTP_UPPER_HALF ? "upper" : "lower"), buff + bytes_done);
+ if (copy_from_user(&content, buff + bytes_done, sizeof(content))) {
+ bytes_done = -EFAULT;
+ break;
+ }
+ ret = bfrom_OtpWrite(page, flags, &content);
+ if (ret & OTP_MASTER_ERROR) {
+ stamp("error from otp: 0x%x", ret);
+ bytes_done = -EIO;
+ break;
+ }
+ if (flags & OTP_UPPER_HALF)
+ ++page;
+ bytes_done += sizeof(content);
+ *pos += sizeof(content);
+ }
+
+ bfin_otp_deinit_timing(timing);
mutex_unlock(&bfin_otp_lock);
+ return bytes_done;
+}
+
+static long bfin_otp_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
+{
+ stampit();
+
+ switch (cmd) {
+ case OTPLOCK: {
+ u32 timing;
+ int ret = -EIO;
+
+ if (!allow_writes)
+ return -EACCES;
+
+ if (mutex_lock_interruptible(&bfin_otp_lock))
+ return -ERESTARTSYS;
+
+ timing = bfin_otp_init_timing();
+ if (timing) {
+ u32 otp_result = bfrom_OtpWrite(arg, OTP_LOCK, NULL);
+ stamp("locking page %lu resulted in 0x%x", arg, otp_result);
+ if (!(otp_result & OTP_MASTER_ERROR))
+ ret = 0;
+
+ bfin_otp_deinit_timing(timing);
+ }
+
+ mutex_unlock(&bfin_otp_lock);
+
+ return ret;
+ }
+
+ case MEMLOCK:
+ allow_writes = false;
+ return 0;
+
+ case MEMUNLOCK:
+ allow_writes = true;
+ return 0;
+ }
+
return -EINVAL;
}
#else
# define bfin_otp_write NULL
+# define bfin_otp_ioctl NULL
#endif
static struct file_operations bfin_otp_fops = {
- .owner = THIS_MODULE,
- .read = bfin_otp_read,
- .write = bfin_otp_write,
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = bfin_otp_ioctl,
+ .read = bfin_otp_read,
+ .write = bfin_otp_write,
};
static struct miscdevice bfin_otp_misc_device = {
unsigned long m, t;
t = devp->hd_ireqfreq;
- m = read_counter(&devp->hd_hpet->hpet_mc);
- write_counter(t + m + devp->hd_hpets->hp_delta,
- &devp->hd_timer->hpet_compare);
+ m = read_counter(&devp->hd_timer->hpet_compare);
+ write_counter(t + m, &devp->hd_timer->hpet_compare);
}
if (devp->hd_flags & HPET_SHARED_IRQ)
g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK;
if (devp->hd_flags & HPET_PERIODIC) {
- write_counter(t, &timer->hpet_compare);
g |= Tn_TYPE_CNF_MASK;
- v |= Tn_TYPE_CNF_MASK;
- writeq(v, &timer->hpet_config);
- v |= Tn_VAL_SET_CNF_MASK;
+ v |= Tn_TYPE_CNF_MASK | Tn_VAL_SET_CNF_MASK;
writeq(v, &timer->hpet_config);
local_irq_save(flags);
- /* NOTE: what we modify here is a hidden accumulator
+ /*
+ * NOTE: First we modify the hidden accumulator
* register supported by periodic-capable comparators.
* We never want to modify the (single) counter; that
- * would affect all the comparators.
+ * would affect all the comparators. The value written
+ * is the counter value when the first interrupt is due.
*/
m = read_counter(&hpet->hpet_mc);
write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare);
+ /*
+ * Then we modify the comparator, indicating the period
+ * for subsequent interrupt.
+ */
+ write_counter(t, &timer->hpet_compare);
} else {
local_irq_save(flags);
m = read_counter(&hpet->hpet_mc);
if (chunk > PAGE_SIZE)
chunk = PAGE_SIZE; /* Just for latency reasons */
- unwritten = clear_user(buf, chunk);
+ unwritten = __clear_user(buf, chunk);
written += chunk - unwritten;
if (unwritten)
break;
case IOCTL_MW_REGISTER_IPC: {
unsigned int ipcnum = (unsigned int) ioarg;
- PRINTK_3(TRACE_MWAVE,
- "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
- " ipcnum %x entry usIntCount %x\n",
- ipcnum,
- pDrvData->IPCs[ipcnum].usIntCount);
-
if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
PRINTK_ERROR(KERN_ERR_MWAVE
"mwavedd::mwave_ioctl:"
ipcnum);
return -EINVAL;
}
+ PRINTK_3(TRACE_MWAVE,
+ "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
+ " ipcnum %x entry usIntCount %x\n",
+ ipcnum,
+ pDrvData->IPCs[ipcnum].usIntCount);
+
lock_kernel();
pDrvData->IPCs[ipcnum].bIsHere = FALSE;
pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
case IOCTL_MW_GET_IPC: {
unsigned int ipcnum = (unsigned int) ioarg;
- PRINTK_3(TRACE_MWAVE,
- "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
- " ipcnum %x, usIntCount %x\n",
- ipcnum,
- pDrvData->IPCs[ipcnum].usIntCount);
if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
PRINTK_ERROR(KERN_ERR_MWAVE
"mwavedd::mwave_ioctl:"
" Invalid ipcnum %x\n", ipcnum);
return -EINVAL;
}
+ PRINTK_3(TRACE_MWAVE,
+ "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
+ " ipcnum %x, usIntCount %x\n",
+ ipcnum,
+ pDrvData->IPCs[ipcnum].usIntCount);
lock_kernel();
if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
* as an ASCII string in the standard UUID format. If accesses via the
* sysctl system call, it is returned as 16 bytes of binary data.
*/
-static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
+static int proc_do_uuid(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
ctl_table fake_table;
fake_table.data = buf;
fake_table.maxlen = sizeof(buf);
- return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos);
+ return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}
static int uuid_strategy(ctl_table *table,
/*
** It is important that the product code is an unsigned object!
*/
- if (DownLoad.ProductCode > MAX_PRODUCT) {
+ if (DownLoad.ProductCode >= MAX_PRODUCT) {
rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode);
p->RIOError.Error = NO_SUCH_PRODUCT;
return -ENXIO;
* the module usage count.
*/
#define TPM_ORD_PCR_EXTEND cpu_to_be32(20)
-#define EXTEND_PCR_SIZE 34
+#define EXTEND_PCR_RESULT_SIZE 34
static struct tpm_input_header pcrextend_header = {
.tag = TPM_TAG_RQU_COMMAND,
.length = cpu_to_be32(34),
return -ENODEV;
cmd.header.in = pcrextend_header;
- BUG_ON(be32_to_cpu(cmd.header.in.length) > EXTEND_PCR_SIZE);
cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
- rc = transmit_cmd(chip, &cmd, cmd.header.in.length,
+ rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
"attempting extend a PCR value");
module_put(chip->dev->driver->owner);
--- /dev/null
+/*
+ * Timer device implementation for SGI UV platform.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2009 Silicon Graphics, Inc. All rights reserved.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mmtimer.h>
+#include <linux/miscdevice.h>
+#include <linux/posix-timers.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/smp_lock.h>
+
+#include <asm/genapic.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+
+MODULE_AUTHOR("Dimitri Sivanich <sivanich@sgi.com>");
+MODULE_DESCRIPTION("SGI UV Memory Mapped RTC Timer");
+MODULE_LICENSE("GPL");
+
+/* name of the device, usually in /dev */
+#define UV_MMTIMER_NAME "mmtimer"
+#define UV_MMTIMER_DESC "SGI UV Memory Mapped RTC Timer"
+#define UV_MMTIMER_VERSION "1.0"
+
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma);
+
+/*
+ * Period in femtoseconds (10^-15 s)
+ */
+static unsigned long uv_mmtimer_femtoperiod;
+
+static const struct file_operations uv_mmtimer_fops = {
+ .owner = THIS_MODULE,
+ .mmap = uv_mmtimer_mmap,
+ .unlocked_ioctl = uv_mmtimer_ioctl,
+};
+
+/**
+ * uv_mmtimer_ioctl - ioctl interface for /dev/uv_mmtimer
+ * @file: file structure for the device
+ * @cmd: command to execute
+ * @arg: optional argument to command
+ *
+ * Executes the command specified by @cmd. Returns 0 for success, < 0 for
+ * failure.
+ *
+ * Valid commands:
+ *
+ * %MMTIMER_GETOFFSET - Should return the offset (relative to the start
+ * of the page where the registers are mapped) for the counter in question.
+ *
+ * %MMTIMER_GETRES - Returns the resolution of the clock in femto (10^-15)
+ * seconds
+ *
+ * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address
+ * specified by @arg
+ *
+ * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter
+ *
+ * %MMTIMER_MMAPAVAIL - Returns 1 if registers can be mmap'd into userspace
+ *
+ * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it
+ * in the address specified by @arg.
+ */
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret = 0;
+
+ switch (cmd) {
+ case MMTIMER_GETOFFSET: /* offset of the counter */
+ /*
+ * UV RTC register is on its own page
+ */
+ if (PAGE_SIZE <= (1 << 16))
+ ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1))
+ / 8;
+ else
+ ret = -ENOSYS;
+ break;
+
+ case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */
+ if (copy_to_user((unsigned long __user *)arg,
+ &uv_mmtimer_femtoperiod, sizeof(unsigned long)))
+ ret = -EFAULT;
+ break;
+
+ case MMTIMER_GETFREQ: /* frequency in Hz */
+ if (copy_to_user((unsigned long __user *)arg,
+ &sn_rtc_cycles_per_second,
+ sizeof(unsigned long)))
+ ret = -EFAULT;
+ break;
+
+ case MMTIMER_GETBITS: /* number of bits in the clock */
+ ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK);
+ break;
+
+ case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */
+ ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0;
+ break;
+
+ case MMTIMER_GETCOUNTER:
+ if (copy_to_user((unsigned long __user *)arg,
+ (unsigned long *)uv_local_mmr_address(UVH_RTC),
+ sizeof(unsigned long)))
+ ret = -EFAULT;
+ break;
+ default:
+ ret = -ENOTTY;
+ break;
+ }
+ return ret;
+}
+
+/**
+ * uv_mmtimer_mmap - maps the clock's registers into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ *
+ * Calls remap_pfn_range() to map the clock's registers into
+ * the calling process' address space.
+ */
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long uv_mmtimer_addr;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EPERM;
+
+ if (PAGE_SIZE > (1 << 16))
+ return -ENOSYS;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ uv_mmtimer_addr = UV_LOCAL_MMR_BASE | UVH_RTC;
+ uv_mmtimer_addr &= ~(PAGE_SIZE - 1);
+ uv_mmtimer_addr &= 0xfffffffffffffffUL;
+
+ if (remap_pfn_range(vma, vma->vm_start, uv_mmtimer_addr >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ printk(KERN_ERR "remap_pfn_range failed in uv_mmtimer_mmap\n");
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static struct miscdevice uv_mmtimer_miscdev = {
+ MISC_DYNAMIC_MINOR,
+ UV_MMTIMER_NAME,
+ &uv_mmtimer_fops
+};
+
+
+/**
+ * uv_mmtimer_init - device initialization routine
+ *
+ * Does initial setup for the uv_mmtimer device.
+ */
+static int __init uv_mmtimer_init(void)
+{
+ if (!is_uv_system()) {
+ printk(KERN_ERR "%s: Hardware unsupported\n", UV_MMTIMER_NAME);
+ return -1;
+ }
+
+ /*
+ * Sanity check the cycles/sec variable
+ */
+ if (sn_rtc_cycles_per_second < 100000) {
+ printk(KERN_ERR "%s: unable to determine clock frequency\n",
+ UV_MMTIMER_NAME);
+ return -1;
+ }
+
+ uv_mmtimer_femtoperiod = ((unsigned long)1E15 +
+ sn_rtc_cycles_per_second / 2) /
+ sn_rtc_cycles_per_second;
+
+ if (misc_register(&uv_mmtimer_miscdev)) {
+ printk(KERN_ERR "%s: failed to register device\n",
+ UV_MMTIMER_NAME);
+ return -1;
+ }
+
+ printk(KERN_INFO "%s: v%s, %ld MHz\n", UV_MMTIMER_DESC,
+ UV_MMTIMER_VERSION,
+ sn_rtc_cycles_per_second/(unsigned long)1E6);
+
+ return 0;
+}
+
+module_init(uv_mmtimer_init);
#include <linux/device.h>
#include <linux/dca.h>
-#define DCA_VERSION "1.8"
+#define DCA_VERSION "1.12.1"
MODULE_VERSION(DCA_VERSION);
MODULE_LICENSE("GPL");
static DEFINE_SPINLOCK(dca_lock);
-static LIST_HEAD(dca_providers);
+static LIST_HEAD(dca_domains);
-static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
{
- struct dca_provider *dca, *ret = NULL;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_bus *bus = pdev->bus;
- list_for_each_entry(dca, &dca_providers, node) {
- if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
- ret = dca;
- break;
- }
+ while (bus->parent)
+ bus = bus->parent;
+
+ return bus;
+}
+
+static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
+{
+ struct dca_domain *domain;
+
+ domain = kzalloc(sizeof(*domain), GFP_NOWAIT);
+ if (!domain)
+ return NULL;
+
+ INIT_LIST_HEAD(&domain->dca_providers);
+ domain->pci_rc = rc;
+
+ return domain;
+}
+
+static void dca_free_domain(struct dca_domain *domain)
+{
+ list_del(&domain->node);
+ kfree(domain);
+}
+
+static struct dca_domain *dca_find_domain(struct pci_bus *rc)
+{
+ struct dca_domain *domain;
+
+ list_for_each_entry(domain, &dca_domains, node)
+ if (domain->pci_rc == rc)
+ return domain;
+
+ return NULL;
+}
+
+static struct dca_domain *dca_get_domain(struct device *dev)
+{
+ struct pci_bus *rc;
+ struct dca_domain *domain;
+
+ rc = dca_pci_rc_from_dev(dev);
+ domain = dca_find_domain(rc);
+
+ if (!domain) {
+ domain = dca_allocate_domain(rc);
+ if (domain)
+ list_add(&domain->node, &dca_domains);
+ }
+
+ return domain;
+}
+
+static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+{
+ struct dca_provider *dca;
+ struct pci_bus *rc;
+ struct dca_domain *domain;
+
+ if (dev) {
+ rc = dca_pci_rc_from_dev(dev);
+ domain = dca_find_domain(rc);
+ if (!domain)
+ return NULL;
+ } else {
+ if (!list_empty(&dca_domains))
+ domain = list_first_entry(&dca_domains,
+ struct dca_domain,
+ node);
+ else
+ return NULL;
}
- return ret;
+ list_for_each_entry(dca, &domain->dca_providers, node)
+ if ((!dev) || (dca->ops->dev_managed(dca, dev)))
+ return dca;
+
+ return NULL;
}
/**
struct dca_provider *dca;
int err, slot = -ENODEV;
unsigned long flags;
+ struct pci_bus *pci_rc;
+ struct dca_domain *domain;
if (!dev)
return -EFAULT;
return -EEXIST;
}
- list_for_each_entry(dca, &dca_providers, node) {
+ pci_rc = dca_pci_rc_from_dev(dev);
+ domain = dca_find_domain(pci_rc);
+ if (!domain) {
+ spin_unlock_irqrestore(&dca_lock, flags);
+ return -ENODEV;
+ }
+
+ list_for_each_entry(dca, &domain->dca_providers, node) {
slot = dca->ops->add_requester(dca, dev);
if (slot >= 0)
break;
{
int err;
unsigned long flags;
+ struct dca_domain *domain;
err = dca_sysfs_add_provider(dca, dev);
if (err)
return err;
spin_lock_irqsave(&dca_lock, flags);
- list_add(&dca->node, &dca_providers);
+ domain = dca_get_domain(dev);
+ if (!domain) {
+ spin_unlock_irqrestore(&dca_lock, flags);
+ return -ENODEV;
+ }
+ list_add(&dca->node, &domain->dca_providers);
spin_unlock_irqrestore(&dca_lock, flags);
blocking_notifier_call_chain(&dca_provider_chain,
* unregister_dca_provider - remove a dca provider
* @dca - struct created by alloc_dca_provider()
*/
-void unregister_dca_provider(struct dca_provider *dca)
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
{
unsigned long flags;
+ struct pci_bus *pci_rc;
+ struct dca_domain *domain;
blocking_notifier_call_chain(&dca_provider_chain,
DCA_PROVIDER_REMOVE, NULL);
spin_lock_irqsave(&dca_lock, flags);
+
list_del(&dca->node);
+
+ pci_rc = dca_pci_rc_from_dev(dev);
+ domain = dca_find_domain(pci_rc);
+ if (list_empty(&domain->dca_providers))
+ dca_free_domain(domain);
+
spin_unlock_irqrestore(&dca_lock, flags);
dca_sysfs_remove_provider(dca);
static int __init dca_init(void)
{
- printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
+ pr_info("dca service started, version %s\n", DCA_VERSION);
return dca_sysfs_init();
}
comment "DMA Devices"
+config ASYNC_TX_DISABLE_CHANNEL_SWITCH
+ bool
+
config INTEL_IOATDMA
tristate "Intel I/OAT DMA support"
depends on PCI && X86
select DMA_ENGINE
select DCA
+ select ASYNC_TX_DISABLE_CHANNEL_SWITCH
help
Enable support for the Intel(R) I/OAT DMA engine present
in recent Intel Xeon chipsets.
Support the TXx9 SoC internal DMA controller. This can be
integrated in chips such as the Toshiba TX4927/38/39.
+config SH_DMAE
+ tristate "Renesas SuperH DMAC support"
+ depends on SUPERH && SH_DMA
+ depends on !SH_DMA_API
+ select DMA_ENGINE
+ help
+ Enable support for the Renesas SuperH DMA controllers.
+
config DMA_ENGINE
bool
config ASYNC_TX_DMA
bool "Async_tx: Offload support for the async_tx api"
- depends on DMA_ENGINE && !HIGHMEM64G
+ depends on DMA_ENGINE
help
This allows the async_tx api to take advantage of offload engines for
memcpy, memset, xor, and raid6 p+q operations. If your platform has
obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
obj-$(CONFIG_NET_DMA) += iovlock.o
obj-$(CONFIG_DMATEST) += dmatest.o
-obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
-ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioat/
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_FSL_DMA) += fsldma.o
obj-$(CONFIG_MV_XOR) += mv_xor.o
obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
obj-$(CONFIG_MX3_IPU) += ipu/
obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
+obj-$(CONFIG_SH_DMAE) += shdma.o
desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
if (desc) {
memset(desc, 0, sizeof(struct at_desc));
+ INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, chan);
/* txd.flags will be overwritten in prep functions */
desc->txd.flags = DMA_CTRL_ACK;
struct at_desc *child;
spin_lock_bh(&atchan->lock);
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&atchan->chan_common),
"moving child desc %p to freelist\n",
child);
- list_splice_init(&desc->txd.tx_list, &atchan->free_list);
+ list_splice_init(&desc->tx_list, &atchan->free_list);
dev_vdbg(chan2dev(&atchan->chan_common),
"moving desc %p to freelist\n", desc);
list_add(&desc->desc_node, &atchan->free_list);
param = txd->callback_param;
/* move children to free_list */
- list_splice_init(&txd->tx_list, &atchan->free_list);
+ list_splice_init(&desc->tx_list, &atchan->free_list);
/* move myself to free_list */
list_move(&desc->desc_node, &atchan->free_list);
/* unmap dma addresses */
- if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- dma_unmap_single(chan2parent(&atchan->chan_common),
- desc->lli.daddr,
- desc->len, DMA_FROM_DEVICE);
- else
- dma_unmap_page(chan2parent(&atchan->chan_common),
- desc->lli.daddr,
- desc->len, DMA_FROM_DEVICE);
- }
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- dma_unmap_single(chan2parent(&atchan->chan_common),
- desc->lli.saddr,
- desc->len, DMA_TO_DEVICE);
- else
- dma_unmap_page(chan2parent(&atchan->chan_common),
- desc->lli.saddr,
- desc->len, DMA_TO_DEVICE);
+ if (!atchan->chan_common.private) {
+ struct device *parent = chan2parent(&atchan->chan_common);
+ if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+ dma_unmap_single(parent,
+ desc->lli.daddr,
+ desc->len, DMA_FROM_DEVICE);
+ else
+ dma_unmap_page(parent,
+ desc->lli.daddr,
+ desc->len, DMA_FROM_DEVICE);
+ }
+ if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+ dma_unmap_single(parent,
+ desc->lli.saddr,
+ desc->len, DMA_TO_DEVICE);
+ else
+ dma_unmap_page(parent,
+ desc->lli.saddr,
+ desc->len, DMA_TO_DEVICE);
+ }
}
/*
/* This one is currently in progress */
return;
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
if (!(child->lli.ctrla & ATC_DONE))
/* Currently in progress */
return;
dev_crit(chan2dev(&atchan->chan_common),
" cookie: %d\n", bad_desc->txd.cookie);
atc_dump_lli(atchan, &bad_desc->lli);
- list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
atc_dump_lli(atchan, &child->lli);
/* Pretend the descriptor completed successfully */
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
}
reg_width = atslave->reg_width;
- sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
total_len += len;
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
total_len += len;
struct at_lli lli;
/* THEN values for driver housekeeping */
+ struct list_head tx_list;
struct dma_async_tx_descriptor txd;
struct list_head desc_node;
size_t len;
}
EXPORT_SYMBOL(dmaengine_put);
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+ /* A device that satisfies this test has channels that will never cause
+ * an async_tx channel switch event as all possible operation types can
+ * be handled.
+ */
+ #ifdef CONFIG_ASYNC_TX_DMA
+ if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+ if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+ if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+ if (!dma_has_cap(DMA_XOR, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+ if (!dma_has_cap(DMA_PQ, device->cap_mask))
+ return false;
+ #endif
+
+ return true;
+}
+
static int get_dma_id(struct dma_device *device)
{
int rc;
!device->device_prep_dma_memcpy);
BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
!device->device_prep_dma_xor);
- BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
- !device->device_prep_dma_zero_sum);
+ BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
+ !device->device_prep_dma_xor_val);
+ BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+ !device->device_prep_dma_pq);
+ BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+ !device->device_prep_dma_pq_val);
BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
!device->device_prep_dma_memset);
BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
BUG_ON(!device->device_issue_pending);
BUG_ON(!device->dev);
+ /* note: this only matters in the
+ * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
+ */
+ if (device_has_all_tx_types(device))
+ dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
if (!idr_ref)
return -ENOMEM;
{
tx->chan = chan;
spin_lock_init(&tx->lock);
- INIT_LIST_HEAD(&tx->tx_list);
}
EXPORT_SYMBOL(dma_async_tx_descriptor_init);
/* dma_wait_for_async_tx - spin wait for a transaction to complete
* @tx: in-flight transaction to wait on
- *
- * This routine assumes that tx was obtained from a call to async_memcpy,
- * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
- * and submitted). Walking the parent chain is only meant to cover for DMA
- * drivers that do not implement the DMA_INTERRUPT capability and may race with
- * the driver's descriptor cleanup routine.
*/
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
- enum dma_status status;
- struct dma_async_tx_descriptor *iter;
- struct dma_async_tx_descriptor *parent;
+ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
if (!tx)
return DMA_SUCCESS;
- WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
- " %s\n", __func__, dma_chan_name(tx->chan));
-
- /* poll through the dependency chain, return when tx is complete */
- do {
- iter = tx;
-
- /* find the root of the unsubmitted dependency chain */
- do {
- parent = iter->parent;
- if (!parent)
- break;
- else
- iter = parent;
- } while (parent);
-
- /* there is a small window for ->parent == NULL and
- * ->cookie == -EBUSY
- */
- while (iter->cookie == -EBUSY)
- cpu_relax();
-
- status = dma_sync_wait(iter->chan, iter->cookie);
- } while (status == DMA_IN_PROGRESS || (iter != tx));
-
- return status;
+ while (tx->cookie == -EBUSY) {
+ if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+ pr_err("%s timeout waiting for descriptor submission\n",
+ __func__);
+ return DMA_ERROR;
+ }
+ cpu_relax();
+ }
+ return dma_sync_wait(tx->chan, tx->cookie);
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
MODULE_PARM_DESC(xor_sources,
"Number of xor source buffers (default: 3)");
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(pq_sources,
+ "Number of p+q source buffers (default: 3)");
+
/*
* Initialization patterns. All bytes in the source buffer has bit 7
* set, all bytes in the destination buffer has bit 7 cleared.
dma_cookie_t cookie;
enum dma_status status;
enum dma_ctrl_flags flags;
+ u8 pq_coefs[pq_sources];
int ret;
int src_cnt;
int dst_cnt;
else if (thread->type == DMA_XOR) {
src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
dst_cnt = 1;
+ } else if (thread->type == DMA_PQ) {
+ src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
+ dst_cnt = 2;
+ for (i = 0; i < pq_sources; i++)
+ pq_coefs[i] = 1;
} else
goto err_srcs;
dma_addr_t dma_dsts[dst_cnt];
struct completion cmp;
unsigned long tmo = msecs_to_jiffies(3000);
+ u8 align = 0;
total_tests++;
src_off = dmatest_random() % (test_buf_size - len + 1);
dst_off = dmatest_random() % (test_buf_size - len + 1);
+ /* honor alignment restrictions */
+ if (thread->type == DMA_MEMCPY)
+ align = dev->copy_align;
+ else if (thread->type == DMA_XOR)
+ align = dev->xor_align;
+ else if (thread->type == DMA_PQ)
+ align = dev->pq_align;
+
+ len = (len >> align) << align;
+ src_off = (src_off >> align) << align;
+ dst_off = (dst_off >> align) << align;
+
dmatest_init_srcs(thread->srcs, src_off, len);
dmatest_init_dsts(thread->dsts, dst_off, len);
DMA_BIDIRECTIONAL);
}
+
if (thread->type == DMA_MEMCPY)
tx = dev->device_prep_dma_memcpy(chan,
dma_dsts[0] + dst_off,
dma_dsts[0] + dst_off,
dma_srcs, xor_sources,
len, flags);
+ else if (thread->type == DMA_PQ) {
+ dma_addr_t dma_pq[dst_cnt];
+
+ for (i = 0; i < dst_cnt; i++)
+ dma_pq[i] = dma_dsts[i] + dst_off;
+ tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+ pq_sources, pq_coefs,
+ len, flags);
+ }
if (!tx) {
for (i = 0; i < src_cnt; i++)
op = "copy";
else if (type == DMA_XOR)
op = "xor";
+ else if (type == DMA_PQ)
+ op = "pq";
else
return -EINVAL;
cnt = dmatest_add_threads(dtc, DMA_XOR);
thread_count += cnt > 0 ? cnt : 0;
}
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(dtc, DMA_PQ);
+ thread_count += cnt > 0 ?: 0;
+ }
pr_info("dmatest: Started %u threads using %s\n",
thread_count, dma_chan_name(chan));
{
struct dw_desc *child;
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dma_sync_single_for_cpu(chan2parent(&dwc->chan),
child->txd.phys, sizeof(child->lli),
DMA_TO_DEVICE);
dwc_sync_desc_for_cpu(dwc, desc);
spin_lock_bh(&dwc->lock);
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&dwc->chan),
"moving child desc %p to freelist\n",
child);
- list_splice_init(&desc->txd.tx_list, &dwc->free_list);
+ list_splice_init(&desc->tx_list, &dwc->free_list);
dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
list_add(&desc->desc_node, &dwc->free_list);
spin_unlock_bh(&dwc->lock);
param = txd->callback_param;
dwc_sync_desc_for_cpu(dwc, desc);
- list_splice_init(&txd->tx_list, &dwc->free_list);
+ list_splice_init(&desc->tx_list, &dwc->free_list);
list_move(&desc->desc_node, &dwc->free_list);
- /*
- * We use dma_unmap_page() regardless of how the buffers were
- * mapped before they were submitted...
- */
- if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
- dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar,
- desc->len, DMA_FROM_DEVICE);
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
- dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar,
- desc->len, DMA_TO_DEVICE);
+ if (!dwc->chan.private) {
+ struct device *parent = chan2parent(&dwc->chan);
+ if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+ dma_unmap_single(parent, desc->lli.dar,
+ desc->len, DMA_FROM_DEVICE);
+ else
+ dma_unmap_page(parent, desc->lli.dar,
+ desc->len, DMA_FROM_DEVICE);
+ }
+ if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+ dma_unmap_single(parent, desc->lli.sar,
+ desc->len, DMA_TO_DEVICE);
+ else
+ dma_unmap_page(parent, desc->lli.sar,
+ desc->len, DMA_TO_DEVICE);
+ }
+ }
/*
* The API requires that no submissions are done from a
/* This one is currently in progress */
return;
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
if (child->lli.llp == llp)
/* Currently in progress */
return;
dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
" cookie: %d\n", bad_desc->txd.cookie);
dwc_dump_lli(dwc, &bad_desc->lli);
- list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
dwc_dump_lli(dwc, &child->lli);
/* Pretend the descriptor completed successfully */
prev->txd.phys, sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
}
reg_width = dws->reg_width;
prev = first = NULL;
- sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
switch (direction) {
case DMA_TO_DEVICE:
ctllo = (DWC_DEFAULT_CTLLO
sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
total_len += len;
sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ &first->tx_list);
}
prev = desc;
total_len += len;
break;
}
+ INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, chan);
desc->txd.tx_submit = dwc_tx_submit;
desc->txd.flags = DMA_CTRL_ACK;
/* THEN values for driver housekeeping */
struct list_head desc_node;
+ struct list_head tx_list;
struct dma_async_tx_descriptor txd;
size_t len;
};
#include <linux/dmapool.h>
#include <linux/of_platform.h>
+#include <asm/fsldma.h>
#include "fsldma.h"
static void dma_init(struct fsl_dma_chan *fsl_chan)
}
/**
- * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * fsl_chan_set_request_count - Set DMA Request Count for external control
* @fsl_chan : Freescale DMA channel
- * @size : Pause control size, 0 for disable external pause control.
- * The maximum is 1024.
+ * @size : Number of bytes to transfer in a single request
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA request count is how many bytes are allowed to transfer before
+ * pausing the channel, after which a new assertion of DREQ# resumes channel
+ * operation.
*
- * The Freescale DMA channel can be controlled by the external
- * signal DREQ#. The pause control size is how many bytes are allowed
- * to transfer before pausing the channel, after which a new assertion
- * of DREQ# resumes channel operation.
+ * A size of 0 disables external pause control. The maximum size is 1024.
*/
-static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size)
+static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
{
- if (size > 1024)
- return;
+ BUG_ON(size > 1024);
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+ | ((__ilog2(size) << 24) & 0x0f000000),
+ 32);
+}
- if (size) {
- DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
- DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
- | ((__ilog2(size) << 24) & 0x0f000000),
- 32);
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @fsl_chan : Freescale DMA channel
+ * @enable : 0 is disabled, 1 is enabled.
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA Request Count feature should be used in addition to this feature
+ * to set the number of bytes to transfer before pausing the channel.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
+{
+ if (enable)
fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
- } else
+ else
fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
}
static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
- struct fsl_desc_sw *desc;
+ struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+ struct fsl_desc_sw *child;
unsigned long flags;
dma_cookie_t cookie;
spin_lock_irqsave(&fsl_chan->desc_lock, flags);
cookie = fsl_chan->common.cookie;
- list_for_each_entry(desc, &tx->tx_list, node) {
+ list_for_each_entry(child, &desc->tx_list, node) {
cookie++;
if (cookie < 0)
cookie = 1;
}
fsl_chan->common.cookie = cookie;
- append_ld_queue(fsl_chan, tx_to_fsl_desc(tx));
- list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev);
+ append_ld_queue(fsl_chan, desc);
+ list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
if (desc_sw) {
memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
+ INIT_LIST_HEAD(&desc_sw->tx_list);
dma_async_tx_descriptor_init(&desc_sw->async_tx,
&fsl_chan->common);
desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
new->async_tx.flags = flags;
/* Insert the link descriptor to the LD ring */
- list_add_tail(&new->node, &new->async_tx.tx_list);
+ list_add_tail(&new->node, &new->tx_list);
/* Set End-of-link to the last link descriptor of new list*/
set_ld_eol(fsl_chan, new);
dma_dest += copy;
/* Insert the link descriptor to the LD ring */
- list_add_tail(&new->node, &first->async_tx.tx_list);
+ list_add_tail(&new->node, &first->tx_list);
} while (len);
new->async_tx.flags = flags; /* client is in control of this ack */
if (!first)
return NULL;
- list = &first->async_tx.tx_list;
+ list = &first->tx_list;
list_for_each_entry_safe_reverse(new, prev, list, node) {
list_del(&new->node);
dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
return NULL;
}
+/**
+ * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
+ * @flags: DMAEngine flags
+ *
+ * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
+ * DMA_SLAVE API, this gets the device-specific information from the
+ * chan->private variable.
+ */
+static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_data_direction direction, unsigned long flags)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+ struct fsl_dma_slave *slave;
+ struct list_head *tx_list;
+ size_t copy;
+
+ int i;
+ struct scatterlist *sg;
+ size_t sg_used;
+ size_t hw_used;
+ struct fsl_dma_hw_addr *hw;
+ dma_addr_t dma_dst, dma_src;
+
+ if (!chan)
+ return NULL;
+
+ if (!chan->private)
+ return NULL;
+
+ fsl_chan = to_fsl_chan(chan);
+ slave = chan->private;
+
+ if (list_empty(&slave->addresses))
+ return NULL;
+
+ hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
+ hw_used = 0;
+
+ /*
+ * Build the hardware transaction to copy from the scatterlist to
+ * the hardware, or from the hardware to the scatterlist
+ *
+ * If you are copying from the hardware to the scatterlist and it
+ * takes two hardware entries to fill an entire page, then both
+ * hardware entries will be coalesced into the same page
+ *
+ * If you are copying from the scatterlist to the hardware and a
+ * single page can fill two hardware entries, then the data will
+ * be read out of the page into the first hardware entry, and so on
+ */
+ for_each_sg(sgl, sg, sg_len, i) {
+ sg_used = 0;
+
+ /* Loop until the entire scatterlist entry is used */
+ while (sg_used < sg_dma_len(sg)) {
+
+ /*
+ * If we've used up the current hardware address/length
+ * pair, we need to load a new one
+ *
+ * This is done in a while loop so that descriptors with
+ * length == 0 will be skipped
+ */
+ while (hw_used >= hw->length) {
+
+ /*
+ * If the current hardware entry is the last
+ * entry in the list, we're finished
+ */
+ if (list_is_last(&hw->entry, &slave->addresses))
+ goto finished;
+
+ /* Get the next hardware address/length pair */
+ hw = list_entry(hw->entry.next,
+ struct fsl_dma_hw_addr, entry);
+ hw_used = 0;
+ }
+
+ /* Allocate the link descriptor from DMA pool */
+ new = fsl_dma_alloc_descriptor(fsl_chan);
+ if (!new) {
+ dev_err(fsl_chan->dev, "No free memory for "
+ "link descriptor\n");
+ goto fail;
+ }
+#ifdef FSL_DMA_LD_DEBUG
+ dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+ /*
+ * Calculate the maximum number of bytes to transfer,
+ * making sure it is less than the DMA controller limit
+ */
+ copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+ hw->length - hw_used);
+ copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
+
+ /*
+ * DMA_FROM_DEVICE
+ * from the hardware to the scatterlist
+ *
+ * DMA_TO_DEVICE
+ * from the scatterlist to the hardware
+ */
+ if (direction == DMA_FROM_DEVICE) {
+ dma_src = hw->address + hw_used;
+ dma_dst = sg_dma_address(sg) + sg_used;
+ } else {
+ dma_src = sg_dma_address(sg) + sg_used;
+ dma_dst = hw->address + hw_used;
+ }
+
+ /* Fill in the descriptor */
+ set_desc_cnt(fsl_chan, &new->hw, copy);
+ set_desc_src(fsl_chan, &new->hw, dma_src);
+ set_desc_dest(fsl_chan, &new->hw, dma_dst);
+
+ /*
+ * If this is not the first descriptor, chain the
+ * current descriptor after the previous descriptor
+ */
+ if (!first) {
+ first = new;
+ } else {
+ set_desc_next(fsl_chan, &prev->hw,
+ new->async_tx.phys);
+ }
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ sg_used += copy;
+ hw_used += copy;
+
+ /* Insert the link descriptor into the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ }
+ }
+
+finished:
+
+ /* All of the hardware address/length pairs had length == 0 */
+ if (!first || !new)
+ return NULL;
+
+ new->async_tx.flags = flags;
+ new->async_tx.cookie = -EBUSY;
+
+ /* Set End-of-link to the last link descriptor of new list */
+ set_ld_eol(fsl_chan, new);
+
+ /* Enable extra controller features */
+ if (fsl_chan->set_src_loop_size)
+ fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
+
+ if (fsl_chan->set_dest_loop_size)
+ fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
+
+ if (fsl_chan->toggle_ext_start)
+ fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
+
+ if (fsl_chan->toggle_ext_pause)
+ fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
+
+ if (fsl_chan->set_request_count)
+ fsl_chan->set_request_count(fsl_chan, slave->request_count);
+
+ return &first->async_tx;
+
+fail:
+ /* If first was not set, then we failed to allocate the very first
+ * descriptor, and we're done */
+ if (!first)
+ return NULL;
+
+ /*
+ * First is set, so all of the descriptors we allocated have been added
+ * to first->tx_list, INCLUDING "first" itself. Therefore we
+ * must traverse the list backwards freeing each descriptor in turn
+ *
+ * We're re-using variables for the loop, oh well
+ */
+ tx_list = &first->tx_list;
+ list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
+ list_del_init(&new->node);
+ dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
+ }
+
+ return NULL;
+}
+
+static void fsl_dma_device_terminate_all(struct dma_chan *chan)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *desc, *tmp;
+ unsigned long flags;
+
+ if (!chan)
+ return;
+
+ fsl_chan = to_fsl_chan(chan);
+
+ /* Halt the DMA engine */
+ dma_halt(fsl_chan);
+
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ /* Remove and free all of the descriptors in the LD queue */
+ list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
+ list_del(&desc->node);
+ dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+ }
+
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
/**
* fsl_dma_update_completed_cookie - Update the completed cookie.
* @fsl_chan : Freescale DMA channel
new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
+ new_fsl_chan->set_request_count = fsl_chan_set_request_count;
}
spin_lock_init(&new_fsl_chan->desc_lock);
dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+ dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
fdev->common.device_is_tx_complete = fsl_dma_is_complete;
fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+ fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
+ fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
fdev->common.dev = &dev->dev;
fdev->irq = irq_of_parse_and_map(dev->node, 0);
struct fsl_desc_sw {
struct fsl_dma_ld_hw hw;
struct list_head node;
+ struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
struct list_head *ld;
void *priv;
struct tasklet_struct tasklet;
u32 feature;
- void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size);
+ void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+ void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
};
#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
+++ /dev/null
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2007 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-/*
- * This driver supports an Intel I/OAT DMA engine, which does asynchronous
- * copy operations.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dca.h>
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-#include "ioatdma_hw.h"
-
-MODULE_VERSION(IOAT_DMA_VERSION);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Intel Corporation");
-
-static struct pci_device_id ioat_pci_tbl[] = {
- /* I/OAT v1 platforms */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
- { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
-
- /* I/OAT v2 platforms */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
-
- /* I/OAT v3 platforms */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
- { 0, }
-};
-
-struct ioat_device {
- struct pci_dev *pdev;
- void __iomem *iobase;
- struct ioatdma_device *dma;
- struct dca_provider *dca;
-};
-
-static int __devinit ioat_probe(struct pci_dev *pdev,
- const struct pci_device_id *id);
-static void __devexit ioat_remove(struct pci_dev *pdev);
-
-static int ioat_dca_enabled = 1;
-module_param(ioat_dca_enabled, int, 0644);
-MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
-
-static struct pci_driver ioat_pci_driver = {
- .name = "ioatdma",
- .id_table = ioat_pci_tbl,
- .probe = ioat_probe,
- .remove = __devexit_p(ioat_remove),
-};
-
-static int __devinit ioat_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
-{
- void __iomem *iobase;
- struct ioat_device *device;
- unsigned long mmio_start, mmio_len;
- int err;
-
- err = pci_enable_device(pdev);
- if (err)
- goto err_enable_device;
-
- err = pci_request_regions(pdev, ioat_pci_driver.name);
- if (err)
- goto err_request_regions;
-
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err)
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err)
- goto err_set_dma_mask;
-
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err)
- goto err_set_dma_mask;
-
- mmio_start = pci_resource_start(pdev, 0);
- mmio_len = pci_resource_len(pdev, 0);
- iobase = ioremap(mmio_start, mmio_len);
- if (!iobase) {
- err = -ENOMEM;
- goto err_ioremap;
- }
-
- device = kzalloc(sizeof(*device), GFP_KERNEL);
- if (!device) {
- err = -ENOMEM;
- goto err_kzalloc;
- }
- device->pdev = pdev;
- pci_set_drvdata(pdev, device);
- device->iobase = iobase;
-
- pci_set_master(pdev);
-
- switch (readb(iobase + IOAT_VER_OFFSET)) {
- case IOAT_VER_1_2:
- device->dma = ioat_dma_probe(pdev, iobase);
- if (device->dma && ioat_dca_enabled)
- device->dca = ioat_dca_init(pdev, iobase);
- break;
- case IOAT_VER_2_0:
- device->dma = ioat_dma_probe(pdev, iobase);
- if (device->dma && ioat_dca_enabled)
- device->dca = ioat2_dca_init(pdev, iobase);
- break;
- case IOAT_VER_3_0:
- device->dma = ioat_dma_probe(pdev, iobase);
- if (device->dma && ioat_dca_enabled)
- device->dca = ioat3_dca_init(pdev, iobase);
- break;
- default:
- err = -ENODEV;
- break;
- }
- if (!device->dma)
- err = -ENODEV;
-
- if (err)
- goto err_version;
-
- return 0;
-
-err_version:
- kfree(device);
-err_kzalloc:
- iounmap(iobase);
-err_ioremap:
-err_set_dma_mask:
- pci_release_regions(pdev);
- pci_disable_device(pdev);
-err_request_regions:
-err_enable_device:
- return err;
-}
-
-static void __devexit ioat_remove(struct pci_dev *pdev)
-{
- struct ioat_device *device = pci_get_drvdata(pdev);
-
- dev_err(&pdev->dev, "Removing dma and dca services\n");
- if (device->dca) {
- unregister_dca_provider(device->dca);
- free_dca_provider(device->dca);
- device->dca = NULL;
- }
-
- if (device->dma) {
- ioat_dma_remove(device->dma);
- device->dma = NULL;
- }
-
- kfree(device);
-}
-
-static int __init ioat_init_module(void)
-{
- return pci_register_driver(&ioat_pci_driver);
-}
-module_init(ioat_init_module);
-
-static void __exit ioat_exit_module(void)
-{
- pci_unregister_driver(&ioat_pci_driver);
-}
-module_exit(ioat_exit_module);
--- /dev/null
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
--- /dev/null
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "dma.h"
+#include "registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag. If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/* expected tag map bytes for I/OAT ver.2 */
+#define DCA2_TAG_MAP_BYTE0 0x80
+#define DCA2_TAG_MAP_BYTE1 0x0
+#define DCA2_TAG_MAP_BYTE2 0x81
+#define DCA2_TAG_MAP_BYTE3 0x82
+#define DCA2_TAG_MAP_BYTE4 0x82
+
+/* verify if tag map matches expected values */
+static inline int dca2_tag_map_valid(u8 *tag_map)
+{
+ return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
+ (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
+ (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
+ (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
+ (tag_map[4] == DCA2_TAG_MAP_BYTE4));
+}
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device. Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN 8
+
+static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
+static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+ return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+ /* CPUID level 9 returns DCA configuration */
+ /* Bit 0 indicates DCA enabled by the BIOS */
+ unsigned long cpuid_level_9;
+ int res;
+
+ cpuid_level_9 = cpuid_eax(9);
+ res = test_bit(0, &cpuid_level_9);
+ if (!res)
+ dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
+
+ return res;
+}
+
+static int system_has_dca_enabled(struct pci_dev *pdev)
+{
+ if (boot_cpu_has(X86_FEATURE_DCA))
+ return dca_enabled_in_bios(pdev);
+
+ dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+ return 0;
+}
+
+struct ioat_dca_slot {
+ struct pci_dev *pdev; /* requester device */
+ u16 rid; /* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+ void __iomem *iobase;
+ void __iomem *dca_base;
+ int max_requesters;
+ int requester_count;
+ u8 tag_map[IOAT_TAG_MAP_LEN];
+ struct ioat_dca_slot req_slots[0];
+};
+
+/* 5000 series chipset DCA Port Requester ID Table Entry Format
+ * [15:8] PCI-Express Bus Number
+ * [7:3] PCI-Express Device Number
+ * [2:0] PCI-Express Function Number
+ *
+ * 5000 series chipset DCA control register format
+ * [7:1] Reserved (0)
+ * [0] Ignore Function Number
+ */
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ writew(id, ioatdca->dca_base + (i * 4));
+ /* make sure the ignore function bit is off */
+ writeb(0, ioatdca->dca_base + (i * 4) + 2);
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ writew(0, ioatdca->dca_base + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ int i, apic_id, bit, value;
+ u8 entry, tag;
+
+ tag = 0;
+ apic_id = cpu_physical_id(cpu);
+
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+ entry = ioatdca->tag_map[i];
+ if (entry & DCA_TAG_MAP_VALID) {
+ bit = entry & ~DCA_TAG_MAP_VALID;
+ value = (apic_id & (1 << bit)) ? 1 : 0;
+ } else {
+ value = entry ? 1 : 0;
+ }
+ tag |= (value << i);
+ }
+ return tag;
+}
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+
+ pdev = to_pci_dev(dev);
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev)
+ return 1;
+ }
+ return 0;
+}
+
+static struct dca_ops ioat_dca_ops = {
+ .add_requester = ioat_dca_add_requester,
+ .remove_requester = ioat_dca_remove_requester,
+ .get_tag = ioat_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+
+struct dca_provider * __devinit
+ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ u8 *tag_map = NULL;
+ int i;
+ int err;
+ u8 version;
+ u8 max_requesters;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ /* I/OAT v1 systems must have a known tag_map to support DCA */
+ switch (pdev->vendor) {
+ case PCI_VENDOR_ID_INTEL:
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT:
+ tag_map = ioat_tag_map_BNB;
+ break;
+ case PCI_DEVICE_ID_INTEL_IOAT_CNB:
+ tag_map = ioat_tag_map_CNB;
+ break;
+ case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
+ tag_map = ioat_tag_map_SCNB;
+ break;
+ }
+ break;
+ case PCI_VENDOR_ID_UNISYS:
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
+ tag_map = ioat_tag_map_UNISYS;
+ break;
+ }
+ break;
+ }
+ if (tag_map == NULL)
+ return NULL;
+
+ version = readb(iobase + IOAT_VER_OFFSET);
+ if (version == IOAT_VER_3_0)
+ max_requesters = IOAT3_DCA_MAX_REQ;
+ else
+ max_requesters = IOAT_DCA_MAX_REQ;
+
+ dca = alloc_dca_provider(&ioat_dca_ops,
+ sizeof(*ioatdca) +
+ (sizeof(struct ioat_dca_slot) * max_requesters));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->max_requesters = max_requesters;
+ ioatdca->dca_base = iobase + 0x54;
+
+ /* copy over the APIC ID to DCA tag mapping */
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
+ ioatdca->tag_map[i] = tag_map[i];
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
+
+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+ writel(id | IOAT_DCA_GREQID_VALID,
+ ioatdca->iobase + global_req_table + (i * 4));
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+ writel(0, ioatdca->iobase + global_req_table + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ u8 tag;
+
+ tag = ioat_dca_get_tag(dca, dev, cpu);
+ tag = (~tag) & 0x1F;
+ return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+ .add_requester = ioat2_dca_add_requester,
+ .remove_requester = ioat2_dca_remove_requester,
+ .get_tag = ioat2_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+ int slots = 0;
+ u32 req;
+ u16 global_req_table;
+
+ global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+ if (global_req_table == 0)
+ return 0;
+ do {
+ req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+ slots++;
+ } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+ return slots;
+}
+
+struct dca_provider * __devinit
+ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ int slots;
+ int i;
+ int err;
+ u32 tag_map;
+ u16 dca_offset;
+ u16 csi_fsb_control;
+ u16 pcie_control;
+ u8 bit;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+ if (dca_offset == 0)
+ return NULL;
+
+ slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+ if (slots == 0)
+ return NULL;
+
+ dca = alloc_dca_provider(&ioat2_dca_ops,
+ sizeof(*ioatdca)
+ + (sizeof(struct ioat_dca_slot) * slots));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->iobase = iobase;
+ ioatdca->dca_base = iobase + dca_offset;
+ ioatdca->max_requesters = slots;
+
+ /* some bios might not know to turn these on */
+ csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+ if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+ csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+ writew(csi_fsb_control,
+ ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+ }
+ pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+ if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+ pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+ writew(pcie_control,
+ ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+ }
+
+
+ /* TODO version, compatibility and configuration checks */
+
+ /* copy out the APIC to DCA tag map */
+ tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+ for (i = 0; i < 5; i++) {
+ bit = (tag_map >> (4 * i)) & 0x0f;
+ if (bit < 8)
+ ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+ else
+ ioatdca->tag_map[i] = 0;
+ }
+
+ if (!dca2_tag_map_valid(ioatdca->tag_map)) {
+ dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
+ "disabling DCA\n");
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
+
+static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+ writel(id | IOAT_DCA_GREQID_VALID,
+ ioatdca->iobase + global_req_table + (i * 4));
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat3_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+ writel(0, ioatdca->iobase + global_req_table + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat3_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ u8 tag;
+
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ int i, apic_id, bit, value;
+ u8 entry;
+
+ tag = 0;
+ apic_id = cpu_physical_id(cpu);
+
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+ entry = ioatdca->tag_map[i];
+ if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+ bit = entry &
+ ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+ value = (apic_id & (1 << bit)) ? 1 : 0;
+ } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+ bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+ value = (apic_id & (1 << bit)) ? 0 : 1;
+ } else {
+ value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+ }
+ tag |= (value << i);
+ }
+
+ return tag;
+}
+
+static struct dca_ops ioat3_dca_ops = {
+ .add_requester = ioat3_dca_add_requester,
+ .remove_requester = ioat3_dca_remove_requester,
+ .get_tag = ioat3_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+ int slots = 0;
+ u32 req;
+ u16 global_req_table;
+
+ global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+ if (global_req_table == 0)
+ return 0;
+
+ do {
+ req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+ slots++;
+ } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+ return slots;
+}
+
+struct dca_provider * __devinit
+ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ int slots;
+ int i;
+ int err;
+ u16 dca_offset;
+ u16 csi_fsb_control;
+ u16 pcie_control;
+ u8 bit;
+
+ union {
+ u64 full;
+ struct {
+ u32 low;
+ u32 high;
+ };
+ } tag_map;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+ if (dca_offset == 0)
+ return NULL;
+
+ slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
+ if (slots == 0)
+ return NULL;
+
+ dca = alloc_dca_provider(&ioat3_dca_ops,
+ sizeof(*ioatdca)
+ + (sizeof(struct ioat_dca_slot) * slots));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->iobase = iobase;
+ ioatdca->dca_base = iobase + dca_offset;
+ ioatdca->max_requesters = slots;
+
+ /* some bios might not know to turn these on */
+ csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+ if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+ csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+ writew(csi_fsb_control,
+ ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+ }
+ pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+ if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+ pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+ writew(pcie_control,
+ ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+ }
+
+
+ /* TODO version, compatibility and configuration checks */
+
+ /* copy out the APIC to DCA tag map */
+ tag_map.low =
+ readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+ tag_map.high =
+ readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+ for (i = 0; i < 8; i++) {
+ bit = tag_map.full >> (8 * i);
+ ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+ }
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
--- /dev/null
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+ "high-water mark for pushing ioat descriptors (default: 4)");
+
+/* internal functions */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat);
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+ struct ioatdma_device *instance = data;
+ struct ioat_chan_common *chan;
+ unsigned long attnstatus;
+ int bit;
+ u8 intrctrl;
+
+ intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+ if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+ return IRQ_NONE;
+
+ if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_NONE;
+ }
+
+ attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+ for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+ chan = ioat_chan_by_index(instance, bit);
+ tasklet_schedule(&chan->cleanup_task);
+ }
+
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+ struct ioat_chan_common *chan = data;
+
+ tasklet_schedule(&chan->cleanup_task);
+
+ return IRQ_HANDLED;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data);
+
+/* common channel initialization */
+void ioat_init_channel(struct ioatdma_device *device,
+ struct ioat_chan_common *chan, int idx,
+ void (*timer_fn)(unsigned long),
+ void (*tasklet)(unsigned long),
+ unsigned long ioat)
+{
+ struct dma_device *dma = &device->common;
+
+ chan->device = device;
+ chan->reg_base = device->reg_base + (0x80 * (idx + 1));
+ spin_lock_init(&chan->cleanup_lock);
+ chan->common.device = dma;
+ list_add_tail(&chan->common.device_node, &dma->channels);
+ device->idx[idx] = chan;
+ init_timer(&chan->timer);
+ chan->timer.function = timer_fn;
+ chan->timer.data = ioat;
+ tasklet_init(&chan->cleanup_task, tasklet, ioat);
+ tasklet_disable(&chan->cleanup_task);
+}
+
+static void ioat1_timer_event(unsigned long data);
+
+/**
+ * ioat1_dma_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat1_enumerate_channels(struct ioatdma_device *device)
+{
+ u8 xfercap_scale;
+ u32 xfercap;
+ int i;
+ struct ioat_dma_chan *ioat;
+ struct device *dev = &device->pdev->dev;
+ struct dma_device *dma = &device->common;
+
+ INIT_LIST_HEAD(&dma->channels);
+ dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+ dma->chancnt &= 0x1f; /* bits [4:0] valid */
+ if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+ dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+ dma->chancnt, ARRAY_SIZE(device->idx));
+ dma->chancnt = ARRAY_SIZE(device->idx);
+ }
+ xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+ xfercap_scale &= 0x1f; /* bits [4:0] valid */
+ xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+ dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
+
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+ if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+ dma->chancnt--;
+#endif
+ for (i = 0; i < dma->chancnt; i++) {
+ ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+ if (!ioat)
+ break;
+
+ ioat_init_channel(device, &ioat->base, i,
+ ioat1_timer_event,
+ ioat1_cleanup_tasklet,
+ (unsigned long) ioat);
+ ioat->xfercap = xfercap;
+ spin_lock_init(&ioat->desc_lock);
+ INIT_LIST_HEAD(&ioat->free_desc);
+ INIT_LIST_HEAD(&ioat->used_desc);
+ }
+ dma->chancnt = i;
+ return i;
+}
+
+/**
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
+ * descriptors to hw
+ * @chan: DMA channel handle
+ */
+static inline void
+__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
+{
+ void __iomem *reg_base = ioat->base.reg_base;
+
+ dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
+ __func__, ioat->pending);
+ ioat->pending = 0;
+ writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(chan);
+
+ if (ioat->pending > 0) {
+ spin_lock_bh(&ioat->desc_lock);
+ __ioat1_dma_memcpy_issue_pending(ioat);
+ spin_unlock_bh(&ioat->desc_lock);
+ }
+}
+
+/**
+ * ioat1_reset_channel - restart a channel
+ * @ioat: IOAT DMA channel handle
+ */
+static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ void __iomem *reg_base = chan->reg_base;
+ u32 chansts, chanerr;
+
+ dev_warn(to_dev(chan), "reset\n");
+ chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
+ chansts = *chan->completion & IOAT_CHANSTS_STATUS;
+ if (chanerr) {
+ dev_err(to_dev(chan),
+ "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+ chan_num(chan), chansts, chanerr);
+ writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /*
+ * whack it upside the head with a reset
+ * and wait for things to settle out.
+ * force the pending count to a really big negative
+ * to make sure no one forces an issue_pending
+ * while we're waiting.
+ */
+
+ ioat->pending = INT_MIN;
+ writeb(IOAT_CHANCMD_RESET,
+ reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+ set_bit(IOAT_RESET_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + RESET_DELAY);
+}
+
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_chan *c = tx->chan;
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *first;
+ struct ioat_desc_sw *chain_tail;
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&ioat->desc_lock);
+ /* cookie incr and addition to used_list must be atomic */
+ cookie = c->cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ c->cookie = cookie;
+ tx->cookie = cookie;
+ dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+ /* write address into NextDescriptor field of last desc in chain */
+ first = to_ioat_desc(desc->tx_list.next);
+ chain_tail = to_ioat_desc(ioat->used_desc.prev);
+ /* make descriptor updates globally visible before chaining */
+ wmb();
+ chain_tail->hw->next = first->txd.phys;
+ list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
+ dump_desc_dbg(ioat, chain_tail);
+ dump_desc_dbg(ioat, first);
+
+ if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ ioat->active += desc->hw->tx_cnt;
+ ioat->pending += desc->hw->tx_cnt;
+ if (ioat->pending >= ioat_pending_level)
+ __ioat1_dma_memcpy_issue_pending(ioat);
+ spin_unlock_bh(&ioat->desc_lock);
+
+ return cookie;
+}
+
+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
+static struct ioat_desc_sw *
+ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
+{
+ struct ioat_dma_descriptor *desc;
+ struct ioat_desc_sw *desc_sw;
+ struct ioatdma_device *ioatdma_device;
+ dma_addr_t phys;
+
+ ioatdma_device = ioat->base.device;
+ desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
+ if (unlikely(!desc))
+ return NULL;
+
+ desc_sw = kzalloc(sizeof(*desc_sw), flags);
+ if (unlikely(!desc_sw)) {
+ pci_pool_free(ioatdma_device->dma_pool, desc, phys);
+ return NULL;
+ }
+
+ memset(desc, 0, sizeof(*desc));
+
+ INIT_LIST_HEAD(&desc_sw->tx_list);
+ dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
+ desc_sw->txd.tx_submit = ioat1_tx_submit;
+ desc_sw->hw = desc;
+ desc_sw->txd.phys = phys;
+ set_desc_id(desc_sw, -1);
+
+ return desc_sw;
+}
+
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+ "ioat1: initial descriptors per channel (default: 256)");
+/**
+ * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan: the channel to be filled out
+ */
+static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *desc;
+ u32 chanerr;
+ int i;
+ LIST_HEAD(tmp_list);
+
+ /* have we already been set up? */
+ if (!list_empty(&ioat->free_desc))
+ return ioat->desccount;
+
+ /* Setup register to interrupt and write completion status on error */
+ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ if (chanerr) {
+ dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+ writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /* Allocate descriptors */
+ for (i = 0; i < ioat_initial_desc_count; i++) {
+ desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
+ if (!desc) {
+ dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
+ break;
+ }
+ set_desc_id(desc, i);
+ list_add_tail(&desc->node, &tmp_list);
+ }
+ spin_lock_bh(&ioat->desc_lock);
+ ioat->desccount = i;
+ list_splice(&tmp_list, &ioat->free_desc);
+ spin_unlock_bh(&ioat->desc_lock);
+
+ /* allocate a completion writeback area */
+ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+ chan->completion = pci_pool_alloc(chan->device->completion_pool,
+ GFP_KERNEL, &chan->completion_dma);
+ memset(chan->completion, 0, sizeof(*chan->completion));
+ writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(((u64) chan->completion_dma) >> 32,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+ tasklet_enable(&chan->cleanup_task);
+ ioat1_dma_start_null_desc(ioat); /* give chain to dma device */
+ dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+ __func__, ioat->desccount);
+ return ioat->desccount;
+}
+
+/**
+ * ioat1_dma_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+static void ioat1_dma_free_chan_resources(struct dma_chan *c)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioatdma_device *ioatdma_device = chan->device;
+ struct ioat_desc_sw *desc, *_desc;
+ int in_use_descs = 0;
+
+ /* Before freeing channel resources first check
+ * if they have been previously allocated for this channel.
+ */
+ if (ioat->desccount == 0)
+ return;
+
+ tasklet_disable(&chan->cleanup_task);
+ del_timer_sync(&chan->timer);
+ ioat1_cleanup(ioat);
+
+ /* Delay 100ms after reset to allow internal DMA logic to quiesce
+ * before removing DMA descriptor resources.
+ */
+ writeb(IOAT_CHANCMD_RESET,
+ chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+ mdelay(100);
+
+ spin_lock_bh(&ioat->desc_lock);
+ list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
+ dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
+ __func__, desc_id(desc));
+ dump_desc_dbg(ioat, desc);
+ in_use_descs++;
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->txd.phys);
+ kfree(desc);
+ }
+ list_for_each_entry_safe(desc, _desc,
+ &ioat->free_desc, node) {
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->txd.phys);
+ kfree(desc);
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+
+ pci_pool_free(ioatdma_device->completion_pool,
+ chan->completion,
+ chan->completion_dma);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+
+ chan->last_completion = 0;
+ chan->completion_dma = 0;
+ ioat->pending = 0;
+ ioat->desccount = 0;
+}
+
+/**
+ * ioat1_dma_get_next_descriptor - return the next available descriptor
+ * @ioat: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the chain, and must be called with the
+ * channel's desc_lock held. Allocates more descriptors if the channel
+ * has run out.
+ */
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
+{
+ struct ioat_desc_sw *new;
+
+ if (!list_empty(&ioat->free_desc)) {
+ new = to_ioat_desc(ioat->free_desc.next);
+ list_del(&new->node);
+ } else {
+ /* try to get another desc */
+ new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
+ if (!new) {
+ dev_err(to_dev(&ioat->base), "alloc failed\n");
+ return NULL;
+ }
+ }
+ dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
+ __func__, desc_id(new));
+ prefetch(new->hw);
+ return new;
+}
+
+static struct dma_async_tx_descriptor *
+ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_desc_sw *desc;
+ size_t copy;
+ LIST_HEAD(chain);
+ dma_addr_t src = dma_src;
+ dma_addr_t dest = dma_dest;
+ size_t total_len = len;
+ struct ioat_dma_descriptor *hw = NULL;
+ int tx_cnt = 0;
+
+ spin_lock_bh(&ioat->desc_lock);
+ desc = ioat1_dma_get_next_descriptor(ioat);
+ do {
+ if (!desc)
+ break;
+
+ tx_cnt++;
+ copy = min_t(size_t, len, ioat->xfercap);
+
+ hw = desc->hw;
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dest;
+
+ list_add_tail(&desc->node, &chain);
+
+ len -= copy;
+ dest += copy;
+ src += copy;
+ if (len) {
+ struct ioat_desc_sw *next;
+
+ async_tx_ack(&desc->txd);
+ next = ioat1_dma_get_next_descriptor(ioat);
+ hw->next = next ? next->txd.phys : 0;
+ dump_desc_dbg(ioat, desc);
+ desc = next;
+ } else
+ hw->next = 0;
+ } while (len);
+
+ if (!desc) {
+ struct ioat_chan_common *chan = &ioat->base;
+
+ dev_err(to_dev(chan),
+ "chan%d - get_next_desc failed\n", chan_num(chan));
+ list_splice(&chain, &ioat->free_desc);
+ spin_unlock_bh(&ioat->desc_lock);
+ return NULL;
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ list_splice(&chain, &desc->tx_list);
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->tx_cnt = tx_cnt;
+ dump_desc_dbg(ioat, desc);
+
+ return &desc->txd;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data)
+{
+ struct ioat_dma_chan *chan = (void *)data;
+
+ ioat1_cleanup(chan);
+ writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+ size_t len, struct ioat_dma_descriptor *hw)
+{
+ struct pci_dev *pdev = chan->device->pdev;
+ size_t offset = len - hw->size;
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ ioat_unmap(pdev, hw->dst_addr - offset, len,
+ PCI_DMA_FROMDEVICE, flags, 1);
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
+ ioat_unmap(pdev, hw->src_addr - offset, len,
+ PCI_DMA_TODEVICE, flags, 0);
+}
+
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
+{
+ unsigned long phys_complete;
+ u64 completion;
+
+ completion = *chan->completion;
+ phys_complete = ioat_chansts_to_addr(completion);
+
+ dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
+ (unsigned long long) phys_complete);
+
+ if (is_ioat_halted(completion)) {
+ u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
+ chanerr);
+
+ /* TODO do something to salvage the situation */
+ }
+
+ return phys_complete;
+}
+
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+ unsigned long *phys_complete)
+{
+ *phys_complete = ioat_get_current_completion(chan);
+ if (*phys_complete == chan->last_completion)
+ return false;
+ clear_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ return true;
+}
+
+static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct list_head *_desc, *n;
+ struct dma_async_tx_descriptor *tx;
+
+ dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
+ __func__, phys_complete);
+ list_for_each_safe(_desc, n, &ioat->used_desc) {
+ struct ioat_desc_sw *desc;
+
+ prefetch(n);
+ desc = list_entry(_desc, typeof(*desc), node);
+ tx = &desc->txd;
+ /*
+ * Incoming DMA requests may use multiple descriptors,
+ * due to exceeding xfercap, perhaps. If so, only the
+ * last one will have a cookie, and require unmapping.
+ */
+ dump_desc_dbg(ioat, desc);
+ if (tx->cookie) {
+ chan->completed_cookie = tx->cookie;
+ tx->cookie = 0;
+ ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ ioat->active -= desc->hw->tx_cnt;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL;
+ }
+ }
+
+ if (tx->phys != phys_complete) {
+ /*
+ * a completed entry, but not the last, so clean
+ * up if the client is done with the descriptor
+ */
+ if (async_tx_test_ack(tx))
+ list_move_tail(&desc->node, &ioat->free_desc);
+ } else {
+ /*
+ * last used desc. Do not remove, so we can
+ * append from it.
+ */
+
+ /* if nothing else is pending, cancel the
+ * completion timeout
+ */
+ if (n == &ioat->used_desc) {
+ dev_dbg(to_dev(chan),
+ "%s cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ }
+
+ /* TODO check status bits? */
+ break;
+ }
+ }
+
+ chan->last_completion = phys_complete;
+}
+
+/**
+ * ioat1_cleanup - cleanup up finished descriptors
+ * @chan: ioat channel to be cleaned up
+ *
+ * To prevent lock contention we defer cleanup when the locks are
+ * contended with a terminal timeout that forces cleanup and catches
+ * completion notification errors.
+ */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ prefetch(chan->completion);
+
+ if (!spin_trylock_bh(&chan->cleanup_lock))
+ return;
+
+ if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ if (!spin_trylock_bh(&ioat->desc_lock)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ __cleanup(ioat, phys_complete);
+
+ spin_unlock_bh(&ioat->desc_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat1_timer_event(unsigned long data)
+{
+ struct ioat_dma_chan *ioat = (void *) data;
+ struct ioat_chan_common *chan = &ioat->base;
+
+ dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
+ struct ioat_desc_sw *desc;
+
+ spin_lock_bh(&ioat->desc_lock);
+
+ /* restart active descriptors */
+ desc = to_ioat_desc(ioat->used_desc.prev);
+ ioat_set_chainaddr(ioat, desc->txd.phys);
+ ioat_start(chan);
+
+ ioat->pending = 0;
+ set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ spin_unlock_bh(&ioat->desc_lock);
+ } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+
+ spin_lock_bh(&ioat->desc_lock);
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat1_reset_channel(ioat);
+ else {
+ u64 status = ioat_chansts(chan);
+
+ /* manually update the last completion address */
+ if (ioat_chansts_to_addr(status) != 0)
+ *chan->completion = status;
+
+ set_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+ if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+ return DMA_SUCCESS;
+
+ ioat1_cleanup(ioat);
+
+ return ioat_is_complete(c, cookie, done, used);
+}
+
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *desc;
+ struct ioat_dma_descriptor *hw;
+
+ spin_lock_bh(&ioat->desc_lock);
+
+ desc = ioat1_dma_get_next_descriptor(ioat);
+
+ if (!desc) {
+ dev_err(to_dev(chan),
+ "Unable to start null desc - get next desc failed\n");
+ spin_unlock_bh(&ioat->desc_lock);
+ return;
+ }
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.compl_write = 1;
+ /* set size to non-zero value (channel returns error when size is 0) */
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+ async_tx_ack(&desc->txd);
+ hw->next = 0;
+ list_add_tail(&desc->node, &ioat->used_desc);
+ dump_desc_dbg(ioat, desc);
+
+ ioat_set_chainaddr(ioat, desc->txd.phys);
+ ioat_start(chan);
+ spin_unlock_bh(&ioat->desc_lock);
+}
+
+/*
+ * Perform a IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void __devinit ioat_dma_test_callback(void *dma_async_param)
+{
+ struct completion *cmp = dma_async_param;
+
+ complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+int __devinit ioat_dma_self_test(struct ioatdma_device *device)
+{
+ int i;
+ u8 *src;
+ u8 *dest;
+ struct dma_device *dma = &device->common;
+ struct device *dev = &device->pdev->dev;
+ struct dma_chan *dma_chan;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ int err = 0;
+ struct completion cmp;
+ unsigned long tmo;
+ unsigned long flags;
+
+ src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+ dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < IOAT_TEST_SIZE; i++)
+ src[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ dev_err(dev, "selftest cannot allocate chan resource\n");
+ err = -ENODEV;
+ goto out;
+ }
+
+ dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+ dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+ flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
+ DMA_PREP_INTERRUPT;
+ tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+ IOAT_TEST_SIZE, flags);
+ if (!tx) {
+ dev_err(dev, "Self-test prep failed, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test setup failed, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
+ != DMA_SUCCESS) {
+ dev_err(dev, "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+ dev_err(dev, "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ dma->device_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+ sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+ "set ioat interrupt style: msix (default), "
+ "msix-single-vector, msi, intx)");
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+{
+ struct ioat_chan_common *chan;
+ struct pci_dev *pdev = device->pdev;
+ struct device *dev = &pdev->dev;
+ struct msix_entry *msix;
+ int i, j, msixcnt;
+ int err = -EINVAL;
+ u8 intrctrl = 0;
+
+ if (!strcmp(ioat_interrupt_style, "msix"))
+ goto msix;
+ if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+ goto msix_single_vector;
+ if (!strcmp(ioat_interrupt_style, "msi"))
+ goto msi;
+ if (!strcmp(ioat_interrupt_style, "intx"))
+ goto intx;
+ dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+ goto err_no_irq;
+
+msix:
+ /* The number of MSI-X vectors should equal the number of channels */
+ msixcnt = device->common.chancnt;
+ for (i = 0; i < msixcnt; i++)
+ device->msix_entries[i].entry = i;
+
+ err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
+ if (err < 0)
+ goto msi;
+ if (err > 0)
+ goto msix_single_vector;
+
+ for (i = 0; i < msixcnt; i++) {
+ msix = &device->msix_entries[i];
+ chan = ioat_chan_by_index(device, i);
+ err = devm_request_irq(dev, msix->vector,
+ ioat_dma_do_interrupt_msix, 0,
+ "ioat-msix", chan);
+ if (err) {
+ for (j = 0; j < i; j++) {
+ msix = &device->msix_entries[j];
+ chan = ioat_chan_by_index(device, j);
+ devm_free_irq(dev, msix->vector, chan);
+ }
+ goto msix_single_vector;
+ }
+ }
+ intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ goto done;
+
+msix_single_vector:
+ msix = &device->msix_entries[0];
+ msix->entry = 0;
+ err = pci_enable_msix(pdev, device->msix_entries, 1);
+ if (err)
+ goto msi;
+
+ err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
+ "ioat-msix", device);
+ if (err) {
+ pci_disable_msix(pdev);
+ goto msi;
+ }
+ goto done;
+
+msi:
+ err = pci_enable_msi(pdev);
+ if (err)
+ goto intx;
+
+ err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+ "ioat-msi", device);
+ if (err) {
+ pci_disable_msi(pdev);
+ goto intx;
+ }
+ goto done;
+
+intx:
+ err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+ IRQF_SHARED, "ioat-intx", device);
+ if (err)
+ goto err_no_irq;
+
+done:
+ if (device->intr_quirk)
+ device->intr_quirk(device);
+ intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+ writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+ return 0;
+
+err_no_irq:
+ /* Disable all interrupt generation */
+ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+ dev_err(dev, "no usable interrupts\n");
+ return err;
+}
+
+static void ioat_disable_interrupts(struct ioatdma_device *device)
+{
+ /* Disable all interrupt generation */
+ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device)
+{
+ int err = -ENODEV;
+ struct dma_device *dma = &device->common;
+ struct pci_dev *pdev = device->pdev;
+ struct device *dev = &pdev->dev;
+
+ /* DMA coherent memory pool for DMA descriptor allocations */
+ device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+ sizeof(struct ioat_dma_descriptor),
+ 64, 0);
+ if (!device->dma_pool) {
+ err = -ENOMEM;
+ goto err_dma_pool;
+ }
+
+ device->completion_pool = pci_pool_create("completion_pool", pdev,
+ sizeof(u64), SMP_CACHE_BYTES,
+ SMP_CACHE_BYTES);
+
+ if (!device->completion_pool) {
+ err = -ENOMEM;
+ goto err_completion_pool;
+ }
+
+ device->enumerate_channels(device);
+
+ dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+ dma->dev = &pdev->dev;
+
+ if (!dma->chancnt) {
+ dev_err(dev, "zero channels detected\n");
+ goto err_setup_interrupts;
+ }
+
+ err = ioat_dma_setup_interrupts(device);
+ if (err)
+ goto err_setup_interrupts;
+
+ err = device->self_test(device);
+ if (err)
+ goto err_self_test;
+
+ return 0;
+
+err_self_test:
+ ioat_disable_interrupts(device);
+err_setup_interrupts:
+ pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+ pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+ return err;
+}
+
+int __devinit ioat_register(struct ioatdma_device *device)
+{
+ int err = dma_async_device_register(&device->common);
+
+ if (err) {
+ ioat_disable_interrupts(device);
+ pci_pool_destroy(device->completion_pool);
+ pci_pool_destroy(device->dma_pool);
+ }
+
+ return err;
+}
+
+/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
+static void ioat1_intr_quirk(struct ioatdma_device *device)
+{
+ struct pci_dev *pdev = device->pdev;
+ u32 dmactrl;
+
+ pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
+ if (pdev->msi_enabled)
+ dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
+ else
+ dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
+ pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+ return sprintf(page, "%d\n", ioat->desccount);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+ return sprintf(page, "%d\n", ioat->active);
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static ssize_t cap_show(struct dma_chan *c, char *page)
+{
+ struct dma_device *dma = c->device;
+
+ return sprintf(page, "copy%s%s%s%s%s%s\n",
+ dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+ dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+ dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+ dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+ dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "",
+ dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
+
+}
+struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+
+static ssize_t version_show(struct dma_chan *c, char *page)
+{
+ struct dma_device *dma = c->device;
+ struct ioatdma_device *device = to_ioatdma_device(dma);
+
+ return sprintf(page, "%d.%d\n",
+ device->version >> 4, device->version & 0xf);
+}
+struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+
+static struct attribute *ioat1_attrs[] = {
+ &ring_size_attr.attr,
+ &ring_active_attr.attr,
+ &ioat_cap_attr.attr,
+ &ioat_version_attr.attr,
+ NULL,
+};
+
+static ssize_t
+ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct ioat_sysfs_entry *entry;
+ struct ioat_chan_common *chan;
+
+ entry = container_of(attr, struct ioat_sysfs_entry, attr);
+ chan = container_of(kobj, struct ioat_chan_common, kobj);
+
+ if (!entry->show)
+ return -EIO;
+ return entry->show(&chan->common, page);
+}
+
+struct sysfs_ops ioat_sysfs_ops = {
+ .show = ioat_attr_show,
+};
+
+static struct kobj_type ioat1_ktype = {
+ .sysfs_ops = &ioat_sysfs_ops,
+ .default_attrs = ioat1_attrs,
+};
+
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
+{
+ struct dma_device *dma = &device->common;
+ struct dma_chan *c;
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ struct ioat_chan_common *chan = to_chan_common(c);
+ struct kobject *parent = &c->dev->device.kobj;
+ int err;
+
+ err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
+ if (err) {
+ dev_warn(to_dev(chan),
+ "sysfs init error (%d), continuing...\n", err);
+ kobject_put(&chan->kobj);
+ set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
+ }
+ }
+}
+
+void ioat_kobject_del(struct ioatdma_device *device)
+{
+ struct dma_device *dma = &device->common;
+ struct dma_chan *c;
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ struct ioat_chan_common *chan = to_chan_common(c);
+
+ if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
+ kobject_del(&chan->kobj);
+ kobject_put(&chan->kobj);
+ }
+ }
+}
+
+int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
+{
+ struct pci_dev *pdev = device->pdev;
+ struct dma_device *dma;
+ int err;
+
+ device->intr_quirk = ioat1_intr_quirk;
+ device->enumerate_channels = ioat1_enumerate_channels;
+ device->self_test = ioat_dma_self_test;
+ dma = &device->common;
+ dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+ dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
+ dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
+ dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
+ dma->device_is_tx_complete = ioat1_dma_is_complete;
+
+ err = ioat_probe(device);
+ if (err)
+ return err;
+ ioat_set_tcp_copy_break(4096);
+ err = ioat_register(device);
+ if (err)
+ return err;
+ ioat_kobject_add(device, &ioat1_ktype);
+
+ if (dca)
+ device->dca = ioat_dca_init(pdev, device->reg_base);
+
+ return err;
+}
+
+void __devexit ioat_dma_remove(struct ioatdma_device *device)
+{
+ struct dma_device *dma = &device->common;
+
+ ioat_disable_interrupts(device);
+
+ ioat_kobject_del(device);
+
+ dma_async_device_unregister(dma);
+
+ pci_pool_destroy(device->dma_pool);
+ pci_pool_destroy(device->completion_pool);
+
+ INIT_LIST_HEAD(&dma->channels);
+}
--- /dev/null
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "hw.h"
+#include "registers.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <net/tcp.h>
+
+#define IOAT_DMA_VERSION "4.00"
+
+#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU ~0
+
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
+#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
+
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+/**
+ * struct ioatdma_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @common: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ * @dca: direct cache access context
+ * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
+ * @enumerate_channels: hw version specific channel enumeration
+ * @cleanup_tasklet: select between the v2 and v3 cleanup routines
+ * @timer_fn: select between the v2 and v3 timer watchdog routines
+ * @self_test: hardware version specific self test for each supported op type
+ *
+ * Note: the v3 cleanup routine supports raid operations
+ */
+struct ioatdma_device {
+ struct pci_dev *pdev;
+ void __iomem *reg_base;
+ struct pci_pool *dma_pool;
+ struct pci_pool *completion_pool;
+ struct dma_device common;
+ u8 version;
+ struct msix_entry msix_entries[4];
+ struct ioat_chan_common *idx[4];
+ struct dca_provider *dca;
+ void (*intr_quirk)(struct ioatdma_device *device);
+ int (*enumerate_channels)(struct ioatdma_device *device);
+ void (*cleanup_tasklet)(unsigned long data);
+ void (*timer_fn)(unsigned long data);
+ int (*self_test)(struct ioatdma_device *device);
+};
+
+struct ioat_chan_common {
+ struct dma_chan common;
+ void __iomem *reg_base;
+ unsigned long last_completion;
+ spinlock_t cleanup_lock;
+ dma_cookie_t completed_cookie;
+ unsigned long state;
+ #define IOAT_COMPLETION_PENDING 0
+ #define IOAT_COMPLETION_ACK 1
+ #define IOAT_RESET_PENDING 2
+ #define IOAT_KOBJ_INIT_FAIL 3
+ struct timer_list timer;
+ #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
+ #define IDLE_TIMEOUT msecs_to_jiffies(2000)
+ #define RESET_DELAY msecs_to_jiffies(100)
+ struct ioatdma_device *device;
+ dma_addr_t completion_dma;
+ u64 *completion;
+ struct tasklet_struct cleanup_task;
+ struct kobject kobj;
+};
+
+struct ioat_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct dma_chan *, char *);
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ */
+struct ioat_dma_chan {
+ struct ioat_chan_common base;
+
+ size_t xfercap; /* XFERCAP register value expanded out */
+
+ spinlock_t desc_lock;
+ struct list_head free_desc;
+ struct list_head used_desc;
+
+ int pending;
+ u16 desccount;
+ u16 active;
+};
+
+static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
+{
+ return container_of(c, struct ioat_chan_common, common);
+}
+
+static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
+{
+ struct ioat_chan_common *chan = to_chan_common(c);
+
+ return container_of(chan, struct ioat_dma_chan, base);
+}
+
+/**
+ * ioat_is_complete - poll the status of an ioat transaction
+ * @c: channel handle
+ * @cookie: transaction identifier
+ * @done: if set, updated with last completed transaction
+ * @used: if set, updated with last used transaction
+ */
+static inline enum dma_status
+ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat_chan_common *chan = to_chan_common(c);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+
+ last_used = c->cookie;
+ last_complete = chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @node: this descriptor will either be on the free list,
+ * or attached to a transaction list (tx_list)
+ * @txd: the generic software descriptor for all engines
+ * @id: identifier for debug
+ */
+struct ioat_desc_sw {
+ struct ioat_dma_descriptor *hw;
+ struct list_head node;
+ size_t len;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ #ifdef DEBUG
+ int id;
+ #endif
+};
+
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+static inline void
+__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
+ struct dma_async_tx_descriptor *tx, int id)
+{
+ struct device *dev = to_dev(chan);
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+ " ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
+ (unsigned long long) tx->phys,
+ (unsigned long long) hw->next, tx->cookie, tx->flags,
+ hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
+#define dump_desc_dbg(c, d) \
+ ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; })
+
+static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
+{
+ #ifdef CONFIG_NET_DMA
+ sysctl_tcp_dma_copybreak = copybreak;
+ #endif
+}
+
+static inline struct ioat_chan_common *
+ioat_chan_by_index(struct ioatdma_device *device, int index)
+{
+ return device->idx[index];
+}
+
+static inline u64 ioat_chansts(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+ u64 status;
+ u32 status_lo;
+
+ /* We need to read the low address first as this causes the
+ * chipset to latch the upper bits for the subsequent read
+ */
+ status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
+ status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
+ status <<= 32;
+ status |= status_lo;
+
+ return status;
+}
+
+static inline void ioat_start(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+
+ writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+ return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
+{
+ return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+static inline void ioat_suspend(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+
+ writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ writel(addr & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+ writel(addr >> 32,
+ chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+}
+
+static inline bool is_ioat_active(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline bool is_ioat_idle(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
+}
+
+static inline bool is_ioat_halted(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline bool is_ioat_suspended(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+/* channel was fatally programmed */
+static inline bool is_ioat_bug(unsigned long err)
+{
+ return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR|
+ IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR|
+ IOAT_CHANERR_LENGTH_ERR));
+}
+
+static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
+ int direction, enum dma_ctrl_flags flags, bool dst)
+{
+ if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
+ (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
+ pci_unmap_single(pdev, addr, len, direction);
+ else
+ pci_unmap_page(pdev, addr, len, direction);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device);
+int __devinit ioat_register(struct ioatdma_device *device);
+int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat_dma_self_test(struct ioatdma_device *device);
+void __devexit ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
+ void __iomem *iobase);
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
+void ioat_init_channel(struct ioatdma_device *device,
+ struct ioat_chan_common *chan, int idx,
+ void (*timer_fn)(unsigned long),
+ void (*tasklet)(unsigned long),
+ unsigned long ioat);
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+ size_t len, struct ioat_dma_descriptor *hw);
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+ unsigned long *phys_complete);
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *device);
+extern struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+#endif /* IOATDMA_H */
--- /dev/null
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
+ * does asynchronous data movement and checksumming operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_ring_alloc_order = 8;
+module_param(ioat_ring_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_alloc_order,
+ "ioat2+: allocate 2^n descriptors per channel"
+ " (default: 8 max: 16)");
+static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
+module_param(ioat_ring_max_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_max_alloc_order,
+ "ioat2+: upper limit for ring size (default: 16)");
+
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
+{
+ void * __iomem reg_base = ioat->base.reg_base;
+
+ ioat->pending = 0;
+ ioat->dmacount += ioat2_ring_pending(ioat);
+ ioat->issued = ioat->head;
+ /* make descriptor updates globally visible before notifying channel */
+ wmb();
+ writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+ dev_dbg(to_dev(&ioat->base),
+ "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+}
+
+void ioat2_issue_pending(struct dma_chan *chan)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
+
+ spin_lock_bh(&ioat->ring_lock);
+ if (ioat->pending == 1)
+ __ioat2_issue_pending(ioat);
+ spin_unlock_bh(&ioat->ring_lock);
+}
+
+/**
+ * ioat2_update_pending - log pending descriptors
+ * @ioat: ioat2+ channel
+ *
+ * set pending to '1' unless pending is already set to '2', pending == 2
+ * indicates that submission is temporarily blocked due to an in-flight
+ * reset. If we are already above the ioat_pending_level threshold then
+ * just issue pending.
+ *
+ * called with ring_lock held
+ */
+static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
+{
+ if (unlikely(ioat->pending == 2))
+ return;
+ else if (ioat2_ring_pending(ioat) > ioat_pending_level)
+ __ioat2_issue_pending(ioat);
+ else
+ ioat->pending = 1;
+}
+
+static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_ring_ent *desc;
+ struct ioat_dma_descriptor *hw;
+ int idx;
+
+ if (ioat2_ring_space(ioat) < 1) {
+ dev_err(to_dev(&ioat->base),
+ "Unable to start null desc - ring full\n");
+ return;
+ }
+
+ dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+ idx = ioat2_desc_alloc(ioat, 1);
+ desc = ioat2_get_ring_ent(ioat, idx);
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.compl_write = 1;
+ /* set size to non-zero value (channel returns error when size is 0) */
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+ async_tx_ack(&desc->txd);
+ ioat2_set_chainaddr(ioat, desc->txd.phys);
+ dump_desc_dbg(ioat, desc);
+ __ioat2_issue_pending(ioat);
+}
+
+static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+ spin_lock_bh(&ioat->ring_lock);
+ __ioat2_start_null_desc(ioat);
+ spin_unlock_bh(&ioat->ring_lock);
+}
+
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct dma_async_tx_descriptor *tx;
+ struct ioat_ring_ent *desc;
+ bool seen_current = false;
+ u16 active;
+ int i;
+
+ dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+
+ active = ioat2_ring_active(ioat);
+ for (i = 0; i < active && !seen_current; i++) {
+ prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ tx = &desc->txd;
+ dump_desc_dbg(ioat, desc);
+ if (tx->cookie) {
+ ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ chan->completed_cookie = tx->cookie;
+ tx->cookie = 0;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL;
+ }
+ }
+
+ if (tx->phys == phys_complete)
+ seen_current = true;
+ }
+ ioat->tail += i;
+ BUG_ON(!seen_current); /* no active descs have written a completion? */
+
+ chan->last_completion = phys_complete;
+ if (ioat->head == ioat->tail) {
+ dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+}
+
+/**
+ * ioat2_cleanup - clean finished descriptors (advance tail pointer)
+ * @chan: ioat channel to be cleaned up
+ */
+static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ prefetch(chan->completion);
+
+ if (!spin_trylock_bh(&chan->cleanup_lock))
+ return;
+
+ if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ if (!spin_trylock_bh(&ioat->ring_lock)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ __cleanup(ioat, phys_complete);
+
+ spin_unlock_bh(&ioat->ring_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+void ioat2_cleanup_tasklet(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+
+ ioat2_cleanup(ioat);
+ writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ /* set the tail to be re-issued */
+ ioat->issued = ioat->tail;
+ ioat->dmacount = 0;
+ set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ dev_dbg(to_dev(chan),
+ "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+
+ if (ioat2_ring_pending(ioat)) {
+ struct ioat_ring_ent *desc;
+
+ desc = ioat2_get_ring_ent(ioat, ioat->tail);
+ ioat2_set_chainaddr(ioat, desc->txd.phys);
+ __ioat2_issue_pending(ioat);
+ } else
+ __ioat2_start_null_desc(ioat);
+}
+
+static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+ u32 status;
+
+ status = ioat_chansts(chan);
+ if (is_ioat_active(status) || is_ioat_idle(status))
+ ioat_suspend(chan);
+ while (is_ioat_active(status) || is_ioat_idle(status)) {
+ status = ioat_chansts(chan);
+ cpu_relax();
+ }
+
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+
+ __ioat2_restart_chan(ioat);
+}
+
+void ioat2_timer_event(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+ struct ioat_chan_common *chan = &ioat->base;
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+ u64 status;
+
+ spin_lock_bh(&ioat->ring_lock);
+ status = ioat_chansts(chan);
+
+ /* when halted due to errors check for channel
+ * programming errors before advancing the completion state
+ */
+ if (is_ioat_halted(status)) {
+ u32 chanerr;
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ BUG_ON(is_ioat_bug(chanerr));
+ }
+
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat2_restart_channel(ioat);
+ else {
+ set_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->ring_lock);
+ } else {
+ u16 active;
+
+ /* if the ring is idle, empty, and oversized try to step
+ * down the size
+ */
+ spin_lock_bh(&ioat->ring_lock);
+ active = ioat2_ring_active(ioat);
+ if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+ reshape_ring(ioat, ioat->alloc_order-1);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ /* keep shrinking until we get back to our minimum
+ * default size
+ */
+ if (ioat->alloc_order > ioat_get_alloc_order())
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+/**
+ * ioat2_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+int ioat2_enumerate_channels(struct ioatdma_device *device)
+{
+ struct ioat2_dma_chan *ioat;
+ struct device *dev = &device->pdev->dev;
+ struct dma_device *dma = &device->common;
+ u8 xfercap_log;
+ int i;
+
+ INIT_LIST_HEAD(&dma->channels);
+ dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+ dma->chancnt &= 0x1f; /* bits [4:0] valid */
+ if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+ dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+ dma->chancnt, ARRAY_SIZE(device->idx));
+ dma->chancnt = ARRAY_SIZE(device->idx);
+ }
+ xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+ xfercap_log &= 0x1f; /* bits [4:0] valid */
+ if (xfercap_log == 0)
+ return 0;
+ dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+ /* FIXME which i/oat version is i7300? */
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+ if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+ dma->chancnt--;
+#endif
+ for (i = 0; i < dma->chancnt; i++) {
+ ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+ if (!ioat)
+ break;
+
+ ioat_init_channel(device, &ioat->base, i,
+ device->timer_fn,
+ device->cleanup_tasklet,
+ (unsigned long) ioat);
+ ioat->xfercap_log = xfercap_log;
+ spin_lock_init(&ioat->ring_lock);
+ }
+ dma->chancnt = i;
+ return i;
+}
+
+static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_chan *c = tx->chan;
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ dma_cookie_t cookie = c->cookie;
+
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ tx->cookie = cookie;
+ c->cookie = cookie;
+ dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+ if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ ioat2_update_pending(ioat);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ return cookie;
+}
+
+static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+{
+ struct ioat_dma_descriptor *hw;
+ struct ioat_ring_ent *desc;
+ struct ioatdma_device *dma;
+ dma_addr_t phys;
+
+ dma = to_ioatdma_device(chan->device);
+ hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
+ if (!hw)
+ return NULL;
+ memset(hw, 0, sizeof(*hw));
+
+ desc = kmem_cache_alloc(ioat2_cache, flags);
+ if (!desc) {
+ pci_pool_free(dma->dma_pool, hw, phys);
+ return NULL;
+ }
+ memset(desc, 0, sizeof(*desc));
+
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = ioat2_tx_submit_unlock;
+ desc->hw = hw;
+ desc->txd.phys = phys;
+ return desc;
+}
+
+static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+ struct ioatdma_device *dma;
+
+ dma = to_ioatdma_device(chan->device);
+ pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
+ kmem_cache_free(ioat2_cache, desc);
+}
+
+static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+ struct ioat_ring_ent **ring;
+ int descs = 1 << order;
+ int i;
+
+ if (order > ioat_get_max_alloc_order())
+ return NULL;
+
+ /* allocate the array to hold the software ring */
+ ring = kcalloc(descs, sizeof(*ring), flags);
+ if (!ring)
+ return NULL;
+ for (i = 0; i < descs; i++) {
+ ring[i] = ioat2_alloc_ring_ent(c, flags);
+ if (!ring[i]) {
+ while (i--)
+ ioat2_free_ring_ent(ring[i], c);
+ kfree(ring);
+ return NULL;
+ }
+ set_desc_id(ring[i], i);
+ }
+
+ /* link descs */
+ for (i = 0; i < descs-1; i++) {
+ struct ioat_ring_ent *next = ring[i+1];
+ struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+ hw->next = next->txd.phys;
+ }
+ ring[i]->hw->next = ring[0]->txd.phys;
+
+ return ring;
+}
+
+/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
+ * @chan: channel to be initialized
+ */
+int ioat2_alloc_chan_resources(struct dma_chan *c)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_ring_ent **ring;
+ u32 chanerr;
+ int order;
+
+ /* have we already been set up? */
+ if (ioat->ring)
+ return 1 << ioat->alloc_order;
+
+ /* Setup register to interrupt and write completion status on error */
+ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ if (chanerr) {
+ dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+ writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /* allocate a completion writeback area */
+ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+ chan->completion = pci_pool_alloc(chan->device->completion_pool,
+ GFP_KERNEL, &chan->completion_dma);
+ if (!chan->completion)
+ return -ENOMEM;
+
+ memset(chan->completion, 0, sizeof(*chan->completion));
+ writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(((u64) chan->completion_dma) >> 32,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+ order = ioat_get_alloc_order();
+ ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
+ if (!ring)
+ return -ENOMEM;
+
+ spin_lock_bh(&ioat->ring_lock);
+ ioat->ring = ring;
+ ioat->head = 0;
+ ioat->issued = 0;
+ ioat->tail = 0;
+ ioat->pending = 0;
+ ioat->alloc_order = order;
+ spin_unlock_bh(&ioat->ring_lock);
+
+ tasklet_enable(&chan->cleanup_task);
+ ioat2_start_null_desc(ioat);
+
+ return 1 << ioat->alloc_order;
+}
+
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
+{
+ /* reshape differs from normal ring allocation in that we want
+ * to allocate a new software ring while only
+ * extending/truncating the hardware ring
+ */
+ struct ioat_chan_common *chan = &ioat->base;
+ struct dma_chan *c = &chan->common;
+ const u16 curr_size = ioat2_ring_mask(ioat) + 1;
+ const u16 active = ioat2_ring_active(ioat);
+ const u16 new_size = 1 << order;
+ struct ioat_ring_ent **ring;
+ u16 i;
+
+ if (order > ioat_get_max_alloc_order())
+ return false;
+
+ /* double check that we have at least 1 free descriptor */
+ if (active == curr_size)
+ return false;
+
+ /* when shrinking, verify that we can hold the current active
+ * set in the new ring
+ */
+ if (active >= new_size)
+ return false;
+
+ /* allocate the array to hold the software ring */
+ ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
+ if (!ring)
+ return false;
+
+ /* allocate/trim descriptors as needed */
+ if (new_size > curr_size) {
+ /* copy current descriptors to the new ring */
+ for (i = 0; i < curr_size; i++) {
+ u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+ ring[new_idx] = ioat->ring[curr_idx];
+ set_desc_id(ring[new_idx], new_idx);
+ }
+
+ /* add new descriptors to the ring */
+ for (i = curr_size; i < new_size; i++) {
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+ ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
+ if (!ring[new_idx]) {
+ while (i--) {
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+ ioat2_free_ring_ent(ring[new_idx], c);
+ }
+ kfree(ring);
+ return false;
+ }
+ set_desc_id(ring[new_idx], new_idx);
+ }
+
+ /* hw link new descriptors */
+ for (i = curr_size-1; i < new_size; i++) {
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+ struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
+ struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
+
+ hw->next = next->txd.phys;
+ }
+ } else {
+ struct ioat_dma_descriptor *hw;
+ struct ioat_ring_ent *next;
+
+ /* copy current descriptors to the new ring, dropping the
+ * removed descriptors
+ */
+ for (i = 0; i < new_size; i++) {
+ u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+ ring[new_idx] = ioat->ring[curr_idx];
+ set_desc_id(ring[new_idx], new_idx);
+ }
+
+ /* free deleted descriptors */
+ for (i = new_size; i < curr_size; i++) {
+ struct ioat_ring_ent *ent;
+
+ ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
+ ioat2_free_ring_ent(ent, c);
+ }
+
+ /* fix up hardware ring */
+ hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
+ next = ring[(ioat->tail+new_size) & (new_size-1)];
+ hw->next = next->txd.phys;
+ }
+
+ dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+ __func__, new_size);
+
+ kfree(ioat->ring);
+ ioat->ring = ring;
+ ioat->alloc_order = order;
+
+ return true;
+}
+
+/**
+ * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
+ * @idx: gets starting descriptor index on successful allocation
+ * @ioat: ioat2,3 channel (ring) to operate on
+ * @num_descs: allocation length
+ */
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ spin_lock_bh(&ioat->ring_lock);
+ /* never allow the last descriptor to be consumed, we need at
+ * least one free at all times to allow for on-the-fly ring
+ * resizing.
+ */
+ while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
+ if (reshape_ring(ioat, ioat->alloc_order + 1) &&
+ ioat2_ring_space(ioat) > num_descs)
+ break;
+
+ if (printk_ratelimit())
+ dev_dbg(to_dev(chan),
+ "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+ __func__, num_descs, ioat->head, ioat->tail,
+ ioat->issued);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ /* progress reclaim in the allocation failure case we
+ * may be called under bh_disabled so we need to trigger
+ * the timer event directly
+ */
+ spin_lock_bh(&chan->cleanup_lock);
+ if (jiffies > chan->timer.expires &&
+ timer_pending(&chan->timer)) {
+ struct ioatdma_device *device = chan->device;
+
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ spin_unlock_bh(&chan->cleanup_lock);
+ device->timer_fn((unsigned long) ioat);
+ } else
+ spin_unlock_bh(&chan->cleanup_lock);
+ return -ENOMEM;
+ }
+
+ dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
+ __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+
+ *idx = ioat2_desc_alloc(ioat, num_descs);
+ return 0; /* with ioat->ring_lock held */
+}
+
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_dma_descriptor *hw;
+ struct ioat_ring_ent *desc;
+ dma_addr_t dst = dma_dest;
+ dma_addr_t src = dma_src;
+ size_t total_len = len;
+ int num_descs;
+ u16 idx;
+ int i;
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+ /* pass */;
+ else
+ return NULL;
+ i = 0;
+ do {
+ size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ hw = desc->hw;
+
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dst;
+
+ len -= copy;
+ dst += copy;
+ src += copy;
+ dump_desc_dbg(ioat, desc);
+ } while (++i < num_descs);
+
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ hw->ctl_f.compl_write = 1;
+ dump_desc_dbg(ioat, desc);
+ /* we leave the channel locked to ensure in order submission */
+
+ return &desc->txd;
+}
+
+/**
+ * ioat2_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+void ioat2_free_chan_resources(struct dma_chan *c)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioatdma_device *device = chan->device;
+ struct ioat_ring_ent *desc;
+ const u16 total_descs = 1 << ioat->alloc_order;
+ int descs;
+ int i;
+
+ /* Before freeing channel resources first check
+ * if they have been previously allocated for this channel.
+ */
+ if (!ioat->ring)
+ return;
+
+ tasklet_disable(&chan->cleanup_task);
+ del_timer_sync(&chan->timer);
+ device->cleanup_tasklet((unsigned long) ioat);
+
+ /* Delay 100ms after reset to allow internal DMA logic to quiesce
+ * before removing DMA descriptor resources.
+ */
+ writeb(IOAT_CHANCMD_RESET,
+ chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+ mdelay(100);
+
+ spin_lock_bh(&ioat->ring_lock);
+ descs = ioat2_ring_space(ioat);
+ dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
+ for (i = 0; i < descs; i++) {
+ desc = ioat2_get_ring_ent(ioat, ioat->head + i);
+ ioat2_free_ring_ent(desc, c);
+ }
+
+ if (descs < total_descs)
+ dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+ total_descs - descs);
+
+ for (i = 0; i < total_descs - descs; i++) {
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ dump_desc_dbg(ioat, desc);
+ ioat2_free_ring_ent(desc, c);
+ }
+
+ kfree(ioat->ring);
+ ioat->ring = NULL;
+ ioat->alloc_order = 0;
+ pci_pool_free(device->completion_pool, chan->completion,
+ chan->completion_dma);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ chan->last_completion = 0;
+ chan->completion_dma = 0;
+ ioat->pending = 0;
+ ioat->dmacount = 0;
+}
+
+enum dma_status
+ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioatdma_device *device = ioat->base.device;
+
+ if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+ return DMA_SUCCESS;
+
+ device->cleanup_tasklet((unsigned long) ioat);
+
+ return ioat_is_complete(c, cookie, done, used);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+ return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+ /* ...taken outside the lock, no need to be precise */
+ return sprintf(page, "%d\n", ioat2_ring_active(ioat));
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static struct attribute *ioat2_attrs[] = {
+ &ring_size_attr.attr,
+ &ring_active_attr.attr,
+ &ioat_cap_attr.attr,
+ &ioat_version_attr.attr,
+ NULL,
+};
+
+struct kobj_type ioat2_ktype = {
+ .sysfs_ops = &ioat_sysfs_ops,
+ .default_attrs = ioat2_attrs,
+};
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
+{
+ struct pci_dev *pdev = device->pdev;
+ struct dma_device *dma;
+ struct dma_chan *c;
+ struct ioat_chan_common *chan;
+ int err;
+
+ device->enumerate_channels = ioat2_enumerate_channels;
+ device->cleanup_tasklet = ioat2_cleanup_tasklet;
+ device->timer_fn = ioat2_timer_event;
+ device->self_test = ioat_dma_self_test;
+ dma = &device->common;
+ dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+ dma->device_issue_pending = ioat2_issue_pending;
+ dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+ dma->device_free_chan_resources = ioat2_free_chan_resources;
+ dma->device_is_tx_complete = ioat2_is_complete;
+
+ err = ioat_probe(device);
+ if (err)
+ return err;
+ ioat_set_tcp_copy_break(2048);
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ chan = to_chan_common(c);
+ writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
+ chan->reg_base + IOAT_DCACTRL_OFFSET);
+ }
+
+ err = ioat_register(device);
+ if (err)
+ return err;
+
+ ioat_kobject_add(device, &ioat2_ktype);
+
+ if (dca)
+ device->dca = ioat2_dca_init(pdev, device->reg_base);
+
+ return err;
+}
--- /dev/null
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_V2_H
+#define IOATDMA_V2_H
+
+#include <linux/dmaengine.h>
+#include "dma.h"
+#include "hw.h"
+
+
+extern int ioat_pending_level;
+extern int ioat_ring_alloc_order;
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+#define IOAT_MAX_ORDER 16
+#define ioat_get_alloc_order() \
+ (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
+#define ioat_get_max_alloc_order() \
+ (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+
+/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
+ * @base: common ioat channel parameters
+ * @xfercap_log; log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @pending: lock free indicator for issued != head
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @ring: software ring buffer implementation of hardware ring
+ * @ring_lock: protects ring attributes
+ */
+struct ioat2_dma_chan {
+ struct ioat_chan_common base;
+ size_t xfercap_log;
+ u16 head;
+ u16 issued;
+ u16 tail;
+ u16 dmacount;
+ u16 alloc_order;
+ int pending;
+ struct ioat_ring_ent **ring;
+ spinlock_t ring_lock;
+};
+
+static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
+{
+ struct ioat_chan_common *chan = to_chan_common(c);
+
+ return container_of(chan, struct ioat2_dma_chan, base);
+}
+
+static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
+{
+ return (1 << ioat->alloc_order) - 1;
+}
+
+/* count of descriptors in flight with the engine */
+static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
+{
+ return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat);
+}
+
+/* count of descriptors pending submission to hardware */
+static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
+{
+ return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat);
+}
+
+static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
+{
+ u16 num_descs = ioat2_ring_mask(ioat) + 1;
+ u16 active = ioat2_ring_active(ioat);
+
+ BUG_ON(active > num_descs);
+
+ return num_descs - active;
+}
+
+/* assumes caller already checked space */
+static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
+{
+ ioat->head += len;
+ return ioat->head - len;
+}
+
+static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
+{
+ u16 num_descs = len >> ioat->xfercap_log;
+
+ num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
+ return num_descs;
+}
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @fill: hardware fill descriptor
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ */
+
+struct ioat_ring_ent {
+ union {
+ struct ioat_dma_descriptor *hw;
+ struct ioat_fill_descriptor *fill;
+ struct ioat_xor_descriptor *xor;
+ struct ioat_xor_ext_descriptor *xor_ex;
+ struct ioat_pq_descriptor *pq;
+ struct ioat_pq_ext_descriptor *pq_ex;
+ struct ioat_pq_update_descriptor *pqu;
+ struct ioat_raw_descriptor *raw;
+ };
+ size_t len;
+ struct dma_async_tx_descriptor txd;
+ enum sum_check_flags *result;
+ #ifdef DEBUG
+ int id;
+ #endif
+};
+
+static inline struct ioat_ring_ent *
+ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
+{
+ return ioat->ring[idx & ioat2_ring_mask(ioat)];
+}
+
+static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ writel(addr & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+ writel(addr >> 32,
+ chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
+struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
+int ioat2_enumerate_channels(struct ioatdma_device *device);
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags);
+void ioat2_issue_pending(struct dma_chan *chan);
+int ioat2_alloc_chan_resources(struct dma_chan *c);
+void ioat2_free_chan_resources(struct dma_chan *c);
+enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used);
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
+void ioat2_cleanup_tasklet(unsigned long data);
+void ioat2_timer_event(unsigned long data);
+extern struct kobj_type ioat2_ktype;
+extern struct kmem_cache *ioat2_cache;
+#endif /* IOATDMA_V2_H */
--- /dev/null
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Support routines for v3+ hardware
+ */
+
+#include <linux/pci.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+#include "dma_v2.h"
+
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc __read_mostly = 0xd0;
+static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc __read_mostly = 0xf8;
+static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
+
+static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+ struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+ return raw->field[xor_idx_to_field[idx]];
+}
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+ dma_addr_t addr, u32 offset, int idx)
+{
+ struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+ raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+ struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+ return raw->field[pq_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+ dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+ struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+ struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+ raw->field[pq_idx_to_field[idx]] = addr + offset;
+ pq->coef[idx] = coef;
+}
+
+static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
+ struct ioat_ring_ent *desc, int idx)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct pci_dev *pdev = chan->device->pdev;
+ size_t len = desc->len;
+ size_t offset = len - desc->hw->size;
+ struct dma_async_tx_descriptor *tx = &desc->txd;
+ enum dma_ctrl_flags flags = tx->flags;
+
+ switch (desc->hw->ctl_f.op) {
+ case IOAT_OP_COPY:
+ if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
+ ioat_dma_unmap(chan, flags, len, desc->hw);
+ break;
+ case IOAT_OP_FILL: {
+ struct ioat_fill_descriptor *hw = desc->fill;
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ ioat_unmap(pdev, hw->dst_addr - offset, len,
+ PCI_DMA_FROMDEVICE, flags, 1);
+ break;
+ }
+ case IOAT_OP_XOR_VAL:
+ case IOAT_OP_XOR: {
+ struct ioat_xor_descriptor *xor = desc->xor;
+ struct ioat_ring_ent *ext;
+ struct ioat_xor_ext_descriptor *xor_ex = NULL;
+ int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
+ struct ioat_raw_descriptor *descs[2];
+ int i;
+
+ if (src_cnt > 5) {
+ ext = ioat2_get_ring_ent(ioat, idx + 1);
+ xor_ex = ext->xor_ex;
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ descs[0] = (struct ioat_raw_descriptor *) xor;
+ descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+ for (i = 0; i < src_cnt; i++) {
+ dma_addr_t src = xor_get_src(descs, i);
+
+ ioat_unmap(pdev, src - offset, len,
+ PCI_DMA_TODEVICE, flags, 0);
+ }
+
+ /* dest is a source in xor validate operations */
+ if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
+ ioat_unmap(pdev, xor->dst_addr - offset, len,
+ PCI_DMA_TODEVICE, flags, 1);
+ break;
+ }
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ ioat_unmap(pdev, xor->dst_addr - offset, len,
+ PCI_DMA_FROMDEVICE, flags, 1);
+ break;
+ }
+ case IOAT_OP_PQ_VAL:
+ case IOAT_OP_PQ: {
+ struct ioat_pq_descriptor *pq = desc->pq;
+ struct ioat_ring_ent *ext;
+ struct ioat_pq_ext_descriptor *pq_ex = NULL;
+ int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+ struct ioat_raw_descriptor *descs[2];
+ int i;
+
+ if (src_cnt > 3) {
+ ext = ioat2_get_ring_ent(ioat, idx + 1);
+ pq_ex = ext->pq_ex;
+ }
+
+ /* in the 'continue' case don't unmap the dests as sources */
+ if (dmaf_p_disabled_continue(flags))
+ src_cnt--;
+ else if (dmaf_continue(flags))
+ src_cnt -= 3;
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ descs[0] = (struct ioat_raw_descriptor *) pq;
+ descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+ for (i = 0; i < src_cnt; i++) {
+ dma_addr_t src = pq_get_src(descs, i);
+
+ ioat_unmap(pdev, src - offset, len,
+ PCI_DMA_TODEVICE, flags, 0);
+ }
+
+ /* the dests are sources in pq validate operations */
+ if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
+ if (!(flags & DMA_PREP_PQ_DISABLE_P))
+ ioat_unmap(pdev, pq->p_addr - offset,
+ len, PCI_DMA_TODEVICE, flags, 0);
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ ioat_unmap(pdev, pq->q_addr - offset,
+ len, PCI_DMA_TODEVICE, flags, 0);
+ break;
+ }
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ if (!(flags & DMA_PREP_PQ_DISABLE_P))
+ ioat_unmap(pdev, pq->p_addr - offset, len,
+ PCI_DMA_BIDIRECTIONAL, flags, 1);
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ ioat_unmap(pdev, pq->q_addr - offset, len,
+ PCI_DMA_BIDIRECTIONAL, flags, 1);
+ }
+ break;
+ }
+ default:
+ dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
+ __func__, desc->hw->ctl_f.op);
+ }
+}
+
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+ struct ioat_dma_descriptor *hw = desc->hw;
+
+ if (hw->ctl_f.op == IOAT_OP_XOR ||
+ hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+ struct ioat_xor_descriptor *xor = desc->xor;
+
+ if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+ return true;
+ } else if (hw->ctl_f.op == IOAT_OP_PQ ||
+ hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+ struct ioat_pq_descriptor *pq = desc->pq;
+
+ if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat: channel (ring) to clean
+ *
+ * The difference from the dma_v2.c __cleanup() is that this routine
+ * handles extended descriptors and dma-unmapping raid operations.
+ */
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_ring_ent *desc;
+ bool seen_current = false;
+ u16 active;
+ int i;
+
+ dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+
+ active = ioat2_ring_active(ioat);
+ for (i = 0; i < active && !seen_current; i++) {
+ struct dma_async_tx_descriptor *tx;
+
+ prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ dump_desc_dbg(ioat, desc);
+ tx = &desc->txd;
+ if (tx->cookie) {
+ chan->completed_cookie = tx->cookie;
+ ioat3_dma_unmap(ioat, desc, ioat->tail + i);
+ tx->cookie = 0;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL;
+ }
+ }
+
+ if (tx->phys == phys_complete)
+ seen_current = true;
+
+ /* skip extended descriptors */
+ if (desc_has_ext(desc)) {
+ BUG_ON(i + 1 >= active);
+ i++;
+ }
+ }
+ ioat->tail += i;
+ BUG_ON(!seen_current); /* no active descs have written a completion? */
+ chan->last_completion = phys_complete;
+ if (ioat->head == ioat->tail) {
+ dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+}
+
+static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ prefetch(chan->completion);
+
+ if (!spin_trylock_bh(&chan->cleanup_lock))
+ return;
+
+ if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ if (!spin_trylock_bh(&ioat->ring_lock)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ __cleanup(ioat, phys_complete);
+
+ spin_unlock_bh(&ioat->ring_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat3_cleanup_tasklet(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+
+ ioat3_cleanup(ioat);
+ writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
+ ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+ u32 status;
+
+ status = ioat_chansts(chan);
+ if (is_ioat_active(status) || is_ioat_idle(status))
+ ioat_suspend(chan);
+ while (is_ioat_active(status) || is_ioat_idle(status)) {
+ status = ioat_chansts(chan);
+ cpu_relax();
+ }
+
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+
+ __ioat2_restart_chan(ioat);
+}
+
+static void ioat3_timer_event(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+ struct ioat_chan_common *chan = &ioat->base;
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+ u64 status;
+
+ spin_lock_bh(&ioat->ring_lock);
+ status = ioat_chansts(chan);
+
+ /* when halted due to errors check for channel
+ * programming errors before advancing the completion state
+ */
+ if (is_ioat_halted(status)) {
+ u32 chanerr;
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ BUG_ON(is_ioat_bug(chanerr));
+ }
+
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat3_restart_channel(ioat);
+ else {
+ set_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->ring_lock);
+ } else {
+ u16 active;
+
+ /* if the ring is idle, empty, and oversized try to step
+ * down the size
+ */
+ spin_lock_bh(&ioat->ring_lock);
+ active = ioat2_ring_active(ioat);
+ if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+ reshape_ring(ioat, ioat->alloc_order-1);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ /* keep shrinking until we get back to our minimum
+ * default size
+ */
+ if (ioat->alloc_order > ioat_get_alloc_order())
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+ if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+ return DMA_SUCCESS;
+
+ ioat3_cleanup(ioat);
+
+ return ioat_is_complete(c, cookie, done, used);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *desc;
+ size_t total_len = len;
+ struct ioat_fill_descriptor *fill;
+ int num_descs;
+ u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
+ u16 idx;
+ int i;
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+ /* pass */;
+ else
+ return NULL;
+ i = 0;
+ do {
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ fill = desc->fill;
+
+ fill->size = xfer_size;
+ fill->src_data = src_data;
+ fill->dst_addr = dest;
+ fill->ctl = 0;
+ fill->ctl_f.op = IOAT_OP_FILL;
+
+ len -= xfer_size;
+ dest += xfer_size;
+ dump_desc_dbg(ioat, desc);
+ } while (++i < num_descs);
+
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ fill->ctl_f.compl_write = 1;
+ dump_desc_dbg(ioat, desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+ dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *compl_desc;
+ struct ioat_ring_ent *desc;
+ struct ioat_ring_ent *ext;
+ size_t total_len = len;
+ struct ioat_xor_descriptor *xor;
+ struct ioat_xor_ext_descriptor *xor_ex = NULL;
+ struct ioat_dma_descriptor *hw;
+ u32 offset = 0;
+ int num_descs;
+ int with_ext;
+ int i;
+ u16 idx;
+ u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
+ BUG_ON(src_cnt < 2);
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ /* we need 2x the number of descriptors to cover greater than 5
+ * sources
+ */
+ if (src_cnt > 5) {
+ with_ext = 1;
+ num_descs *= 2;
+ } else
+ with_ext = 0;
+
+ /* completion writes from the raid engine may pass completion
+ * writes from the legacy engine, so we need one extra null
+ * (legacy) descriptor to ensure all completion writes arrive in
+ * order.
+ */
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+ /* pass */;
+ else
+ return NULL;
+ i = 0;
+ do {
+ struct ioat_raw_descriptor *descs[2];
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+ int s;
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ xor = desc->xor;
+
+ /* save a branch by unconditionally retrieving the
+ * extended descriptor xor_set_src() knows to not write
+ * to it in the single descriptor case
+ */
+ ext = ioat2_get_ring_ent(ioat, idx + i + 1);
+ xor_ex = ext->xor_ex;
+
+ descs[0] = (struct ioat_raw_descriptor *) xor;
+ descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+ for (s = 0; s < src_cnt; s++)
+ xor_set_src(descs, src[s], offset, s);
+ xor->size = xfer_size;
+ xor->dst_addr = dest + offset;
+ xor->ctl = 0;
+ xor->ctl_f.op = op;
+ xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ dump_desc_dbg(ioat, desc);
+ } while ((i += 1 + with_ext) < num_descs);
+
+ /* last xor descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+ /* completion descriptor carries interrupt bit */
+ compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+ compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+ hw = compl_desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ dump_desc_dbg(ioat, compl_desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags)
+{
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *result = 0;
+
+ return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
+ src_cnt - 1, len, flags);
+}
+
+static void
+dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
+{
+ struct device *dev = to_dev(&ioat->base);
+ struct ioat_pq_descriptor *pq = desc->pq;
+ struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+ struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+ int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+ int i;
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+ " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+ desc_id(desc), (unsigned long long) desc->txd.phys,
+ (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+ desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
+ pq->ctl_f.compl_write,
+ pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+ pq->ctl_f.src_cnt);
+ for (i = 0; i < src_cnt; i++)
+ dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+ (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+ dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+ dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+ const dma_addr_t *dst, const dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_ring_ent *compl_desc;
+ struct ioat_ring_ent *desc;
+ struct ioat_ring_ent *ext;
+ size_t total_len = len;
+ struct ioat_pq_descriptor *pq;
+ struct ioat_pq_ext_descriptor *pq_ex = NULL;
+ struct ioat_dma_descriptor *hw;
+ u32 offset = 0;
+ int num_descs;
+ int with_ext;
+ int i, s;
+ u16 idx;
+ u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+
+ dev_dbg(to_dev(chan), "%s\n", __func__);
+ /* the engine requires at least two sources (we provide
+ * at least 1 implied source in the DMA_PREP_CONTINUE case)
+ */
+ BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ /* we need 2x the number of descriptors to cover greater than 3
+ * sources
+ */
+ if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
+ with_ext = 1;
+ num_descs *= 2;
+ } else
+ with_ext = 0;
+
+ /* completion writes from the raid engine may pass completion
+ * writes from the legacy engine, so we need one extra null
+ * (legacy) descriptor to ensure all completion writes arrive in
+ * order.
+ */
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+ /* pass */;
+ else
+ return NULL;
+ i = 0;
+ do {
+ struct ioat_raw_descriptor *descs[2];
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ pq = desc->pq;
+
+ /* save a branch by unconditionally retrieving the
+ * extended descriptor pq_set_src() knows to not write
+ * to it in the single descriptor case
+ */
+ ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
+ pq_ex = ext->pq_ex;
+
+ descs[0] = (struct ioat_raw_descriptor *) pq;
+ descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+ for (s = 0; s < src_cnt; s++)
+ pq_set_src(descs, src[s], offset, scf[s], s);
+
+ /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+ if (dmaf_p_disabled_continue(flags))
+ pq_set_src(descs, dst[1], offset, 1, s++);
+ else if (dmaf_continue(flags)) {
+ pq_set_src(descs, dst[0], offset, 0, s++);
+ pq_set_src(descs, dst[1], offset, 1, s++);
+ pq_set_src(descs, dst[1], offset, 0, s++);
+ }
+ pq->size = xfer_size;
+ pq->p_addr = dst[0] + offset;
+ pq->q_addr = dst[1] + offset;
+ pq->ctl = 0;
+ pq->ctl_f.op = op;
+ pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+ pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ } while ((i += 1 + with_ext) < num_descs);
+
+ /* last pq descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ dump_pq_desc_dbg(ioat, desc, ext);
+
+ /* completion descriptor carries interrupt bit */
+ compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+ compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+ hw = compl_desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ dump_desc_dbg(ioat, compl_desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ /* handle the single source multiply case from the raid6
+ * recovery path
+ */
+ if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
+ dma_addr_t single_source[2];
+ unsigned char single_source_coef[2];
+
+ BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+ single_source[0] = src[0];
+ single_source[1] = src[0];
+ single_source_coef[0] = scf[0];
+ single_source_coef[1] = 0;
+
+ return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+ single_source_coef, len, flags);
+ } else
+ return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
+ len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags)
+{
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *pqres = 0;
+
+ return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+ flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ unsigned char scf[src_cnt];
+ dma_addr_t pq[2];
+
+ memset(scf, 0, src_cnt);
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+ pq[0] = dst;
+ pq[1] = ~0;
+
+ return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+ flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags)
+{
+ unsigned char scf[src_cnt];
+ dma_addr_t pq[2];
+
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *result = 0;
+
+ memset(scf, 0, src_cnt);
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+ pq[0] = src[0];
+ pq[1] = ~0;
+
+ return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
+ len, flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *desc;
+ struct ioat_dma_descriptor *hw;
+ u16 idx;
+
+ if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
+ desc = ioat2_get_ring_ent(ioat, idx);
+ else
+ return NULL;
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+
+ desc->txd.flags = flags;
+ desc->len = 1;
+
+ dump_desc_dbg(ioat, desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+
+static void __devinit ioat3_dma_test_callback(void *dma_async_param)
+{
+ struct completion *cmp = dma_async_param;
+
+ complete(cmp);
+}
+
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+ struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+ dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+ dma_addr_t dma_addr, dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ u32 xor_val_result;
+ int err = 0;
+ struct completion cmp;
+ unsigned long tmo;
+ struct device *dev = &device->pdev->dev;
+ struct dma_device *dma = &device->common;
+
+ dev_dbg(dev, "%s\n", __func__);
+
+ if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+ return 0;
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx]) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOAT_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+
+ if (!tx) {
+ dev_err(dev, "Self-test xor prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test xor setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test xor timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_err(dev, "Self-test xor failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
+
+ /* skip validate if the capability is not present */
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* validate the sources with the destintation page */
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ xor_val_srcs[i] = xor_srcs[i];
+ xor_val_srcs[i] = dest;
+
+ xor_val_result = 1;
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test zero prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test zero setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test validate timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (xor_val_result != 0) {
+ dev_err(dev, "Self-test validate failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* skip memset if the capability is not present */
+ if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* test memset */
+ dma_addr = dma_map_page(dev, dest, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test memset prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test memset setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test memset timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i]) {
+ dev_err(dev, "Self-test memset failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+ /* test for non-zero parity sum */
+ xor_val_result = 0;
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test 2nd zero prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test 2nd zero setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test 2nd validate timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (xor_val_result != SUM_CHECK_P_RESULT) {
+ dev_err(dev, "Self-test validate failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ dma->device_free_chan_resources(dma_chan);
+out:
+ src_idx = IOAT_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
+{
+ int rc = ioat_dma_self_test(device);
+
+ if (rc)
+ return rc;
+
+ rc = ioat_xor_val_self_test(device);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
+{
+ struct pci_dev *pdev = device->pdev;
+ struct dma_device *dma;
+ struct dma_chan *c;
+ struct ioat_chan_common *chan;
+ bool is_raid_device = false;
+ int err;
+ u16 dev_id;
+ u32 cap;
+
+ device->enumerate_channels = ioat2_enumerate_channels;
+ device->self_test = ioat3_dma_self_test;
+ dma = &device->common;
+ dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+ dma->device_issue_pending = ioat2_issue_pending;
+ dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+ dma->device_free_chan_resources = ioat2_free_chan_resources;
+
+ dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+ dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
+
+ cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
+ if (cap & IOAT_CAP_XOR) {
+ is_raid_device = true;
+ dma->max_xor = 8;
+ dma->xor_align = 2;
+
+ dma_cap_set(DMA_XOR, dma->cap_mask);
+ dma->device_prep_dma_xor = ioat3_prep_xor;
+
+ dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+ dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
+ }
+ if (cap & IOAT_CAP_PQ) {
+ is_raid_device = true;
+ dma_set_maxpq(dma, 8, 0);
+ dma->pq_align = 2;
+
+ dma_cap_set(DMA_PQ, dma->cap_mask);
+ dma->device_prep_dma_pq = ioat3_prep_pq;
+
+ dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+ dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+
+ if (!(cap & IOAT_CAP_XOR)) {
+ dma->max_xor = 8;
+ dma->xor_align = 2;
+
+ dma_cap_set(DMA_XOR, dma->cap_mask);
+ dma->device_prep_dma_xor = ioat3_prep_pqxor;
+
+ dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+ dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
+ }
+ }
+ if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
+ dma_cap_set(DMA_MEMSET, dma->cap_mask);
+ dma->device_prep_dma_memset = ioat3_prep_memset_lock;
+ }
+
+
+ if (is_raid_device) {
+ dma->device_is_tx_complete = ioat3_is_complete;
+ device->cleanup_tasklet = ioat3_cleanup_tasklet;
+ device->timer_fn = ioat3_timer_event;
+ } else {
+ dma->device_is_tx_complete = ioat2_is_complete;
+ device->cleanup_tasklet = ioat2_cleanup_tasklet;
+ device->timer_fn = ioat2_timer_event;
+ }
+
+ /* -= IOAT ver.3 workarounds =- */
+ /* Write CHANERRMSK_INT with 3E07h to mask out the errors
+ * that can cause stability issues for IOAT ver.3
+ */
+ pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
+
+ /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+ * (workaround for spurious config parity error after restart)
+ */
+ pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+ if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
+ pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
+
+ err = ioat_probe(device);
+ if (err)
+ return err;
+ ioat_set_tcp_copy_break(262144);
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ chan = to_chan_common(c);
+ writel(IOAT_DMA_DCA_ANY_CPU,
+ chan->reg_base + IOAT_DCACTRL_OFFSET);
+ }
+
+ err = ioat_register(device);
+ if (err)
+ return err;
+
+ ioat_kobject_add(device, &ioat2_ktype);
+
+ if (dca)
+ device->dca = ioat3_dca_init(pdev, device->reg_base);
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID 0x8086
+#define IOAT_MMIO_BAR 0
+
+/* CB device ID's */
+#define IOAT_PCI_DID_5000 0x1A38
+#define IOAT_PCI_DID_CNB 0x360B
+#define IOAT_PCI_DID_SCNB 0x65FF
+#define IOAT_PCI_DID_SNB 0x402F
+
+#define IOAT_PCI_RID 0x00
+#define IOAT_PCI_SVID 0x8086
+#define IOAT_PCI_SID 0x8086
+#define IOAT_VER_1_2 0x12 /* Version 1.2 */
+#define IOAT_VER_2_0 0x20 /* Version 2.0 */
+#define IOAT_VER_3_0 0x30 /* Version 3.0 */
+#define IOAT_VER_3_2 0x32 /* Version 3.2 */
+
+struct ioat_dma_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int null:1;
+ unsigned int src_brk:1;
+ unsigned int dest_brk:1;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int rsvd2:13;
+ #define IOAT_OP_COPY 0x00
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t rsv1;
+ uint64_t rsv2;
+ /* store some driver data in an unused portion of the descriptor */
+ union {
+ uint64_t user1;
+ uint64_t tx_cnt;
+ };
+ uint64_t user2;
+};
+
+struct ioat_fill_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int rsvd:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int rsvd2:2;
+ unsigned int dest_brk:1;
+ unsigned int bundle:1;
+ unsigned int rsvd4:15;
+ #define IOAT_OP_FILL 0x01
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_data;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t rsv1;
+ uint64_t next_dst_addr;
+ uint64_t user1;
+ uint64_t user2;
+};
+
+struct ioat_xor_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int rsvd:13;
+ #define IOAT_OP_XOR 0x87
+ #define IOAT_OP_XOR_VAL 0x88
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+};
+
+struct ioat_xor_ext_descriptor {
+ uint64_t src_addr6;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t next;
+ uint64_t rsvd[4];
+};
+
+struct ioat_pq_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int p_disable:1;
+ unsigned int q_disable:1;
+ unsigned int rsvd:11;
+ #define IOAT_OP_PQ 0x89
+ #define IOAT_OP_PQ_VAL 0x8a
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint8_t coef[8];
+ uint64_t q_addr;
+};
+
+struct ioat_pq_ext_descriptor {
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+ uint64_t src_addr6;
+ uint64_t next;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t rsvd[2];
+};
+
+struct ioat_pq_update_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int p_disable:1;
+ unsigned int q_disable:1;
+ unsigned int rsvd:3;
+ unsigned int coef:8;
+ #define IOAT_OP_PQ_UP 0x8b
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t p_src;
+ uint64_t q_src;
+ uint64_t q_addr;
+};
+
+struct ioat_raw_descriptor {
+ uint64_t field[8];
+};
+#endif
--- /dev/null
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static struct pci_device_id ioat_pci_tbl[] = {
+ /* I/OAT v1 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+ { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+ /* I/OAT v2 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+ /* I/OAT v3 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+ /* I/OAT v3.2 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+
+struct kmem_cache *ioat2_cache;
+
+#define DRV_NAME "ioatdma"
+
+static struct pci_driver ioat_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = ioat_pci_tbl,
+ .probe = ioat_pci_probe,
+ .remove = __devexit_p(ioat_remove),
+};
+
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct device *dev = &pdev->dev;
+ struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+
+ if (!d)
+ return NULL;
+ d->pdev = pdev;
+ d->reg_base = iobase;
+ return d;
+}
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ void __iomem * const *iomap;
+ struct device *dev = &pdev->dev;
+ struct ioatdma_device *device;
+ int err;
+
+ err = pcim_enable_device(pdev);
+ if (err)
+ return err;
+
+ err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+ if (err)
+ return err;
+ iomap = pcim_iomap_table(pdev);
+ if (!iomap)
+ return -ENOMEM;
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err)
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err)
+ return err;
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err)
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err)
+ return err;
+
+ device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL);
+ if (!device)
+ return -ENOMEM;
+
+ pci_set_master(pdev);
+
+ device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+ if (!device)
+ return -ENOMEM;
+ pci_set_drvdata(pdev, device);
+
+ device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+ if (device->version == IOAT_VER_1_2)
+ err = ioat1_dma_probe(device, ioat_dca_enabled);
+ else if (device->version == IOAT_VER_2_0)
+ err = ioat2_dma_probe(device, ioat_dca_enabled);
+ else if (device->version >= IOAT_VER_3_0)
+ err = ioat3_dma_probe(device, ioat_dca_enabled);
+ else
+ return -ENODEV;
+
+ if (err) {
+ dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+ struct ioatdma_device *device = pci_get_drvdata(pdev);
+
+ if (!device)
+ return;
+
+ dev_err(&pdev->dev, "Removing dma and dca services\n");
+ if (device->dca) {
+ unregister_dca_provider(device->dca, &pdev->dev);
+ free_dca_provider(device->dca);
+ device->dca = NULL;
+ }
+ ioat_dma_remove(device);
+}
+
+static int __init ioat_init_module(void)
+{
+ int err;
+
+ pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+ DRV_NAME, IOAT_DMA_VERSION);
+
+ ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ioat2_cache)
+ return -ENOMEM;
+
+ err = pci_register_driver(&ioat_pci_driver);
+ if (err)
+ kmem_cache_destroy(ioat2_cache);
+
+ return err;
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+ pci_unregister_driver(&ioat_pci_driver);
+ kmem_cache_destroy(ioat2_cache);
+}
+module_exit(ioat_exit_module);
--- /dev/null
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET 0x48
+#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
+
+#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */
+#define IOAT_XFERCAP_4KB 12
+#define IOAT_XFERCAP_8KB 13
+#define IOAT_XFERCAP_16KB 14
+#define IOAT_XFERCAP_32KB 15
+#define IOAT_XFERCAP_32GB 0
+
+#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */
+#define IOAT_GENCTRL_DEBUG_EN 0x01
+
+#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET 0x08 /* 8-bit */
+#define IOAT_VER_MAJOR_MASK 0xF0
+#define IOAT_VER_MINOR_MASK 0x0F
+#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
+#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
+
+#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK 0x00000001
+#define IOAT_CAP_CRC 0x00000002
+#define IOAT_CAP_SKIP_MARKER 0x00000004
+#define IOAT_CAP_DCA 0x00000010
+#define IOAT_CAP_CRC_MOVE 0x00000020
+#define IOAT_CAP_FILL_BLOCK 0x00000040
+#define IOAT_CAP_APIC 0x00000080
+#define IOAT_CAP_XOR 0x00000100
+#define IOAT_CAP_PQ 0x00000200
+
+#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
+#define IOAT_CHANCTRL_INT_REARM 0x0001
+#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
+ IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+ IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
+ IOAT_CHANCTRL_ERR_INT_EN)
+
+#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW 0x04
+#define IOAT2_CHANSTS_OFFSET_LOW 0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH 0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL
+#define IOAT_CHANSTS_STATUS 0x7ULL
+#define IOAT_CHANSTS_ACTIVE 0x0
+#define IOAT_CHANSTS_DONE 0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED 0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET 0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET 0x00
+#define IOAT_DCA_VER_MAJOR_MASK 0xF0
+#define IOAT_DCA_VER_MINOR_MASK 0x0F
+
+#define IOAT_DCA_COMP_OFFSET 0x02
+#define IOAT_DCA_COMP_V1 0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET 0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH 0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET 0x06
+#define IOAT_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET 0x0C
+#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID 0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET 0x10
+#define IOAT_DCA_GREQID_SIZE 0x04
+#define IOAT_DCA_GREQID_MASK 0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000
+#define IOAT_DCA_GREQID_VALID 0x20000000
+#define IOAT_DCA_GREQID_LASTID 0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET 0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH 0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET 0x0E
+#define IOAT3_PCI_CONTROL_MEMWR 0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET 0x02
+
+#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW 0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET 0x20
+#define IOAT_CHANCMD_RESUME 0x10
+#define IOAT_CHANCMD_ABORT 0x08
+#define IOAT_CHANCMD_SUSPEND 0x04
+#define IOAT_CHANCMD_APPEND 0x02
+#define IOAT_CHANCMD_START 0x01
+
+#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW 0x18
+#define IOAT_CHANCMP_OFFSET_HIGH 0x1C
+
+#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW 0x20
+#define IOAT_CDAR_OFFSET_HIGH 0x24
+
+#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
+#define IOAT_CHANERR_CHANCMD_ERR 0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
+#define IOAT_CHANERR_READ_DATA_ERR 0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
+#define IOAT_CHANERR_CONTROL_ERR 0x0400
+#define IOAT_CHANERR_LENGTH_ERR 0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
+#define IOAT_CHANERR_SOFT_ERR 0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
+#define IOAT_CHANERR_XOR_Q_ERR 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
+++ /dev/null
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2007 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/dca.h>
-
-/* either a kernel change is needed, or we need something like this in kernel */
-#ifndef CONFIG_SMP
-#include <asm/smp.h>
-#undef cpu_physical_id
-#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
-#endif
-
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-
-/*
- * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
- * contain the bit number of the APIC ID to map into the DCA tag. If the valid
- * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
- */
-#define DCA_TAG_MAP_VALID 0x80
-
-#define DCA3_TAG_MAP_BIT_TO_INV 0x80
-#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
-#define DCA3_TAG_MAP_LITERAL_VAL 0x1
-
-#define DCA_TAG_MAP_MASK 0xDF
-
-/* expected tag map bytes for I/OAT ver.2 */
-#define DCA2_TAG_MAP_BYTE0 0x80
-#define DCA2_TAG_MAP_BYTE1 0x0
-#define DCA2_TAG_MAP_BYTE2 0x81
-#define DCA2_TAG_MAP_BYTE3 0x82
-#define DCA2_TAG_MAP_BYTE4 0x82
-
-/* verify if tag map matches expected values */
-static inline int dca2_tag_map_valid(u8 *tag_map)
-{
- return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
- (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
- (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
- (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
- (tag_map[4] == DCA2_TAG_MAP_BYTE4));
-}
-
-/*
- * "Legacy" DCA systems do not implement the DCA register set in the
- * I/OAT device. Software needs direct support for their tag mappings.
- */
-
-#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x))
-#define IOAT_TAG_MAP_LEN 8
-
-static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
- 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
-static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
- 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
-static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
- 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
-static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
-
-/* pack PCI B/D/F into a u16 */
-static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
-{
- return (pci->bus->number << 8) | pci->devfn;
-}
-
-static int dca_enabled_in_bios(struct pci_dev *pdev)
-{
- /* CPUID level 9 returns DCA configuration */
- /* Bit 0 indicates DCA enabled by the BIOS */
- unsigned long cpuid_level_9;
- int res;
-
- cpuid_level_9 = cpuid_eax(9);
- res = test_bit(0, &cpuid_level_9);
- if (!res)
- dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
-
- return res;
-}
-
-static int system_has_dca_enabled(struct pci_dev *pdev)
-{
- if (boot_cpu_has(X86_FEATURE_DCA))
- return dca_enabled_in_bios(pdev);
-
- dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
- return 0;
-}
-
-struct ioat_dca_slot {
- struct pci_dev *pdev; /* requester device */
- u16 rid; /* requester id, as used by IOAT */
-};
-
-#define IOAT_DCA_MAX_REQ 6
-#define IOAT3_DCA_MAX_REQ 2
-
-struct ioat_dca_priv {
- void __iomem *iobase;
- void __iomem *dca_base;
- int max_requesters;
- int requester_count;
- u8 tag_map[IOAT_TAG_MAP_LEN];
- struct ioat_dca_slot req_slots[0];
-};
-
-/* 5000 series chipset DCA Port Requester ID Table Entry Format
- * [15:8] PCI-Express Bus Number
- * [7:3] PCI-Express Device Number
- * [2:0] PCI-Express Function Number
- *
- * 5000 series chipset DCA control register format
- * [7:1] Reserved (0)
- * [0] Ignore Function Number
- */
-
-static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- struct pci_dev *pdev;
- int i;
- u16 id;
-
- /* This implementation only supports PCI-Express */
- if (dev->bus != &pci_bus_type)
- return -ENODEV;
- pdev = to_pci_dev(dev);
- id = dcaid_from_pcidev(pdev);
-
- if (ioatdca->requester_count == ioatdca->max_requesters)
- return -ENODEV;
-
- for (i = 0; i < ioatdca->max_requesters; i++) {
- if (ioatdca->req_slots[i].pdev == NULL) {
- /* found an empty slot */
- ioatdca->requester_count++;
- ioatdca->req_slots[i].pdev = pdev;
- ioatdca->req_slots[i].rid = id;
- writew(id, ioatdca->dca_base + (i * 4));
- /* make sure the ignore function bit is off */
- writeb(0, ioatdca->dca_base + (i * 4) + 2);
- return i;
- }
- }
- /* Error, ioatdma->requester_count is out of whack */
- return -EFAULT;
-}
-
-static int ioat_dca_remove_requester(struct dca_provider *dca,
- struct device *dev)
-{
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- struct pci_dev *pdev;
- int i;
-
- /* This implementation only supports PCI-Express */
- if (dev->bus != &pci_bus_type)
- return -ENODEV;
- pdev = to_pci_dev(dev);
-
- for (i = 0; i < ioatdca->max_requesters; i++) {
- if (ioatdca->req_slots[i].pdev == pdev) {
- writew(0, ioatdca->dca_base + (i * 4));
- ioatdca->req_slots[i].pdev = NULL;
- ioatdca->req_slots[i].rid = 0;
- ioatdca->requester_count--;
- return i;
- }
- }
- return -ENODEV;
-}
-
-static u8 ioat_dca_get_tag(struct dca_provider *dca,
- struct device *dev,
- int cpu)
-{
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- int i, apic_id, bit, value;
- u8 entry, tag;
-
- tag = 0;
- apic_id = cpu_physical_id(cpu);
-
- for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
- entry = ioatdca->tag_map[i];
- if (entry & DCA_TAG_MAP_VALID) {
- bit = entry & ~DCA_TAG_MAP_VALID;
- value = (apic_id & (1 << bit)) ? 1 : 0;
- } else {
- value = entry ? 1 : 0;
- }
- tag |= (value << i);
- }
- return tag;
-}
-
-static int ioat_dca_dev_managed(struct dca_provider *dca,
- struct device *dev)
-{
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- struct pci_dev *pdev;
- int i;
-
- pdev = to_pci_dev(dev);
- for (i = 0; i < ioatdca->max_requesters; i++) {
- if (ioatdca->req_slots[i].pdev == pdev)
- return 1;
- }
- return 0;
-}
-
-static struct dca_ops ioat_dca_ops = {
- .add_requester = ioat_dca_add_requester,
- .remove_requester = ioat_dca_remove_requester,
- .get_tag = ioat_dca_get_tag,
- .dev_managed = ioat_dca_dev_managed,
-};
-
-
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
- struct dca_provider *dca;
- struct ioat_dca_priv *ioatdca;
- u8 *tag_map = NULL;
- int i;
- int err;
- u8 version;
- u8 max_requesters;
-
- if (!system_has_dca_enabled(pdev))
- return NULL;
-
- /* I/OAT v1 systems must have a known tag_map to support DCA */
- switch (pdev->vendor) {
- case PCI_VENDOR_ID_INTEL:
- switch (pdev->device) {
- case PCI_DEVICE_ID_INTEL_IOAT:
- tag_map = ioat_tag_map_BNB;
- break;
- case PCI_DEVICE_ID_INTEL_IOAT_CNB:
- tag_map = ioat_tag_map_CNB;
- break;
- case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
- tag_map = ioat_tag_map_SCNB;
- break;
- }
- break;
- case PCI_VENDOR_ID_UNISYS:
- switch (pdev->device) {
- case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
- tag_map = ioat_tag_map_UNISYS;
- break;
- }
- break;
- }
- if (tag_map == NULL)
- return NULL;
-
- version = readb(iobase + IOAT_VER_OFFSET);
- if (version == IOAT_VER_3_0)
- max_requesters = IOAT3_DCA_MAX_REQ;
- else
- max_requesters = IOAT_DCA_MAX_REQ;
-
- dca = alloc_dca_provider(&ioat_dca_ops,
- sizeof(*ioatdca) +
- (sizeof(struct ioat_dca_slot) * max_requesters));
- if (!dca)
- return NULL;
-
- ioatdca = dca_priv(dca);
- ioatdca->max_requesters = max_requesters;
- ioatdca->dca_base = iobase + 0x54;
-
- /* copy over the APIC ID to DCA tag mapping */
- for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
- ioatdca->tag_map[i] = tag_map[i];
-
- err = register_dca_provider(dca, &pdev->dev);
- if (err) {
- free_dca_provider(dca);
- return NULL;
- }
-
- return dca;
-}
-
-
-static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- struct pci_dev *pdev;
- int i;
- u16 id;
- u16 global_req_table;
-
- /* This implementation only supports PCI-Express */
- if (dev->bus != &pci_bus_type)
- return -ENODEV;
- pdev = to_pci_dev(dev);
- id = dcaid_from_pcidev(pdev);
-
- if (ioatdca->requester_count == ioatdca->max_requesters)
- return -ENODEV;
-
- for (i = 0; i < ioatdca->max_requesters; i++) {
- if (ioatdca->req_slots[i].pdev == NULL) {
- /* found an empty slot */
- ioatdca->requester_count++;
- ioatdca->req_slots[i].pdev = pdev;
- ioatdca->req_slots[i].rid = id;
- global_req_table =
- readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
- writel(id | IOAT_DCA_GREQID_VALID,
- ioatdca->iobase + global_req_table + (i * 4));
- return i;
- }
- }
- /* Error, ioatdma->requester_count is out of whack */
- return -EFAULT;
-}
-
-static int ioat2_dca_remove_requester(struct dca_provider *dca,
- struct device *dev)
-{
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- struct pci_dev *pdev;
- int i;
- u16 global_req_table;
-
- /* This implementation only supports PCI-Express */
- if (dev->bus != &pci_bus_type)
- return -ENODEV;
- pdev = to_pci_dev(dev);
-
- for (i = 0; i < ioatdca->max_requesters; i++) {
- if (ioatdca->req_slots[i].pdev == pdev) {
- global_req_table =
- readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
- writel(0, ioatdca->iobase + global_req_table + (i * 4));
- ioatdca->req_slots[i].pdev = NULL;
- ioatdca->req_slots[i].rid = 0;
- ioatdca->requester_count--;
- return i;
- }
- }
- return -ENODEV;
-}
-
-static u8 ioat2_dca_get_tag(struct dca_provider *dca,
- struct device *dev,
- int cpu)
-{
- u8 tag;
-
- tag = ioat_dca_get_tag(dca, dev, cpu);
- tag = (~tag) & 0x1F;
- return tag;
-}
-
-static struct dca_ops ioat2_dca_ops = {
- .add_requester = ioat2_dca_add_requester,
- .remove_requester = ioat2_dca_remove_requester,
- .get_tag = ioat2_dca_get_tag,
- .dev_managed = ioat_dca_dev_managed,
-};
-
-static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
-{
- int slots = 0;
- u32 req;
- u16 global_req_table;
-
- global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
- if (global_req_table == 0)
- return 0;
- do {
- req = readl(iobase + global_req_table + (slots * sizeof(u32)));
- slots++;
- } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
-
- return slots;
-}
-
-struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
- struct dca_provider *dca;
- struct ioat_dca_priv *ioatdca;
- int slots;
- int i;
- int err;
- u32 tag_map;
- u16 dca_offset;
- u16 csi_fsb_control;
- u16 pcie_control;
- u8 bit;
-
- if (!system_has_dca_enabled(pdev))
- return NULL;
-
- dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
- if (dca_offset == 0)
- return NULL;
-
- slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
- if (slots == 0)
- return NULL;
-
- dca = alloc_dca_provider(&ioat2_dca_ops,
- sizeof(*ioatdca)
- + (sizeof(struct ioat_dca_slot) * slots));
- if (!dca)
- return NULL;
-
- ioatdca = dca_priv(dca);
- ioatdca->iobase = iobase;
- ioatdca->dca_base = iobase + dca_offset;
- ioatdca->max_requesters = slots;
-
- /* some bios might not know to turn these on */
- csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
- if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
- csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
- writew(csi_fsb_control,
- ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
- }
- pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
- if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
- pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
- writew(pcie_control,
- ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
- }
-
-
- /* TODO version, compatibility and configuration checks */
-
- /* copy out the APIC to DCA tag map */
- tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
- for (i = 0; i < 5; i++) {
- bit = (tag_map >> (4 * i)) & 0x0f;
- if (bit < 8)
- ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
- else
- ioatdca->tag_map[i] = 0;
- }
-
- if (!dca2_tag_map_valid(ioatdca->tag_map)) {
- dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
- "disabling DCA\n");
- free_dca_provider(dca);
- return NULL;
- }
-
- err = register_dca_provider(dca, &pdev->dev);
- if (err) {
- free_dca_provider(dca);
- return NULL;
- }
-
- return dca;
-}
-
-static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- struct pci_dev *pdev;
- int i;
- u16 id;
- u16 global_req_table;
-
- /* This implementation only supports PCI-Express */
- if (dev->bus != &pci_bus_type)
- return -ENODEV;
- pdev = to_pci_dev(dev);
- id = dcaid_from_pcidev(pdev);
-
- if (ioatdca->requester_count == ioatdca->max_requesters)
- return -ENODEV;
-
- for (i = 0; i < ioatdca->max_requesters; i++) {
- if (ioatdca->req_slots[i].pdev == NULL) {
- /* found an empty slot */
- ioatdca->requester_count++;
- ioatdca->req_slots[i].pdev = pdev;
- ioatdca->req_slots[i].rid = id;
- global_req_table =
- readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
- writel(id | IOAT_DCA_GREQID_VALID,
- ioatdca->iobase + global_req_table + (i * 4));
- return i;
- }
- }
- /* Error, ioatdma->requester_count is out of whack */
- return -EFAULT;
-}
-
-static int ioat3_dca_remove_requester(struct dca_provider *dca,
- struct device *dev)
-{
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- struct pci_dev *pdev;
- int i;
- u16 global_req_table;
-
- /* This implementation only supports PCI-Express */
- if (dev->bus != &pci_bus_type)
- return -ENODEV;
- pdev = to_pci_dev(dev);
-
- for (i = 0; i < ioatdca->max_requesters; i++) {
- if (ioatdca->req_slots[i].pdev == pdev) {
- global_req_table =
- readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
- writel(0, ioatdca->iobase + global_req_table + (i * 4));
- ioatdca->req_slots[i].pdev = NULL;
- ioatdca->req_slots[i].rid = 0;
- ioatdca->requester_count--;
- return i;
- }
- }
- return -ENODEV;
-}
-
-static u8 ioat3_dca_get_tag(struct dca_provider *dca,
- struct device *dev,
- int cpu)
-{
- u8 tag;
-
- struct ioat_dca_priv *ioatdca = dca_priv(dca);
- int i, apic_id, bit, value;
- u8 entry;
-
- tag = 0;
- apic_id = cpu_physical_id(cpu);
-
- for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
- entry = ioatdca->tag_map[i];
- if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
- bit = entry &
- ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
- value = (apic_id & (1 << bit)) ? 1 : 0;
- } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
- bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
- value = (apic_id & (1 << bit)) ? 0 : 1;
- } else {
- value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
- }
- tag |= (value << i);
- }
-
- return tag;
-}
-
-static struct dca_ops ioat3_dca_ops = {
- .add_requester = ioat3_dca_add_requester,
- .remove_requester = ioat3_dca_remove_requester,
- .get_tag = ioat3_dca_get_tag,
- .dev_managed = ioat_dca_dev_managed,
-};
-
-static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
-{
- int slots = 0;
- u32 req;
- u16 global_req_table;
-
- global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
- if (global_req_table == 0)
- return 0;
-
- do {
- req = readl(iobase + global_req_table + (slots * sizeof(u32)));
- slots++;
- } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
-
- return slots;
-}
-
-struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
- struct dca_provider *dca;
- struct ioat_dca_priv *ioatdca;
- int slots;
- int i;
- int err;
- u16 dca_offset;
- u16 csi_fsb_control;
- u16 pcie_control;
- u8 bit;
-
- union {
- u64 full;
- struct {
- u32 low;
- u32 high;
- };
- } tag_map;
-
- if (!system_has_dca_enabled(pdev))
- return NULL;
-
- dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
- if (dca_offset == 0)
- return NULL;
-
- slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
- if (slots == 0)
- return NULL;
-
- dca = alloc_dca_provider(&ioat3_dca_ops,
- sizeof(*ioatdca)
- + (sizeof(struct ioat_dca_slot) * slots));
- if (!dca)
- return NULL;
-
- ioatdca = dca_priv(dca);
- ioatdca->iobase = iobase;
- ioatdca->dca_base = iobase + dca_offset;
- ioatdca->max_requesters = slots;
-
- /* some bios might not know to turn these on */
- csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
- if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
- csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
- writew(csi_fsb_control,
- ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
- }
- pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
- if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
- pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
- writew(pcie_control,
- ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
- }
-
-
- /* TODO version, compatibility and configuration checks */
-
- /* copy out the APIC to DCA tag map */
- tag_map.low =
- readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
- tag_map.high =
- readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
- for (i = 0; i < 8; i++) {
- bit = tag_map.full >> (8 * i);
- ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
- }
-
- err = register_dca_provider(dca, &pdev->dev);
- if (err) {
- free_dca_provider(dca);
- return NULL;
- }
-
- return dca;
-}
+++ /dev/null
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2004 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-/*
- * This driver supports an Intel I/OAT DMA engine, which does asynchronous
- * copy operations.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/i7300_idle.h>
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-#include "ioatdma_hw.h"
-
-#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
-#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
-#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
-#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
-
-#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
-static int ioat_pending_level = 4;
-module_param(ioat_pending_level, int, 0644);
-MODULE_PARM_DESC(ioat_pending_level,
- "high-water mark for pushing ioat descriptors (default: 4)");
-
-#define RESET_DELAY msecs_to_jiffies(100)
-#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000))
-static void ioat_dma_chan_reset_part2(struct work_struct *work);
-static void ioat_dma_chan_watchdog(struct work_struct *work);
-
-/*
- * workaround for IOAT ver.3.0 null descriptor issue
- * (channel returns error when size is 0)
- */
-#define NULL_DESC_BUFFER_SIZE 1
-
-/* internal functions */
-static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
-
-static struct ioat_desc_sw *
-ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
-static struct ioat_desc_sw *
-ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
-
-static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
- struct ioatdma_device *device,
- int index)
-{
- return device->idx[index];
-}
-
-/**
- * ioat_dma_do_interrupt - handler used for single vector interrupt mode
- * @irq: interrupt id
- * @data: interrupt data
- */
-static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
-{
- struct ioatdma_device *instance = data;
- struct ioat_dma_chan *ioat_chan;
- unsigned long attnstatus;
- int bit;
- u8 intrctrl;
-
- intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
-
- if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
- return IRQ_NONE;
-
- if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
- writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
- return IRQ_NONE;
- }
-
- attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
- for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
- ioat_chan = ioat_lookup_chan_by_index(instance, bit);
- tasklet_schedule(&ioat_chan->cleanup_task);
- }
-
- writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
- return IRQ_HANDLED;
-}
-
-/**
- * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
- * @irq: interrupt id
- * @data: interrupt data
- */
-static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
-{
- struct ioat_dma_chan *ioat_chan = data;
-
- tasklet_schedule(&ioat_chan->cleanup_task);
-
- return IRQ_HANDLED;
-}
-
-static void ioat_dma_cleanup_tasklet(unsigned long data);
-
-/**
- * ioat_dma_enumerate_channels - find and initialize the device's channels
- * @device: the device to be enumerated
- */
-static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
-{
- u8 xfercap_scale;
- u32 xfercap;
- int i;
- struct ioat_dma_chan *ioat_chan;
-
- /*
- * IOAT ver.3 workarounds
- */
- if (device->version == IOAT_VER_3_0) {
- u32 chan_err_mask;
- u16 dev_id;
- u32 dmauncerrsts;
-
- /*
- * Write CHANERRMSK_INT with 3E07h to mask out the errors
- * that can cause stability issues for IOAT ver.3
- */
- chan_err_mask = 0x3E07;
- pci_write_config_dword(device->pdev,
- IOAT_PCI_CHANERRMASK_INT_OFFSET,
- chan_err_mask);
-
- /*
- * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
- * (workaround for spurious config parity error after restart)
- */
- pci_read_config_word(device->pdev,
- IOAT_PCI_DEVICE_ID_OFFSET,
- &dev_id);
- if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
- dmauncerrsts = 0x10;
- pci_write_config_dword(device->pdev,
- IOAT_PCI_DMAUNCERRSTS_OFFSET,
- dmauncerrsts);
- }
- }
-
- device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
- xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
- xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
-
-#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
- if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
- device->common.chancnt--;
- }
-#endif
- for (i = 0; i < device->common.chancnt; i++) {
- ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
- if (!ioat_chan) {
- device->common.chancnt = i;
- break;
- }
-
- ioat_chan->device = device;
- ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
- ioat_chan->xfercap = xfercap;
- ioat_chan->desccount = 0;
- INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
- if (ioat_chan->device->version == IOAT_VER_2_0)
- writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE |
- IOAT_DMA_DCA_ANY_CPU,
- ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
- else if (ioat_chan->device->version == IOAT_VER_3_0)
- writel(IOAT_DMA_DCA_ANY_CPU,
- ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
- spin_lock_init(&ioat_chan->cleanup_lock);
- spin_lock_init(&ioat_chan->desc_lock);
- INIT_LIST_HEAD(&ioat_chan->free_desc);
- INIT_LIST_HEAD(&ioat_chan->used_desc);
- /* This should be made common somewhere in dmaengine.c */
- ioat_chan->common.device = &device->common;
- list_add_tail(&ioat_chan->common.device_node,
- &device->common.channels);
- device->idx[i] = ioat_chan;
- tasklet_init(&ioat_chan->cleanup_task,
- ioat_dma_cleanup_tasklet,
- (unsigned long) ioat_chan);
- tasklet_disable(&ioat_chan->cleanup_task);
- }
- return device->common.chancnt;
-}
-
-/**
- * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
- * descriptors to hw
- * @chan: DMA channel handle
- */
-static inline void __ioat1_dma_memcpy_issue_pending(
- struct ioat_dma_chan *ioat_chan)
-{
- ioat_chan->pending = 0;
- writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
-}
-
-static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-
- if (ioat_chan->pending > 0) {
- spin_lock_bh(&ioat_chan->desc_lock);
- __ioat1_dma_memcpy_issue_pending(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
- }
-}
-
-static inline void __ioat2_dma_memcpy_issue_pending(
- struct ioat_dma_chan *ioat_chan)
-{
- ioat_chan->pending = 0;
- writew(ioat_chan->dmacount,
- ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-}
-
-static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-
- if (ioat_chan->pending > 0) {
- spin_lock_bh(&ioat_chan->desc_lock);
- __ioat2_dma_memcpy_issue_pending(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
- }
-}
-
-
-/**
- * ioat_dma_chan_reset_part2 - reinit the channel after a reset
- */
-static void ioat_dma_chan_reset_part2(struct work_struct *work)
-{
- struct ioat_dma_chan *ioat_chan =
- container_of(work, struct ioat_dma_chan, work.work);
- struct ioat_desc_sw *desc;
-
- spin_lock_bh(&ioat_chan->cleanup_lock);
- spin_lock_bh(&ioat_chan->desc_lock);
-
- ioat_chan->completion_virt->low = 0;
- ioat_chan->completion_virt->high = 0;
- ioat_chan->pending = 0;
-
- /*
- * count the descriptors waiting, and be sure to do it
- * right for both the CB1 line and the CB2 ring
- */
- ioat_chan->dmacount = 0;
- if (ioat_chan->used_desc.prev) {
- desc = to_ioat_desc(ioat_chan->used_desc.prev);
- do {
- ioat_chan->dmacount++;
- desc = to_ioat_desc(desc->node.next);
- } while (&desc->node != ioat_chan->used_desc.next);
- }
-
- /*
- * write the new starting descriptor address
- * this puts channel engine into ARMED state
- */
- desc = to_ioat_desc(ioat_chan->used_desc.prev);
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
-
- writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
- + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
- break;
- case IOAT_VER_2_0:
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
-
- /* tell the engine to go with what's left to be done */
- writew(ioat_chan->dmacount,
- ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-
- break;
- }
- dev_err(&ioat_chan->device->pdev->dev,
- "chan%d reset - %d descs waiting, %d total desc\n",
- chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
-
- spin_unlock_bh(&ioat_chan->desc_lock);
- spin_unlock_bh(&ioat_chan->cleanup_lock);
-}
-
-/**
- * ioat_dma_reset_channel - restart a channel
- * @ioat_chan: IOAT DMA channel handle
- */
-static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
-{
- u32 chansts, chanerr;
-
- if (!ioat_chan->used_desc.prev)
- return;
-
- chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
- chansts = (ioat_chan->completion_virt->low
- & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
- if (chanerr) {
- dev_err(&ioat_chan->device->pdev->dev,
- "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
- chan_num(ioat_chan), chansts, chanerr);
- writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
- }
-
- /*
- * whack it upside the head with a reset
- * and wait for things to settle out.
- * force the pending count to a really big negative
- * to make sure no one forces an issue_pending
- * while we're waiting.
- */
-
- spin_lock_bh(&ioat_chan->desc_lock);
- ioat_chan->pending = INT_MIN;
- writeb(IOAT_CHANCMD_RESET,
- ioat_chan->reg_base
- + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- /* schedule the 2nd half instead of sleeping a long time */
- schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
-}
-
-/**
- * ioat_dma_chan_watchdog - watch for stuck channels
- */
-static void ioat_dma_chan_watchdog(struct work_struct *work)
-{
- struct ioatdma_device *device =
- container_of(work, struct ioatdma_device, work.work);
- struct ioat_dma_chan *ioat_chan;
- int i;
-
- union {
- u64 full;
- struct {
- u32 low;
- u32 high;
- };
- } completion_hw;
- unsigned long compl_desc_addr_hw;
-
- for (i = 0; i < device->common.chancnt; i++) {
- ioat_chan = ioat_lookup_chan_by_index(device, i);
-
- if (ioat_chan->device->version == IOAT_VER_1_2
- /* have we started processing anything yet */
- && ioat_chan->last_completion
- /* have we completed any since last watchdog cycle? */
- && (ioat_chan->last_completion ==
- ioat_chan->watchdog_completion)
- /* has TCP stuck on one cookie since last watchdog? */
- && (ioat_chan->watchdog_tcp_cookie ==
- ioat_chan->watchdog_last_tcp_cookie)
- && (ioat_chan->watchdog_tcp_cookie !=
- ioat_chan->completed_cookie)
- /* is there something in the chain to be processed? */
- /* CB1 chain always has at least the last one processed */
- && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
- && ioat_chan->pending == 0) {
-
- /*
- * check CHANSTS register for completed
- * descriptor address.
- * if it is different than completion writeback,
- * it is not zero
- * and it has changed since the last watchdog
- * we can assume that channel
- * is still working correctly
- * and the problem is in completion writeback.
- * update completion writeback
- * with actual CHANSTS value
- * else
- * try resetting the channel
- */
-
- completion_hw.low = readl(ioat_chan->reg_base +
- IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
- completion_hw.high = readl(ioat_chan->reg_base +
- IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
-#if (BITS_PER_LONG == 64)
- compl_desc_addr_hw =
- completion_hw.full
- & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-#else
- compl_desc_addr_hw =
- completion_hw.low & IOAT_LOW_COMPLETION_MASK;
-#endif
-
- if ((compl_desc_addr_hw != 0)
- && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
- && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
- ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
- ioat_chan->completion_virt->low = completion_hw.low;
- ioat_chan->completion_virt->high = completion_hw.high;
- } else {
- ioat_dma_reset_channel(ioat_chan);
- ioat_chan->watchdog_completion = 0;
- ioat_chan->last_compl_desc_addr_hw = 0;
- }
-
- /*
- * for version 2.0 if there are descriptors yet to be processed
- * and the last completed hasn't changed since the last watchdog
- * if they haven't hit the pending level
- * issue the pending to push them through
- * else
- * try resetting the channel
- */
- } else if (ioat_chan->device->version == IOAT_VER_2_0
- && ioat_chan->used_desc.prev
- && ioat_chan->last_completion
- && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
-
- if (ioat_chan->pending < ioat_pending_level)
- ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
- else {
- ioat_dma_reset_channel(ioat_chan);
- ioat_chan->watchdog_completion = 0;
- }
- } else {
- ioat_chan->last_compl_desc_addr_hw = 0;
- ioat_chan->watchdog_completion
- = ioat_chan->last_completion;
- }
-
- ioat_chan->watchdog_last_tcp_cookie =
- ioat_chan->watchdog_tcp_cookie;
- }
-
- schedule_delayed_work(&device->work, WATCHDOG_DELAY);
-}
-
-static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
- struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
- struct ioat_desc_sw *prev, *new;
- struct ioat_dma_descriptor *hw;
- dma_cookie_t cookie;
- LIST_HEAD(new_chain);
- u32 copy;
- size_t len;
- dma_addr_t src, dst;
- unsigned long orig_flags;
- unsigned int desc_count = 0;
-
- /* src and dest and len are stored in the initial descriptor */
- len = first->len;
- src = first->src;
- dst = first->dst;
- orig_flags = first->async_tx.flags;
- new = first;
-
- spin_lock_bh(&ioat_chan->desc_lock);
- prev = to_ioat_desc(ioat_chan->used_desc.prev);
- prefetch(prev->hw);
- do {
- copy = min_t(size_t, len, ioat_chan->xfercap);
-
- async_tx_ack(&new->async_tx);
-
- hw = new->hw;
- hw->size = copy;
- hw->ctl = 0;
- hw->src_addr = src;
- hw->dst_addr = dst;
- hw->next = 0;
-
- /* chain together the physical address list for the HW */
- wmb();
- prev->hw->next = (u64) new->async_tx.phys;
-
- len -= copy;
- dst += copy;
- src += copy;
-
- list_add_tail(&new->node, &new_chain);
- desc_count++;
- prev = new;
- } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
-
- if (!new) {
- dev_err(&ioat_chan->device->pdev->dev,
- "tx submit failed\n");
- spin_unlock_bh(&ioat_chan->desc_lock);
- return -ENOMEM;
- }
-
- hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
- if (first->async_tx.callback) {
- hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
- if (first != new) {
- /* move callback into to last desc */
- new->async_tx.callback = first->async_tx.callback;
- new->async_tx.callback_param
- = first->async_tx.callback_param;
- first->async_tx.callback = NULL;
- first->async_tx.callback_param = NULL;
- }
- }
-
- new->tx_cnt = desc_count;
- new->async_tx.flags = orig_flags; /* client is in control of this ack */
-
- /* store the original values for use in later cleanup */
- if (new != first) {
- new->src = first->src;
- new->dst = first->dst;
- new->len = first->len;
- }
-
- /* cookie incr and addition to used_list must be atomic */
- cookie = ioat_chan->common.cookie;
- cookie++;
- if (cookie < 0)
- cookie = 1;
- ioat_chan->common.cookie = new->async_tx.cookie = cookie;
-
- /* write address into NextDescriptor field of last desc in chain */
- to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
- first->async_tx.phys;
- list_splice_tail(&new_chain, &ioat_chan->used_desc);
-
- ioat_chan->dmacount += desc_count;
- ioat_chan->pending += desc_count;
- if (ioat_chan->pending >= ioat_pending_level)
- __ioat1_dma_memcpy_issue_pending(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- return cookie;
-}
-
-static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
- struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
- struct ioat_desc_sw *new;
- struct ioat_dma_descriptor *hw;
- dma_cookie_t cookie;
- u32 copy;
- size_t len;
- dma_addr_t src, dst;
- unsigned long orig_flags;
- unsigned int desc_count = 0;
-
- /* src and dest and len are stored in the initial descriptor */
- len = first->len;
- src = first->src;
- dst = first->dst;
- orig_flags = first->async_tx.flags;
- new = first;
-
- /*
- * ioat_chan->desc_lock is still in force in version 2 path
- * it gets unlocked at end of this function
- */
- do {
- copy = min_t(size_t, len, ioat_chan->xfercap);
-
- async_tx_ack(&new->async_tx);
-
- hw = new->hw;
- hw->size = copy;
- hw->ctl = 0;
- hw->src_addr = src;
- hw->dst_addr = dst;
-
- len -= copy;
- dst += copy;
- src += copy;
- desc_count++;
- } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
-
- if (!new) {
- dev_err(&ioat_chan->device->pdev->dev,
- "tx submit failed\n");
- spin_unlock_bh(&ioat_chan->desc_lock);
- return -ENOMEM;
- }
-
- hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
- if (first->async_tx.callback) {
- hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
- if (first != new) {
- /* move callback into to last desc */
- new->async_tx.callback = first->async_tx.callback;
- new->async_tx.callback_param
- = first->async_tx.callback_param;
- first->async_tx.callback = NULL;
- first->async_tx.callback_param = NULL;
- }
- }
-
- new->tx_cnt = desc_count;
- new->async_tx.flags = orig_flags; /* client is in control of this ack */
-
- /* store the original values for use in later cleanup */
- if (new != first) {
- new->src = first->src;
- new->dst = first->dst;
- new->len = first->len;
- }
-
- /* cookie incr and addition to used_list must be atomic */
- cookie = ioat_chan->common.cookie;
- cookie++;
- if (cookie < 0)
- cookie = 1;
- ioat_chan->common.cookie = new->async_tx.cookie = cookie;
-
- ioat_chan->dmacount += desc_count;
- ioat_chan->pending += desc_count;
- if (ioat_chan->pending >= ioat_pending_level)
- __ioat2_dma_memcpy_issue_pending(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- return cookie;
-}
-
-/**
- * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
- * @ioat_chan: the channel supplying the memory pool for the descriptors
- * @flags: allocation flags
- */
-static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
- struct ioat_dma_chan *ioat_chan,
- gfp_t flags)
-{
- struct ioat_dma_descriptor *desc;
- struct ioat_desc_sw *desc_sw;
- struct ioatdma_device *ioatdma_device;
- dma_addr_t phys;
-
- ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
- desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
- if (unlikely(!desc))
- return NULL;
-
- desc_sw = kzalloc(sizeof(*desc_sw), flags);
- if (unlikely(!desc_sw)) {
- pci_pool_free(ioatdma_device->dma_pool, desc, phys);
- return NULL;
- }
-
- memset(desc, 0, sizeof(*desc));
- dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- desc_sw->async_tx.tx_submit = ioat1_tx_submit;
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- desc_sw->async_tx.tx_submit = ioat2_tx_submit;
- break;
- }
-
- desc_sw->hw = desc;
- desc_sw->async_tx.phys = phys;
-
- return desc_sw;
-}
-
-static int ioat_initial_desc_count = 256;
-module_param(ioat_initial_desc_count, int, 0644);
-MODULE_PARM_DESC(ioat_initial_desc_count,
- "initial descriptors per channel (default: 256)");
-
-/**
- * ioat2_dma_massage_chan_desc - link the descriptors into a circle
- * @ioat_chan: the channel to be massaged
- */
-static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
-{
- struct ioat_desc_sw *desc, *_desc;
-
- /* setup used_desc */
- ioat_chan->used_desc.next = ioat_chan->free_desc.next;
- ioat_chan->used_desc.prev = NULL;
-
- /* pull free_desc out of the circle so that every node is a hw
- * descriptor, but leave it pointing to the list
- */
- ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
- ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
-
- /* circle link the hw descriptors */
- desc = to_ioat_desc(ioat_chan->free_desc.next);
- desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
- list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
- desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
- }
-}
-
-/**
- * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
- * @chan: the channel to be filled out
- */
-static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- struct ioat_desc_sw *desc;
- u16 chanctrl;
- u32 chanerr;
- int i;
- LIST_HEAD(tmp_list);
-
- /* have we already been set up? */
- if (!list_empty(&ioat_chan->free_desc))
- return ioat_chan->desccount;
-
- /* Setup register to interrupt and write completion status on error */
- chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
- IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
- IOAT_CHANCTRL_ERR_COMPLETION_EN;
- writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
-
- chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
- if (chanerr) {
- dev_err(&ioat_chan->device->pdev->dev,
- "CHANERR = %x, clearing\n", chanerr);
- writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
- }
-
- /* Allocate descriptors */
- for (i = 0; i < ioat_initial_desc_count; i++) {
- desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
- if (!desc) {
- dev_err(&ioat_chan->device->pdev->dev,
- "Only %d initial descriptors\n", i);
- break;
- }
- list_add_tail(&desc->node, &tmp_list);
- }
- spin_lock_bh(&ioat_chan->desc_lock);
- ioat_chan->desccount = i;
- list_splice(&tmp_list, &ioat_chan->free_desc);
- if (ioat_chan->device->version != IOAT_VER_1_2)
- ioat2_dma_massage_chan_desc(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- /* allocate a completion writeback area */
- /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
- ioat_chan->completion_virt =
- pci_pool_alloc(ioat_chan->device->completion_pool,
- GFP_KERNEL,
- &ioat_chan->completion_addr);
- memset(ioat_chan->completion_virt, 0,
- sizeof(*ioat_chan->completion_virt));
- writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
- writel(((u64) ioat_chan->completion_addr) >> 32,
- ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
-
- tasklet_enable(&ioat_chan->cleanup_task);
- ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */
- return ioat_chan->desccount;
-}
-
-/**
- * ioat_dma_free_chan_resources - release all the descriptors
- * @chan: the channel to be cleaned
- */
-static void ioat_dma_free_chan_resources(struct dma_chan *chan)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
- struct ioat_desc_sw *desc, *_desc;
- int in_use_descs = 0;
-
- /* Before freeing channel resources first check
- * if they have been previously allocated for this channel.
- */
- if (ioat_chan->desccount == 0)
- return;
-
- tasklet_disable(&ioat_chan->cleanup_task);
- ioat_dma_memcpy_cleanup(ioat_chan);
-
- /* Delay 100ms after reset to allow internal DMA logic to quiesce
- * before removing DMA descriptor resources.
- */
- writeb(IOAT_CHANCMD_RESET,
- ioat_chan->reg_base
- + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
- mdelay(100);
-
- spin_lock_bh(&ioat_chan->desc_lock);
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- list_for_each_entry_safe(desc, _desc,
- &ioat_chan->used_desc, node) {
- in_use_descs++;
- list_del(&desc->node);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- }
- list_for_each_entry_safe(desc, _desc,
- &ioat_chan->free_desc, node) {
- list_del(&desc->node);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- }
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- list_for_each_entry_safe(desc, _desc,
- ioat_chan->free_desc.next, node) {
- list_del(&desc->node);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- }
- desc = to_ioat_desc(ioat_chan->free_desc.next);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- INIT_LIST_HEAD(&ioat_chan->free_desc);
- INIT_LIST_HEAD(&ioat_chan->used_desc);
- break;
- }
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- pci_pool_free(ioatdma_device->completion_pool,
- ioat_chan->completion_virt,
- ioat_chan->completion_addr);
-
- /* one is ok since we left it on there on purpose */
- if (in_use_descs > 1)
- dev_err(&ioat_chan->device->pdev->dev,
- "Freeing %d in use descriptors!\n",
- in_use_descs - 1);
-
- ioat_chan->last_completion = ioat_chan->completion_addr = 0;
- ioat_chan->pending = 0;
- ioat_chan->dmacount = 0;
- ioat_chan->desccount = 0;
- ioat_chan->watchdog_completion = 0;
- ioat_chan->last_compl_desc_addr_hw = 0;
- ioat_chan->watchdog_tcp_cookie =
- ioat_chan->watchdog_last_tcp_cookie = 0;
-}
-
-/**
- * ioat_dma_get_next_descriptor - return the next available descriptor
- * @ioat_chan: IOAT DMA channel handle
- *
- * Gets the next descriptor from the chain, and must be called with the
- * channel's desc_lock held. Allocates more descriptors if the channel
- * has run out.
- */
-static struct ioat_desc_sw *
-ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
-{
- struct ioat_desc_sw *new;
-
- if (!list_empty(&ioat_chan->free_desc)) {
- new = to_ioat_desc(ioat_chan->free_desc.next);
- list_del(&new->node);
- } else {
- /* try to get another desc */
- new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
- if (!new) {
- dev_err(&ioat_chan->device->pdev->dev,
- "alloc failed\n");
- return NULL;
- }
- }
-
- prefetch(new->hw);
- return new;
-}
-
-static struct ioat_desc_sw *
-ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
-{
- struct ioat_desc_sw *new;
-
- /*
- * used.prev points to where to start processing
- * used.next points to next free descriptor
- * if used.prev == NULL, there are none waiting to be processed
- * if used.next == used.prev.prev, there is only one free descriptor,
- * and we need to use it to as a noop descriptor before
- * linking in a new set of descriptors, since the device
- * has probably already read the pointer to it
- */
- if (ioat_chan->used_desc.prev &&
- ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
-
- struct ioat_desc_sw *desc;
- struct ioat_desc_sw *noop_desc;
- int i;
-
- /* set up the noop descriptor */
- noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
- /* set size to non-zero value (channel returns error when size is 0) */
- noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
- noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
- noop_desc->hw->src_addr = 0;
- noop_desc->hw->dst_addr = 0;
-
- ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
- ioat_chan->pending++;
- ioat_chan->dmacount++;
-
- /* try to get a few more descriptors */
- for (i = 16; i; i--) {
- desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
- if (!desc) {
- dev_err(&ioat_chan->device->pdev->dev,
- "alloc failed\n");
- break;
- }
- list_add_tail(&desc->node, ioat_chan->used_desc.next);
-
- desc->hw->next
- = to_ioat_desc(desc->node.next)->async_tx.phys;
- to_ioat_desc(desc->node.prev)->hw->next
- = desc->async_tx.phys;
- ioat_chan->desccount++;
- }
-
- ioat_chan->used_desc.next = noop_desc->node.next;
- }
- new = to_ioat_desc(ioat_chan->used_desc.next);
- prefetch(new);
- ioat_chan->used_desc.next = new->node.next;
-
- if (ioat_chan->used_desc.prev == NULL)
- ioat_chan->used_desc.prev = &new->node;
-
- prefetch(new->hw);
- return new;
-}
-
-static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
- struct ioat_dma_chan *ioat_chan)
-{
- if (!ioat_chan)
- return NULL;
-
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- return ioat1_dma_get_next_descriptor(ioat_chan);
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- return ioat2_dma_get_next_descriptor(ioat_chan);
- }
- return NULL;
-}
-
-static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
- struct dma_chan *chan,
- dma_addr_t dma_dest,
- dma_addr_t dma_src,
- size_t len,
- unsigned long flags)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- struct ioat_desc_sw *new;
-
- spin_lock_bh(&ioat_chan->desc_lock);
- new = ioat_dma_get_next_descriptor(ioat_chan);
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- if (new) {
- new->len = len;
- new->dst = dma_dest;
- new->src = dma_src;
- new->async_tx.flags = flags;
- return &new->async_tx;
- } else {
- dev_err(&ioat_chan->device->pdev->dev,
- "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
- chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
- return NULL;
- }
-}
-
-static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
- struct dma_chan *chan,
- dma_addr_t dma_dest,
- dma_addr_t dma_src,
- size_t len,
- unsigned long flags)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- struct ioat_desc_sw *new;
-
- spin_lock_bh(&ioat_chan->desc_lock);
- new = ioat2_dma_get_next_descriptor(ioat_chan);
-
- /*
- * leave ioat_chan->desc_lock set in ioat 2 path
- * it will get unlocked at end of tx_submit
- */
-
- if (new) {
- new->len = len;
- new->dst = dma_dest;
- new->src = dma_src;
- new->async_tx.flags = flags;
- return &new->async_tx;
- } else {
- spin_unlock_bh(&ioat_chan->desc_lock);
- dev_err(&ioat_chan->device->pdev->dev,
- "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
- chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
- return NULL;
- }
-}
-
-static void ioat_dma_cleanup_tasklet(unsigned long data)
-{
- struct ioat_dma_chan *chan = (void *)data;
- ioat_dma_memcpy_cleanup(chan);
- writew(IOAT_CHANCTRL_INT_DISABLE,
- chan->reg_base + IOAT_CHANCTRL_OFFSET);
-}
-
-static void
-ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
-{
- if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- pci_unmap_single(ioat_chan->device->pdev,
- pci_unmap_addr(desc, dst),
- pci_unmap_len(desc, len),
- PCI_DMA_FROMDEVICE);
- else
- pci_unmap_page(ioat_chan->device->pdev,
- pci_unmap_addr(desc, dst),
- pci_unmap_len(desc, len),
- PCI_DMA_FROMDEVICE);
- }
-
- if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- pci_unmap_single(ioat_chan->device->pdev,
- pci_unmap_addr(desc, src),
- pci_unmap_len(desc, len),
- PCI_DMA_TODEVICE);
- else
- pci_unmap_page(ioat_chan->device->pdev,
- pci_unmap_addr(desc, src),
- pci_unmap_len(desc, len),
- PCI_DMA_TODEVICE);
- }
-}
-
-/**
- * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
- * @chan: ioat channel to be cleaned up
- */
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
-{
- unsigned long phys_complete;
- struct ioat_desc_sw *desc, *_desc;
- dma_cookie_t cookie = 0;
- unsigned long desc_phys;
- struct ioat_desc_sw *latest_desc;
-
- prefetch(ioat_chan->completion_virt);
-
- if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
- return;
-
- /* The completion writeback can happen at any time,
- so reads by the driver need to be atomic operations
- The descriptor physical addresses are limited to 32-bits
- when the CPU can only do a 32-bit mov */
-
-#if (BITS_PER_LONG == 64)
- phys_complete =
- ioat_chan->completion_virt->full
- & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-#else
- phys_complete =
- ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
-#endif
-
- if ((ioat_chan->completion_virt->full
- & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
- IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
- dev_err(&ioat_chan->device->pdev->dev,
- "Channel halted, chanerr = %x\n",
- readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
-
- /* TODO do something to salvage the situation */
- }
-
- if (phys_complete == ioat_chan->last_completion) {
- spin_unlock_bh(&ioat_chan->cleanup_lock);
- /*
- * perhaps we're stuck so hard that the watchdog can't go off?
- * try to catch it after 2 seconds
- */
- if (ioat_chan->device->version != IOAT_VER_3_0) {
- if (time_after(jiffies,
- ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
- ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
- ioat_chan->last_completion_time = jiffies;
- }
- }
- return;
- }
- ioat_chan->last_completion_time = jiffies;
-
- cookie = 0;
- if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
- spin_unlock_bh(&ioat_chan->cleanup_lock);
- return;
- }
-
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- list_for_each_entry_safe(desc, _desc,
- &ioat_chan->used_desc, node) {
-
- /*
- * Incoming DMA requests may use multiple descriptors,
- * due to exceeding xfercap, perhaps. If so, only the
- * last one will have a cookie, and require unmapping.
- */
- if (desc->async_tx.cookie) {
- cookie = desc->async_tx.cookie;
- ioat_dma_unmap(ioat_chan, desc);
- if (desc->async_tx.callback) {
- desc->async_tx.callback(desc->async_tx.callback_param);
- desc->async_tx.callback = NULL;
- }
- }
-
- if (desc->async_tx.phys != phys_complete) {
- /*
- * a completed entry, but not the last, so clean
- * up if the client is done with the descriptor
- */
- if (async_tx_test_ack(&desc->async_tx)) {
- list_move_tail(&desc->node,
- &ioat_chan->free_desc);
- } else
- desc->async_tx.cookie = 0;
- } else {
- /*
- * last used desc. Do not remove, so we can
- * append from it, but don't look at it next
- * time, either
- */
- desc->async_tx.cookie = 0;
-
- /* TODO check status bits? */
- break;
- }
- }
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- /* has some other thread has already cleaned up? */
- if (ioat_chan->used_desc.prev == NULL)
- break;
-
- /* work backwards to find latest finished desc */
- desc = to_ioat_desc(ioat_chan->used_desc.next);
- latest_desc = NULL;
- do {
- desc = to_ioat_desc(desc->node.prev);
- desc_phys = (unsigned long)desc->async_tx.phys
- & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
- if (desc_phys == phys_complete) {
- latest_desc = desc;
- break;
- }
- } while (&desc->node != ioat_chan->used_desc.prev);
-
- if (latest_desc != NULL) {
-
- /* work forwards to clear finished descriptors */
- for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
- &desc->node != latest_desc->node.next &&
- &desc->node != ioat_chan->used_desc.next;
- desc = to_ioat_desc(desc->node.next)) {
- if (desc->async_tx.cookie) {
- cookie = desc->async_tx.cookie;
- desc->async_tx.cookie = 0;
- ioat_dma_unmap(ioat_chan, desc);
- if (desc->async_tx.callback) {
- desc->async_tx.callback(desc->async_tx.callback_param);
- desc->async_tx.callback = NULL;
- }
- }
- }
-
- /* move used.prev up beyond those that are finished */
- if (&desc->node == ioat_chan->used_desc.next)
- ioat_chan->used_desc.prev = NULL;
- else
- ioat_chan->used_desc.prev = &desc->node;
- }
- break;
- }
-
- spin_unlock_bh(&ioat_chan->desc_lock);
-
- ioat_chan->last_completion = phys_complete;
- if (cookie != 0)
- ioat_chan->completed_cookie = cookie;
-
- spin_unlock_bh(&ioat_chan->cleanup_lock);
-}
-
-/**
- * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
- * @chan: IOAT DMA channel handle
- * @cookie: DMA transaction identifier
- * @done: if not %NULL, updated with last completed transaction
- * @used: if not %NULL, updated with last used transaction
- */
-static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
- dma_cookie_t cookie,
- dma_cookie_t *done,
- dma_cookie_t *used)
-{
- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
- dma_cookie_t last_used;
- dma_cookie_t last_complete;
- enum dma_status ret;
-
- last_used = chan->cookie;
- last_complete = ioat_chan->completed_cookie;
- ioat_chan->watchdog_tcp_cookie = cookie;
-
- if (done)
- *done = last_complete;
- if (used)
- *used = last_used;
-
- ret = dma_async_is_complete(cookie, last_complete, last_used);
- if (ret == DMA_SUCCESS)
- return ret;
-
- ioat_dma_memcpy_cleanup(ioat_chan);
-
- last_used = chan->cookie;
- last_complete = ioat_chan->completed_cookie;
-
- if (done)
- *done = last_complete;
- if (used)
- *used = last_used;
-
- return dma_async_is_complete(cookie, last_complete, last_used);
-}
-
-static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
-{
- struct ioat_desc_sw *desc;
-
- spin_lock_bh(&ioat_chan->desc_lock);
-
- desc = ioat_dma_get_next_descriptor(ioat_chan);
-
- if (!desc) {
- dev_err(&ioat_chan->device->pdev->dev,
- "Unable to start null desc - get next desc failed\n");
- spin_unlock_bh(&ioat_chan->desc_lock);
- return;
- }
-
- desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
- | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
- | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
- /* set size to non-zero value (channel returns error when size is 0) */
- desc->hw->size = NULL_DESC_BUFFER_SIZE;
- desc->hw->src_addr = 0;
- desc->hw->dst_addr = 0;
- async_tx_ack(&desc->async_tx);
- switch (ioat_chan->device->version) {
- case IOAT_VER_1_2:
- desc->hw->next = 0;
- list_add_tail(&desc->node, &ioat_chan->used_desc);
-
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
-
- writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
- + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
-
- ioat_chan->dmacount++;
- __ioat2_dma_memcpy_issue_pending(ioat_chan);
- break;
- }
- spin_unlock_bh(&ioat_chan->desc_lock);
-}
-
-/*
- * Perform a IOAT transaction to verify the HW works.
- */
-#define IOAT_TEST_SIZE 2000
-
-static void ioat_dma_test_callback(void *dma_async_param)
-{
- struct completion *cmp = dma_async_param;
-
- complete(cmp);
-}
-
-/**
- * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
- * @device: device to be tested
- */
-static int ioat_dma_self_test(struct ioatdma_device *device)
-{
- int i;
- u8 *src;
- u8 *dest;
- struct dma_chan *dma_chan;
- struct dma_async_tx_descriptor *tx;
- dma_addr_t dma_dest, dma_src;
- dma_cookie_t cookie;
- int err = 0;
- struct completion cmp;
- unsigned long tmo;
- unsigned long flags;
-
- src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
- if (!src)
- return -ENOMEM;
- dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
- if (!dest) {
- kfree(src);
- return -ENOMEM;
- }
-
- /* Fill in src buffer */
- for (i = 0; i < IOAT_TEST_SIZE; i++)
- src[i] = (u8)i;
-
- /* Start copy, using first DMA channel */
- dma_chan = container_of(device->common.channels.next,
- struct dma_chan,
- device_node);
- if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
- dev_err(&device->pdev->dev,
- "selftest cannot allocate chan resource\n");
- err = -ENODEV;
- goto out;
- }
-
- dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
- DMA_TO_DEVICE);
- dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
- DMA_FROM_DEVICE);
- flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
- tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
- IOAT_TEST_SIZE, flags);
- if (!tx) {
- dev_err(&device->pdev->dev,
- "Self-test prep failed, disabling\n");
- err = -ENODEV;
- goto free_resources;
- }
-
- async_tx_ack(tx);
- init_completion(&cmp);
- tx->callback = ioat_dma_test_callback;
- tx->callback_param = &cmp;
- cookie = tx->tx_submit(tx);
- if (cookie < 0) {
- dev_err(&device->pdev->dev,
- "Self-test setup failed, disabling\n");
- err = -ENODEV;
- goto free_resources;
- }
- device->common.device_issue_pending(dma_chan);
-
- tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
-
- if (tmo == 0 ||
- device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
- != DMA_SUCCESS) {
- dev_err(&device->pdev->dev,
- "Self-test copy timed out, disabling\n");
- err = -ENODEV;
- goto free_resources;
- }
- if (memcmp(src, dest, IOAT_TEST_SIZE)) {
- dev_err(&device->pdev->dev,
- "Self-test copy failed compare, disabling\n");
- err = -ENODEV;
- goto free_resources;
- }
-
-free_resources:
- device->common.device_free_chan_resources(dma_chan);
-out:
- kfree(src);
- kfree(dest);
- return err;
-}
-
-static char ioat_interrupt_style[32] = "msix";
-module_param_string(ioat_interrupt_style, ioat_interrupt_style,
- sizeof(ioat_interrupt_style), 0644);
-MODULE_PARM_DESC(ioat_interrupt_style,
- "set ioat interrupt style: msix (default), "
- "msix-single-vector, msi, intx)");
-
-/**
- * ioat_dma_setup_interrupts - setup interrupt handler
- * @device: ioat device
- */
-static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
-{
- struct ioat_dma_chan *ioat_chan;
- int err, i, j, msixcnt;
- u8 intrctrl = 0;
-
- if (!strcmp(ioat_interrupt_style, "msix"))
- goto msix;
- if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
- goto msix_single_vector;
- if (!strcmp(ioat_interrupt_style, "msi"))
- goto msi;
- if (!strcmp(ioat_interrupt_style, "intx"))
- goto intx;
- dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
- ioat_interrupt_style);
- goto err_no_irq;
-
-msix:
- /* The number of MSI-X vectors should equal the number of channels */
- msixcnt = device->common.chancnt;
- for (i = 0; i < msixcnt; i++)
- device->msix_entries[i].entry = i;
-
- err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
- if (err < 0)
- goto msi;
- if (err > 0)
- goto msix_single_vector;
-
- for (i = 0; i < msixcnt; i++) {
- ioat_chan = ioat_lookup_chan_by_index(device, i);
- err = request_irq(device->msix_entries[i].vector,
- ioat_dma_do_interrupt_msix,
- 0, "ioat-msix", ioat_chan);
- if (err) {
- for (j = 0; j < i; j++) {
- ioat_chan =
- ioat_lookup_chan_by_index(device, j);
- free_irq(device->msix_entries[j].vector,
- ioat_chan);
- }
- goto msix_single_vector;
- }
- }
- intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
- device->irq_mode = msix_multi_vector;
- goto done;
-
-msix_single_vector:
- device->msix_entries[0].entry = 0;
- err = pci_enable_msix(device->pdev, device->msix_entries, 1);
- if (err)
- goto msi;
-
- err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
- 0, "ioat-msix", device);
- if (err) {
- pci_disable_msix(device->pdev);
- goto msi;
- }
- device->irq_mode = msix_single_vector;
- goto done;
-
-msi:
- err = pci_enable_msi(device->pdev);
- if (err)
- goto intx;
-
- err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
- 0, "ioat-msi", device);
- if (err) {
- pci_disable_msi(device->pdev);
- goto intx;
- }
- /*
- * CB 1.2 devices need a bit set in configuration space to enable MSI
- */
- if (device->version == IOAT_VER_1_2) {
- u32 dmactrl;
- pci_read_config_dword(device->pdev,
- IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
- dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
- pci_write_config_dword(device->pdev,
- IOAT_PCI_DMACTRL_OFFSET, dmactrl);
- }
- device->irq_mode = msi;
- goto done;
-
-intx:
- err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
- IRQF_SHARED, "ioat-intx", device);
- if (err)
- goto err_no_irq;
- device->irq_mode = intx;
-
-done:
- intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
- writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
- return 0;
-
-err_no_irq:
- /* Disable all interrupt generation */
- writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
- dev_err(&device->pdev->dev, "no usable interrupts\n");
- device->irq_mode = none;
- return -1;
-}
-
-/**
- * ioat_dma_remove_interrupts - remove whatever interrupts were set
- * @device: ioat device
- */
-static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
-{
- struct ioat_dma_chan *ioat_chan;
- int i;
-
- /* Disable all interrupt generation */
- writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
-
- switch (device->irq_mode) {
- case msix_multi_vector:
- for (i = 0; i < device->common.chancnt; i++) {
- ioat_chan = ioat_lookup_chan_by_index(device, i);
- free_irq(device->msix_entries[i].vector, ioat_chan);
- }
- pci_disable_msix(device->pdev);
- break;
- case msix_single_vector:
- free_irq(device->msix_entries[0].vector, device);
- pci_disable_msix(device->pdev);
- break;
- case msi:
- free_irq(device->pdev->irq, device);
- pci_disable_msi(device->pdev);
- break;
- case intx:
- free_irq(device->pdev->irq, device);
- break;
- case none:
- dev_warn(&device->pdev->dev,
- "call to %s without interrupts setup\n", __func__);
- }
- device->irq_mode = none;
-}
-
-struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
- void __iomem *iobase)
-{
- int err;
- struct ioatdma_device *device;
-
- device = kzalloc(sizeof(*device), GFP_KERNEL);
- if (!device) {
- err = -ENOMEM;
- goto err_kzalloc;
- }
- device->pdev = pdev;
- device->reg_base = iobase;
- device->version = readb(device->reg_base + IOAT_VER_OFFSET);
-
- /* DMA coherent memory pool for DMA descriptor allocations */
- device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
- sizeof(struct ioat_dma_descriptor),
- 64, 0);
- if (!device->dma_pool) {
- err = -ENOMEM;
- goto err_dma_pool;
- }
-
- device->completion_pool = pci_pool_create("completion_pool", pdev,
- sizeof(u64), SMP_CACHE_BYTES,
- SMP_CACHE_BYTES);
- if (!device->completion_pool) {
- err = -ENOMEM;
- goto err_completion_pool;
- }
-
- INIT_LIST_HEAD(&device->common.channels);
- ioat_dma_enumerate_channels(device);
-
- device->common.device_alloc_chan_resources =
- ioat_dma_alloc_chan_resources;
- device->common.device_free_chan_resources =
- ioat_dma_free_chan_resources;
- device->common.dev = &pdev->dev;
-
- dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
- device->common.device_is_tx_complete = ioat_dma_is_complete;
- switch (device->version) {
- case IOAT_VER_1_2:
- device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
- device->common.device_issue_pending =
- ioat1_dma_memcpy_issue_pending;
- break;
- case IOAT_VER_2_0:
- case IOAT_VER_3_0:
- device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
- device->common.device_issue_pending =
- ioat2_dma_memcpy_issue_pending;
- break;
- }
-
- dev_err(&device->pdev->dev,
- "Intel(R) I/OAT DMA Engine found,"
- " %d channels, device version 0x%02x, driver version %s\n",
- device->common.chancnt, device->version, IOAT_DMA_VERSION);
-
- if (!device->common.chancnt) {
- dev_err(&device->pdev->dev,
- "Intel(R) I/OAT DMA Engine problem found: "
- "zero channels detected\n");
- goto err_setup_interrupts;
- }
-
- err = ioat_dma_setup_interrupts(device);
- if (err)
- goto err_setup_interrupts;
-
- err = ioat_dma_self_test(device);
- if (err)
- goto err_self_test;
-
- ioat_set_tcp_copy_break(device);
-
- dma_async_device_register(&device->common);
-
- if (device->version != IOAT_VER_3_0) {
- INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
- schedule_delayed_work(&device->work,
- WATCHDOG_DELAY);
- }
-
- return device;
-
-err_self_test:
- ioat_dma_remove_interrupts(device);
-err_setup_interrupts:
- pci_pool_destroy(device->completion_pool);
-err_completion_pool:
- pci_pool_destroy(device->dma_pool);
-err_dma_pool:
- kfree(device);
-err_kzalloc:
- dev_err(&pdev->dev,
- "Intel(R) I/OAT DMA Engine initialization failed\n");
- return NULL;
-}
-
-void ioat_dma_remove(struct ioatdma_device *device)
-{
- struct dma_chan *chan, *_chan;
- struct ioat_dma_chan *ioat_chan;
-
- if (device->version != IOAT_VER_3_0)
- cancel_delayed_work(&device->work);
-
- ioat_dma_remove_interrupts(device);
-
- dma_async_device_unregister(&device->common);
-
- pci_pool_destroy(device->dma_pool);
- pci_pool_destroy(device->completion_pool);
-
- iounmap(device->reg_base);
- pci_release_regions(device->pdev);
- pci_disable_device(device->pdev);
-
- list_for_each_entry_safe(chan, _chan,
- &device->common.channels, device_node) {
- ioat_chan = to_ioat_chan(chan);
- list_del(&chan->device_node);
- kfree(ioat_chan);
- }
- kfree(device);
-}
-
+++ /dev/null
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef IOATDMA_H
-#define IOATDMA_H
-
-#include <linux/dmaengine.h>
-#include "ioatdma_hw.h"
-#include <linux/init.h>
-#include <linux/dmapool.h>
-#include <linux/cache.h>
-#include <linux/pci_ids.h>
-#include <net/tcp.h>
-
-#define IOAT_DMA_VERSION "3.64"
-
-enum ioat_interrupt {
- none = 0,
- msix_multi_vector = 1,
- msix_single_vector = 2,
- msi = 3,
- intx = 4,
-};
-
-#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
-#define IOAT_DMA_DCA_ANY_CPU ~0
-#define IOAT_WATCHDOG_PERIOD (2 * HZ)
-
-
-/**
- * struct ioatdma_device - internal representation of a IOAT device
- * @pdev: PCI-Express device
- * @reg_base: MMIO register space base address
- * @dma_pool: for allocating DMA descriptors
- * @common: embedded struct dma_device
- * @version: version of ioatdma device
- * @irq_mode: which style irq to use
- * @msix_entries: irq handlers
- * @idx: per channel data
- */
-
-struct ioatdma_device {
- struct pci_dev *pdev;
- void __iomem *reg_base;
- struct pci_pool *dma_pool;
- struct pci_pool *completion_pool;
- struct dma_device common;
- u8 version;
- enum ioat_interrupt irq_mode;
- struct delayed_work work;
- struct msix_entry msix_entries[4];
- struct ioat_dma_chan *idx[4];
-};
-
-/**
- * struct ioat_dma_chan - internal representation of a DMA channel
- */
-struct ioat_dma_chan {
-
- void __iomem *reg_base;
-
- dma_cookie_t completed_cookie;
- unsigned long last_completion;
- unsigned long last_completion_time;
-
- size_t xfercap; /* XFERCAP register value expanded out */
-
- spinlock_t cleanup_lock;
- spinlock_t desc_lock;
- struct list_head free_desc;
- struct list_head used_desc;
- unsigned long watchdog_completion;
- int watchdog_tcp_cookie;
- u32 watchdog_last_tcp_cookie;
- struct delayed_work work;
-
- int pending;
- int dmacount;
- int desccount;
-
- struct ioatdma_device *device;
- struct dma_chan common;
-
- dma_addr_t completion_addr;
- union {
- u64 full; /* HW completion writeback */
- struct {
- u32 low;
- u32 high;
- };
- } *completion_virt;
- unsigned long last_compl_desc_addr_hw;
- struct tasklet_struct cleanup_task;
-};
-
-/* wrapper around hardware descriptor format + additional software fields */
-
-/**
- * struct ioat_desc_sw - wrapper around hardware descriptor
- * @hw: hardware DMA descriptor
- * @node: this descriptor will either be on the free list,
- * or attached to a transaction list (async_tx.tx_list)
- * @tx_cnt: number of descriptors required to complete the transaction
- * @async_tx: the generic software descriptor for all engines
- */
-struct ioat_desc_sw {
- struct ioat_dma_descriptor *hw;
- struct list_head node;
- int tx_cnt;
- size_t len;
- dma_addr_t src;
- dma_addr_t dst;
- struct dma_async_tx_descriptor async_tx;
-};
-
-static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
-{
- #ifdef CONFIG_NET_DMA
- switch (dev->version) {
- case IOAT_VER_1_2:
- sysctl_tcp_dma_copybreak = 4096;
- break;
- case IOAT_VER_2_0:
- sysctl_tcp_dma_copybreak = 2048;
- break;
- case IOAT_VER_3_0:
- sysctl_tcp_dma_copybreak = 262144;
- break;
- }
- #endif
-}
-
-#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
-struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
- void __iomem *iobase);
-void ioat_dma_remove(struct ioatdma_device *device);
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-#else
-#define ioat_dma_probe(pdev, iobase) NULL
-#define ioat_dma_remove(device) do { } while (0)
-#define ioat_dca_init(pdev, iobase) NULL
-#define ioat2_dca_init(pdev, iobase) NULL
-#define ioat3_dca_init(pdev, iobase) NULL
-#endif
-
-#endif /* IOATDMA_H */
+++ /dev/null
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef _IOAT_HW_H_
-#define _IOAT_HW_H_
-
-/* PCI Configuration Space Values */
-#define IOAT_PCI_VID 0x8086
-
-/* CB device ID's */
-#define IOAT_PCI_DID_5000 0x1A38
-#define IOAT_PCI_DID_CNB 0x360B
-#define IOAT_PCI_DID_SCNB 0x65FF
-#define IOAT_PCI_DID_SNB 0x402F
-
-#define IOAT_PCI_RID 0x00
-#define IOAT_PCI_SVID 0x8086
-#define IOAT_PCI_SID 0x8086
-#define IOAT_VER_1_2 0x12 /* Version 1.2 */
-#define IOAT_VER_2_0 0x20 /* Version 2.0 */
-#define IOAT_VER_3_0 0x30 /* Version 3.0 */
-
-struct ioat_dma_descriptor {
- uint32_t size;
- uint32_t ctl;
- uint64_t src_addr;
- uint64_t dst_addr;
- uint64_t next;
- uint64_t rsv1;
- uint64_t rsv2;
- uint64_t user1;
- uint64_t user2;
-};
-
-#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
-#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
-#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
-#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
-#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010
-#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020
-#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
-#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
-#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100
-#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200
-#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400
-
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000
-
-#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000
-
-#endif
+++ /dev/null
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef _IOAT_REGISTERS_H_
-#define _IOAT_REGISTERS_H_
-
-#define IOAT_PCI_DMACTRL_OFFSET 0x48
-#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
-#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
-
-#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
-#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
-#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
-
-/* MMIO Device Registers */
-#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
-
-#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */
-#define IOAT_XFERCAP_4KB 12
-#define IOAT_XFERCAP_8KB 13
-#define IOAT_XFERCAP_16KB 14
-#define IOAT_XFERCAP_32KB 15
-#define IOAT_XFERCAP_32GB 0
-
-#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */
-#define IOAT_GENCTRL_DEBUG_EN 0x01
-
-#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */
-#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */
-#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */
-#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */
-#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */
-
-#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */
-
-#define IOAT_VER_OFFSET 0x08 /* 8-bit */
-#define IOAT_VER_MAJOR_MASK 0xF0
-#define IOAT_VER_MINOR_MASK 0x0F
-#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4)
-#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK)
-
-#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */
-
-#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
-#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */
-#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */
-
-#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
-#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
-
-#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
-
-/* DMA Channel Registers */
-#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
-#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
-#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
-#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
-#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
-#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
-#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
-#define IOAT_CHANCTRL_INT_DISABLE 0x0001
-
-#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
-#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
-#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
-
-
-#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */
-#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
-#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
-#define IOAT1_CHANSTS_OFFSET_LOW 0x04
-#define IOAT2_CHANSTS_OFFSET_LOW 0x08
-#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
-#define IOAT1_CHANSTS_OFFSET_HIGH 0x08
-#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
-#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F
-#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010
-#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3
-
-
-
-#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */
-
-#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */
-#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
-#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */
-
-/* CB DCA Memory Space Registers */
-#define IOAT_DCAOFFSET_OFFSET 0x14
-/* CB_BAR + IOAT_DCAOFFSET value */
-#define IOAT_DCA_VER_OFFSET 0x00
-#define IOAT_DCA_VER_MAJOR_MASK 0xF0
-#define IOAT_DCA_VER_MINOR_MASK 0x0F
-
-#define IOAT_DCA_COMP_OFFSET 0x02
-#define IOAT_DCA_COMP_V1 0x1
-
-#define IOAT_FSB_CAPABILITY_OFFSET 0x04
-#define IOAT_FSB_CAPABILITY_PREFETCH 0x1
-
-#define IOAT_PCI_CAPABILITY_OFFSET 0x06
-#define IOAT_PCI_CAPABILITY_MEMWR 0x1
-
-#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08
-#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1
-
-#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A
-#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1
-
-#define IOAT_APICID_TAG_MAP_OFFSET 0x0C
-#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F
-#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
-#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0
-#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
-#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00
-#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
-#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000
-#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
-#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000
-#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
-#define IOAT_APICID_TAG_CB2_VALID 0x8080808080
-
-#define IOAT_DCA_GREQID_OFFSET 0x10
-#define IOAT_DCA_GREQID_SIZE 0x04
-#define IOAT_DCA_GREQID_MASK 0xFFFF
-#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000
-#define IOAT_DCA_GREQID_VALID 0x20000000
-#define IOAT_DCA_GREQID_LASTID 0x80000000
-
-#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
-#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
-
-#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
-#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
-
-#define IOAT3_CSI_CONTROL_OFFSET 0x0C
-#define IOAT3_CSI_CONTROL_PREFETCH 0x1
-
-#define IOAT3_PCI_CONTROL_OFFSET 0x0E
-#define IOAT3_PCI_CONTROL_MEMWR 0x1
-
-#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
-
-#define IOAT3_DCA_GREQID_OFFSET 0x02
-
-#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
-#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
-#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
-#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C
-#define IOAT2_CHAINADDR_OFFSET_LOW 0x10
-#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
-#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10
-#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14
-#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
-
-#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
-#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */
-#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
-#define IOAT_CHANCMD_RESET 0x20
-#define IOAT_CHANCMD_RESUME 0x10
-#define IOAT_CHANCMD_ABORT 0x08
-#define IOAT_CHANCMD_SUSPEND 0x04
-#define IOAT_CHANCMD_APPEND 0x02
-#define IOAT_CHANCMD_START 0x01
-
-#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */
-#define IOAT_CHANCMP_OFFSET_LOW 0x18
-#define IOAT_CHANCMP_OFFSET_HIGH 0x1C
-
-#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */
-#define IOAT_CDAR_OFFSET_LOW 0x20
-#define IOAT_CDAR_OFFSET_HIGH 0x24
-
-#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
-#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001
-#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008
-#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
-#define IOAT_CHANERR_CHANCMD_ERR 0x0020
-#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
-#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
-#define IOAT_CHANERR_READ_DATA_ERR 0x0100
-#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
-#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400
-#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800
-#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
-#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
-#define IOAT_CHANERR_SOFT_ERR 0x4000
-#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
-
-#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
-
-#endif /* _IOAT_REGISTERS_H_ */
#include <linux/platform_device.h>
#include <linux/memory.h>
#include <linux/ioport.h>
+#include <linux/raid/pq.h>
#include <mach/adma.h>
}
}
+static void
+iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ struct iop_adma_desc_slot *unmap = desc->group_head;
+ struct device *dev = &iop_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = tx->flags;
+ u32 src_cnt;
+ dma_addr_t addr;
+ dma_addr_t dest;
+
+ src_cnt = unmap->unmap_src_cnt;
+ dest = iop_desc_get_dest_addr(unmap, iop_chan);
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ enum dma_data_direction dir;
+
+ if (src_cnt > 1) /* is xor? */
+ dir = DMA_BIDIRECTIONAL;
+ else
+ dir = DMA_FROM_DEVICE;
+
+ dma_unmap_page(dev, dest, len, dir);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ while (src_cnt--) {
+ addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
+ if (addr == dest)
+ continue;
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ }
+ desc->group_head = NULL;
+}
+
+static void
+iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ struct iop_adma_desc_slot *unmap = desc->group_head;
+ struct device *dev = &iop_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = tx->flags;
+ u32 src_cnt = unmap->unmap_src_cnt;
+ dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
+ dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
+ int i;
+
+ if (tx->flags & DMA_PREP_CONTINUE)
+ src_cnt -= 3;
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
+ dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ dma_addr_t addr;
+
+ for (i = 0; i < src_cnt; i++) {
+ addr = iop_desc_get_src_addr(unmap, iop_chan, i);
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ if (desc->pq_check_result) {
+ dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
+ dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
+ }
+ }
+
+ desc->group_head = NULL;
+}
+
+
static dma_cookie_t
iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
{
- BUG_ON(desc->async_tx.cookie < 0);
- if (desc->async_tx.cookie > 0) {
- cookie = desc->async_tx.cookie;
- desc->async_tx.cookie = 0;
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+ BUG_ON(tx->cookie < 0);
+ if (tx->cookie > 0) {
+ cookie = tx->cookie;
+ tx->cookie = 0;
/* call the callback (must not sleep or submit new
* operations to this channel)
*/
- if (desc->async_tx.callback)
- desc->async_tx.callback(
- desc->async_tx.callback_param);
+ if (tx->callback)
+ tx->callback(tx->callback_param);
/* unmap dma addresses
* (unmap_single vs unmap_page?)
*/
if (desc->group_head && desc->unmap_len) {
- struct iop_adma_desc_slot *unmap = desc->group_head;
- struct device *dev =
- &iop_chan->device->pdev->dev;
- u32 len = unmap->unmap_len;
- enum dma_ctrl_flags flags = desc->async_tx.flags;
- u32 src_cnt;
- dma_addr_t addr;
- dma_addr_t dest;
-
- src_cnt = unmap->unmap_src_cnt;
- dest = iop_desc_get_dest_addr(unmap, iop_chan);
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- enum dma_data_direction dir;
-
- if (src_cnt > 1) /* is xor? */
- dir = DMA_BIDIRECTIONAL;
- else
- dir = DMA_FROM_DEVICE;
-
- dma_unmap_page(dev, dest, len, dir);
- }
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- while (src_cnt--) {
- addr = iop_desc_get_src_addr(unmap,
- iop_chan,
- src_cnt);
- if (addr == dest)
- continue;
- dma_unmap_page(dev, addr, len,
- DMA_TO_DEVICE);
- }
- }
- desc->group_head = NULL;
+ if (iop_desc_is_pq(desc))
+ iop_desc_unmap_pq(iop_chan, desc);
+ else
+ iop_desc_unmap(iop_chan, desc);
}
}
/* run dependent operations */
- dma_run_dependencies(&desc->async_tx);
+ dma_run_dependencies(tx);
return cookie;
}
{
struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
- spin_lock(&iop_chan->lock);
+ /* lockdep will flag depedency submissions as potentially
+ * recursive locking, this is not the case as a dependency
+ * submission will never recurse a channels submit routine.
+ * There are checks in async_tx.c to prevent this.
+ */
+ spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
__iop_adma_slot_cleanup(iop_chan);
spin_unlock(&iop_chan->lock);
}
}
alloc_tail->group_head = alloc_start;
alloc_tail->async_tx.cookie = -EBUSY;
- list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ list_splice(&chain, &alloc_tail->tx_list);
iop_chan->last_used = last_used;
iop_desc_clear_next_desc(alloc_start);
iop_desc_clear_next_desc(alloc_tail);
old_chain_tail = list_entry(iop_chan->chain.prev,
struct iop_adma_desc_slot, chain_node);
- list_splice_init(&sw_desc->async_tx.tx_list,
+ list_splice_init(&sw_desc->tx_list,
&old_chain_tail->chain_node);
/* fix up the hardware chain */
dma_async_tx_descriptor_init(&slot->async_tx, chan);
slot->async_tx.tx_submit = iop_adma_tx_submit;
+ INIT_LIST_HEAD(&slot->tx_list);
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
hw_desc = (char *) iop_chan->device->dma_desc_pool;
}
static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
- unsigned int src_cnt, size_t len, u32 *result,
- unsigned long flags)
+iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+ unsigned int src_cnt, size_t len, u32 *result,
+ unsigned long flags)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *grp_start;
return sw_desc ? &sw_desc->async_tx : NULL;
}
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *g;
+ int slot_cnt, slots_per_op;
+ int continue_srcs;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev,
+ "%s src_cnt: %d len: %u flags: %lx\n",
+ __func__, src_cnt, len, flags);
+
+ if (dmaf_p_disabled_continue(flags))
+ continue_srcs = 1+src_cnt;
+ else if (dmaf_continue(flags))
+ continue_srcs = 3+src_cnt;
+ else
+ continue_srcs = 0+src_cnt;
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ int i;
+
+ g = sw_desc->group_head;
+ iop_desc_set_byte_count(g, iop_chan, len);
+
+ /* even if P is disabled its destination address (bits
+ * [3:0]) must match Q. It is ok if P points to an
+ * invalid address, it won't be written.
+ */
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ dst[0] = dst[1] & 0x7;
+
+ iop_desc_set_pq_addr(g, dst);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ for (i = 0; i < src_cnt; i++)
+ iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+
+ /* if we are continuing a previous operation factor in
+ * the old p and q values, see the comment for dma_maxpq
+ * in include/linux/dmaengine.h
+ */
+ if (dmaf_p_disabled_continue(flags))
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ else if (dmaf_continue(flags)) {
+ iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+ }
+ iop_desc_init_pq(g, i, flags);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, enum sum_check_flags *pqres,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *g;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+ __func__, src_cnt, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ /* for validate operations p and q are tagged onto the
+ * end of the source list
+ */
+ int pq_idx = src_cnt;
+
+ g = sw_desc->group_head;
+ iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+ iop_desc_set_pq_zero_sum_byte_count(g, len);
+ g->pq_check_result = pqres;
+ pr_debug("\t%s: g->pq_check_result: %p\n",
+ __func__, g->pq_check_result);
+ sw_desc->unmap_src_cnt = src_cnt+2;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+ src[src_cnt],
+ scf[src_cnt]);
+ iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
static void iop_adma_free_chan_resources(struct dma_chan *chan)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
static int __devinit
-iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
+iop_adma_xor_val_self_test(struct iop_adma_device *device)
{
int i, src_idx;
struct page *dest;
PAGE_SIZE, DMA_TO_DEVICE);
/* skip zero sum if the capability is not present */
- if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
goto free_resources;
/* zero sum the sources with the destintation page */
dma_srcs[i] = dma_map_page(dma_chan->device->dev,
zero_sum_srcs[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
- tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
- IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
- &zero_sum_result,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
dma_srcs[i] = dma_map_page(dma_chan->device->dev,
zero_sum_srcs[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
- tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
- IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
- &zero_sum_result,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
return err;
}
+#ifdef CONFIG_MD_RAID6_PQ
+static int __devinit
+iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+{
+ /* combined sources, software pq results, and extra hw pq results */
+ struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+ /* ptr to the extra hw pq buffers defined above */
+ struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+ /* address conversion buffers (dma_map / page_address) */
+ void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+ dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
+ dma_addr_t pq_dest[2];
+
+ int i;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u32 zero_sum_result;
+ int err = 0;
+ struct device *dev;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ for (i = 0; i < ARRAY_SIZE(pq); i++) {
+ pq[i] = alloc_page(GFP_KERNEL);
+ if (!pq[i]) {
+ while (i--)
+ __free_page(pq[i]);
+ return -ENOMEM;
+ }
+ }
+
+ /* Fill in src buffers */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+ pq_sw[i] = page_address(pq[i]);
+ memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
+ }
+ pq_sw[i] = page_address(pq[i]);
+ pq_sw[i+1] = page_address(pq[i+1]);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev = dma_chan->device->dev;
+
+ /* initialize the dests */
+ memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
+ memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
+
+ /* test pq */
+ pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+ IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+ PAGE_SIZE,
+ DMA_PREP_INTERRUPT |
+ DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test pq timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+
+ if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+ page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+ dev_err(dev, "Self-test p failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+ page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+ dev_err(dev, "Self-test q failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test correct zero sum using the software generated pq values */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ zero_sum_result = ~0;
+ tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ pq_src, IOP_ADMA_NUM_SRC_TEST,
+ raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 0) {
+ dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+ zero_sum_result);
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test incorrect zero sum */
+ i = IOP_ADMA_NUM_SRC_TEST;
+ memset(pq_sw[i] + 100, 0, 100);
+ memset(pq_sw[i+1] + 200, 0, 200);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ zero_sum_result = 0;
+ tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ pq_src, IOP_ADMA_NUM_SRC_TEST,
+ raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+ dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+ zero_sum_result);
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ i = ARRAY_SIZE(pq);
+ while (i--)
+ __free_page(pq[i]);
+ return err;
+}
+#endif
+
static int __devexit iop_adma_remove(struct platform_device *dev)
{
struct iop_adma_device *device = platform_get_drvdata(dev);
dma_dev->max_xor = iop_adma_get_max_xor();
dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
}
- if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
- dma_dev->device_prep_dma_zero_sum =
- iop_adma_prep_dma_zero_sum;
+ if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_xor_val =
+ iop_adma_prep_dma_xor_val;
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+ dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+ }
+ if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_pq_val =
+ iop_adma_prep_dma_pq_val;
if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
dma_dev->device_prep_dma_interrupt =
iop_adma_prep_dma_interrupt;
}
if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
- dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
- ret = iop_adma_xor_zero_sum_self_test(adev);
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ ret = iop_adma_xor_val_self_test(adev);
dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
if (ret)
goto err_free_iop_chan;
}
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+ dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+ #ifdef CONFIG_MD_RAID6_PQ
+ ret = iop_adma_pq_zero_sum_self_test(adev);
+ dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+ #else
+ /* can not test raid6, so do not publish capability */
+ dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+ dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+ ret = 0;
+ #endif
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
- "( %s%s%s%s%s%s%s%s%s%s)\n",
- dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
- dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
- dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
+ "( %s%s%s%s%s%s%s)\n",
+ dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+ dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
- dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
- dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
+ dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
- dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
if (sw_desc) {
grp_start = sw_desc->group_head;
- list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+ list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
async_tx_ack(&sw_desc->async_tx);
iop_desc_init_memcpy(grp_start, 0);
iop_desc_set_byte_count(grp_start, iop_chan, 0);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
- list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+ list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
async_tx_ack(&sw_desc->async_tx);
iop_desc_init_null_xor(grp_start, 2, 0);
iop_desc_set_byte_count(grp_start, iop_chan, 0);
iov_byte_offset,
kdata,
copy);
+ /* poll for a descriptor slot */
+ if (unlikely(dma_cookie < 0)) {
+ dma_async_issue_pending(chan);
+ continue;
+ }
len -= copy;
iov[iovec_idx].iov_len -= copy;
page,
offset,
copy);
+ /* poll for a descriptor slot */
+ if (unlikely(dma_cookie < 0)) {
+ dma_async_issue_pending(chan);
+ continue;
+ }
len -= copy;
iov[iovec_idx].iov_len -= copy;
}
alloc_tail->group_head = alloc_start;
alloc_tail->async_tx.cookie = -EBUSY;
- list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ list_splice(&chain, &alloc_tail->tx_list);
mv_chan->last_used = last_used;
mv_desc_clear_next_desc(alloc_start);
mv_desc_clear_next_desc(alloc_tail);
cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
if (list_empty(&mv_chan->chain))
- list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+ list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
else {
new_hw_chain = 0;
old_chain_tail = list_entry(mv_chan->chain.prev,
struct mv_xor_desc_slot,
chain_node);
- list_splice_init(&grp_start->async_tx.tx_list,
+ list_splice_init(&grp_start->tx_list,
&old_chain_tail->chain_node);
if (!mv_can_chain(grp_start))
slot->async_tx.tx_submit = mv_xor_tx_submit;
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->tx_list);
hw_desc = (char *) mv_chan->device->dma_desc_pool;
slot->async_tx.phys =
(dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
* @idx: pool index
* @unmap_src_cnt: number of xor sources
* @unmap_len: transaction bytecount
+ * @tx_list: list of slots that make up a multi-descriptor transaction
* @async_tx: support for the async_tx api
- * @group_list: list of slots that make up a multi-descriptor transaction
- * for example transfer lengths larger than the supported hw max
* @xor_check_result: result of zero sum
* @crc32_result: result crc calculation
*/
u16 unmap_src_cnt;
u32 value;
size_t unmap_len;
+ struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
union {
u32 *xor_check_result;
--- /dev/null
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * base is drivers/dma/flsdma.c
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - DMA of SuperH does not have Hardware DMA chain mode.
+ * - MAX DMA size is 16MB.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/platform_device.h>
+#include <cpu/dma.h>
+#include <asm/dma-sh.h>
+#include "shdma.h"
+
+/* DMA descriptor control */
+#define DESC_LAST (-1)
+#define DESC_COMP (1)
+#define DESC_NCOMP (0)
+
+#define NR_DESCS_PER_CHANNEL 32
+/*
+ * Define the default configuration for dual address memory-memory transfer.
+ * The 0x400 value represents auto-request, external->external.
+ *
+ * And this driver set 4byte burst mode.
+ * If you want to change mode, you need to change RS_DEFAULT of value.
+ * (ex 1byte burst mode -> (RS_DUAL & ~TS_32)
+ */
+#define RS_DEFAULT (RS_DUAL)
+
+#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+ ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+ return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = RS_DEFAULT; /* default is DUAL mode */
+ sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR register
+ */
+static void sh_dmae_ctl_stop(int id)
+{
+ unsigned short dmaor = dmaor_read_reg(id);
+
+ dmaor &= ~(DMAOR_NMIF | DMAOR_AE);
+ dmaor_write_reg(id, dmaor);
+}
+
+static int sh_dmae_rst(int id)
+{
+ unsigned short dmaor;
+
+ sh_dmae_ctl_stop(id);
+ dmaor = (dmaor_read_reg(id)|DMAOR_INIT);
+
+ dmaor_write_reg(id, dmaor);
+ if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) {
+ pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int dmae_is_idle(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+ if (chcr & CHCR_DE) {
+ if (!(chcr & CHCR_TE))
+ return -EBUSY; /* working */
+ }
+ return 0; /* waiting */
+}
+
+static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+ return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
+}
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw)
+{
+ sh_dmae_writel(sh_chan, hw.sar, SAR);
+ sh_dmae_writel(sh_chan, hw.dar, DAR);
+ sh_dmae_writel(sh_chan,
+ (hw.tcr >> calc_xmit_shift(sh_chan)), TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ chcr |= (CHCR_DE|CHCR_IE);
+ sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE);
+ sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+ int ret = dmae_is_idle(sh_chan);
+ /* When DMA was working, can not set data to CHCR */
+ if (ret)
+ return ret;
+
+ sh_dmae_writel(sh_chan, val, CHCR);
+ return 0;
+}
+
+#define DMARS1_ADDR 0x04
+#define DMARS2_ADDR 0x08
+#define DMARS_SHIFT 8
+#define DMARS_CHAN_MSK 0x01
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+ u32 addr;
+ int shift = 0;
+ int ret = dmae_is_idle(sh_chan);
+ if (ret)
+ return ret;
+
+ if (sh_chan->id & DMARS_CHAN_MSK)
+ shift = DMARS_SHIFT;
+
+ switch (sh_chan->id) {
+ /* DMARS0 */
+ case 0:
+ case 1:
+ addr = SH_DMARS_BASE;
+ break;
+ /* DMARS1 */
+ case 2:
+ case 3:
+ addr = (SH_DMARS_BASE + DMARS1_ADDR);
+ break;
+ /* DMARS2 */
+ case 4:
+ case 5:
+ addr = (SH_DMARS_BASE + DMARS2_ADDR);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ctrl_outw((val << shift) |
+ (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)),
+ addr);
+
+ return 0;
+}
+
+static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct sh_desc *desc = tx_to_sh_desc(tx);
+ struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ cookie = sh_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+
+ /* If desc only in the case of 1 */
+ if (desc->async_tx.cookie != -EBUSY)
+ desc->async_tx.cookie = cookie;
+ sh_chan->common.cookie = desc->async_tx.cookie;
+
+ list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev);
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return cookie;
+}
+
+static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
+{
+ struct sh_desc *desc, *_desc, *ret = NULL;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) {
+ if (async_tx_test_ack(&desc->async_tx)) {
+ list_del(&desc->node);
+ ret = desc;
+ break;
+ }
+ }
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return ret;
+}
+
+static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc)
+{
+ if (desc) {
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+ list_add(&desc->node, &sh_chan->ld_free);
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+ }
+}
+
+static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ struct sh_desc *desc;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
+ spin_unlock_bh(&sh_chan->desc_lock);
+ desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
+ if (!desc) {
+ spin_lock_bh(&sh_chan->desc_lock);
+ break;
+ }
+ dma_async_tx_descriptor_init(&desc->async_tx,
+ &sh_chan->common);
+ desc->async_tx.tx_submit = sh_dmae_tx_submit;
+ desc->async_tx.flags = DMA_CTRL_ACK;
+ INIT_LIST_HEAD(&desc->tx_list);
+ sh_dmae_put_desc(sh_chan, desc);
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ sh_chan->descs_allocated++;
+ }
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return sh_chan->descs_allocated;
+}
+
+/*
+ * sh_dma_free_chan_resources - Free all resources of the channel.
+ */
+static void sh_dmae_free_chan_resources(struct dma_chan *chan)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ struct sh_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ BUG_ON(!list_empty(&sh_chan->ld_queue));
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ list_splice_init(&sh_chan->ld_free, &list);
+ sh_chan->descs_allocated = 0;
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ list_for_each_entry_safe(desc, _desc, &list, node)
+ kfree(desc);
+}
+
+static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct sh_dmae_chan *sh_chan;
+ struct sh_desc *first = NULL, *prev = NULL, *new;
+ size_t copy_size;
+
+ if (!chan)
+ return NULL;
+
+ if (!len)
+ return NULL;
+
+ sh_chan = to_sh_chan(chan);
+
+ do {
+ /* Allocate the link descriptor from DMA pool */
+ new = sh_dmae_get_desc(sh_chan);
+ if (!new) {
+ dev_err(sh_chan->dev,
+ "No free memory for link descriptor\n");
+ goto err_get_desc;
+ }
+
+ copy_size = min(len, (size_t)SH_DMA_TCR_MAX);
+
+ new->hw.sar = dma_src;
+ new->hw.dar = dma_dest;
+ new->hw.tcr = copy_size;
+ if (!first)
+ first = new;
+
+ new->mark = DESC_NCOMP;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ len -= copy_size;
+ dma_src += copy_size;
+ dma_dest += copy_size;
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ } while (len);
+
+ new->async_tx.flags = flags; /* client is in control of this ack */
+ new->async_tx.cookie = -EBUSY; /* Last desc */
+
+ return &first->async_tx;
+
+err_get_desc:
+ sh_dmae_put_desc(sh_chan, first);
+ return NULL;
+
+}
+
+/*
+ * sh_chan_ld_cleanup - Clean up link descriptors
+ *
+ * This function clean up the ld_queue of DMA channel.
+ */
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan)
+{
+ struct sh_desc *desc, *_desc;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
+ dma_async_tx_callback callback;
+ void *callback_param;
+
+ /* non send data */
+ if (desc->mark == DESC_NCOMP)
+ break;
+
+ /* send data sesc */
+ callback = desc->async_tx.callback;
+ callback_param = desc->async_tx.callback_param;
+
+ /* Remove from ld_queue list */
+ list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+
+ dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n",
+ desc);
+
+ list_move(&desc->node, &sh_chan->ld_free);
+ /* Run the link descriptor callback function */
+ if (callback) {
+ spin_unlock_bh(&sh_chan->desc_lock);
+ dev_dbg(sh_chan->dev, "link descriptor %p callback\n",
+ desc);
+ callback(callback_param);
+ spin_lock_bh(&sh_chan->desc_lock);
+ }
+ }
+ spin_unlock_bh(&sh_chan->desc_lock);
+}
+
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
+{
+ struct list_head *ld_node;
+ struct sh_dmae_regs hw;
+
+ /* DMA work check */
+ if (dmae_is_idle(sh_chan))
+ return;
+
+ /* Find the first un-transfer desciptor */
+ for (ld_node = sh_chan->ld_queue.next;
+ (ld_node != &sh_chan->ld_queue)
+ && (to_sh_desc(ld_node)->mark == DESC_COMP);
+ ld_node = ld_node->next)
+ cpu_relax();
+
+ if (ld_node != &sh_chan->ld_queue) {
+ /* Get the ld start address from ld_queue */
+ hw = to_sh_desc(ld_node)->hw;
+ dmae_set_reg(sh_chan, hw);
+ dmae_start(sh_chan);
+ }
+}
+
+static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ sh_chan_xfer_ld_queue(sh_chan);
+}
+
+static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+
+ sh_dmae_chan_ld_cleanup(sh_chan);
+
+ last_used = chan->cookie;
+ last_complete = sh_chan->completed_cookie;
+ if (last_complete == -EBUSY)
+ last_complete = last_used;
+
+ if (done)
+ *done = last_complete;
+
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t sh_dmae_interrupt(int irq, void *data)
+{
+ irqreturn_t ret = IRQ_NONE;
+ struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ if (chcr & CHCR_TE) {
+ /* DMA stop */
+ dmae_halt(sh_chan);
+
+ ret = IRQ_HANDLED;
+ tasklet_schedule(&sh_chan->tasklet);
+ }
+
+ return ret;
+}
+
+#if defined(CONFIG_CPU_SH4)
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+ int err = 0;
+ struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
+
+ /* IRQ Multi */
+ if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+ int cnt = 0;
+ switch (irq) {
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+ case DMTE6_IRQ:
+ cnt++;
+#endif
+ case DMTE0_IRQ:
+ if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
+ disable_irq(irq);
+ return IRQ_HANDLED;
+ }
+ default:
+ return IRQ_NONE;
+ }
+ } else {
+ /* reset dma controller */
+ err = sh_dmae_rst(0);
+ if (err)
+ return err;
+ if (shdev->pdata.mode & SHDMA_DMAOR1) {
+ err = sh_dmae_rst(1);
+ if (err)
+ return err;
+ }
+ disable_irq(irq);
+ return IRQ_HANDLED;
+ }
+}
+#endif
+
+static void dmae_do_tasklet(unsigned long data)
+{
+ struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+ struct sh_desc *desc, *_desc, *cur_desc = NULL;
+ u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+ list_for_each_entry_safe(desc, _desc,
+ &sh_chan->ld_queue, node) {
+ if ((desc->hw.sar + desc->hw.tcr) == sar_buf) {
+ cur_desc = desc;
+ break;
+ }
+ }
+
+ if (cur_desc) {
+ switch (cur_desc->async_tx.cookie) {
+ case 0: /* other desc data */
+ break;
+ case -EBUSY: /* last desc */
+ sh_chan->completed_cookie =
+ cur_desc->async_tx.cookie;
+ break;
+ default: /* first desc ( 0 < )*/
+ sh_chan->completed_cookie =
+ cur_desc->async_tx.cookie - 1;
+ break;
+ }
+ cur_desc->mark = DESC_COMP;
+ }
+ /* Next desc */
+ sh_chan_xfer_ld_queue(sh_chan);
+ sh_dmae_chan_ld_cleanup(sh_chan);
+}
+
+static unsigned int get_dmae_irq(unsigned int id)
+{
+ unsigned int irq = 0;
+ if (id < ARRAY_SIZE(dmte_irq_map))
+ irq = dmte_irq_map[id];
+ return irq;
+}
+
+static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
+{
+ int err;
+ unsigned int irq = get_dmae_irq(id);
+ unsigned long irqflags = IRQF_DISABLED;
+ struct sh_dmae_chan *new_sh_chan;
+
+ /* alloc channel */
+ new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
+ if (!new_sh_chan) {
+ dev_err(shdev->common.dev, "No free memory for allocating "
+ "dma channels!\n");
+ return -ENOMEM;
+ }
+
+ new_sh_chan->dev = shdev->common.dev;
+ new_sh_chan->id = id;
+
+ /* Init DMA tasklet */
+ tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
+ (unsigned long)new_sh_chan);
+
+ /* Init the channel */
+ dmae_init(new_sh_chan);
+
+ spin_lock_init(&new_sh_chan->desc_lock);
+
+ /* Init descripter manage list */
+ INIT_LIST_HEAD(&new_sh_chan->ld_queue);
+ INIT_LIST_HEAD(&new_sh_chan->ld_free);
+
+ /* copy struct dma_device */
+ new_sh_chan->common.device = &shdev->common;
+
+ /* Add the channel to DMA device channel list */
+ list_add_tail(&new_sh_chan->common.device_node,
+ &shdev->common.channels);
+ shdev->common.chancnt++;
+
+ if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+ irqflags = IRQF_SHARED;
+#if defined(DMTE6_IRQ)
+ if (irq >= DMTE6_IRQ)
+ irq = DMTE6_IRQ;
+ else
+#endif
+ irq = DMTE0_IRQ;
+ }
+
+ snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
+ "sh-dmae%d", new_sh_chan->id);
+
+ /* set up channel irq */
+ err = request_irq(irq, &sh_dmae_interrupt,
+ irqflags, new_sh_chan->dev_id, new_sh_chan);
+ if (err) {
+ dev_err(shdev->common.dev, "DMA channel %d request_irq error "
+ "with return %d\n", id, err);
+ goto err_no_irq;
+ }
+
+ /* CHCR register control function */
+ new_sh_chan->set_chcr = dmae_set_chcr;
+ /* DMARS register control function */
+ new_sh_chan->set_dmars = dmae_set_dmars;
+
+ shdev->chan[id] = new_sh_chan;
+ return 0;
+
+err_no_irq:
+ /* remove from dmaengine device node */
+ list_del(&new_sh_chan->common.device_node);
+ kfree(new_sh_chan);
+ return err;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+ int i;
+
+ for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
+ if (shdev->chan[i]) {
+ struct sh_dmae_chan *shchan = shdev->chan[i];
+ if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
+ free_irq(dmte_irq_map[i], shchan);
+
+ list_del(&shchan->common.device_node);
+ kfree(shchan);
+ shdev->chan[i] = NULL;
+ }
+ }
+ shdev->common.chancnt = 0;
+}
+
+static int __init sh_dmae_probe(struct platform_device *pdev)
+{
+ int err = 0, cnt, ecnt;
+ unsigned long irqflags = IRQF_DISABLED;
+#if defined(CONFIG_CPU_SH4)
+ int eirq[] = { DMAE0_IRQ,
+#if defined(DMAE1_IRQ)
+ DMAE1_IRQ
+#endif
+ };
+#endif
+ struct sh_dmae_device *shdev;
+
+ shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
+ if (!shdev) {
+ dev_err(&pdev->dev, "No enough memory\n");
+ err = -ENOMEM;
+ goto shdev_err;
+ }
+
+ /* get platform data */
+ if (!pdev->dev.platform_data)
+ goto shdev_err;
+
+ /* platform data */
+ memcpy(&shdev->pdata, pdev->dev.platform_data,
+ sizeof(struct sh_dmae_pdata));
+
+ /* reset dma controller */
+ err = sh_dmae_rst(0);
+ if (err)
+ goto rst_err;
+
+ /* SH7780/85/23 has DMAOR1 */
+ if (shdev->pdata.mode & SHDMA_DMAOR1) {
+ err = sh_dmae_rst(1);
+ if (err)
+ goto rst_err;
+ }
+
+ INIT_LIST_HEAD(&shdev->common.channels);
+
+ dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+ shdev->common.device_alloc_chan_resources
+ = sh_dmae_alloc_chan_resources;
+ shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
+ shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
+ shdev->common.device_is_tx_complete = sh_dmae_is_complete;
+ shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
+ shdev->common.dev = &pdev->dev;
+
+#if defined(CONFIG_CPU_SH4)
+ /* Non Mix IRQ mode SH7722/SH7730 etc... */
+ if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+ irqflags = IRQF_SHARED;
+ eirq[0] = DMTE0_IRQ;
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+ eirq[1] = DMTE6_IRQ;
+#endif
+ }
+
+ for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
+ err = request_irq(eirq[ecnt], sh_dmae_err,
+ irqflags, "DMAC Address Error", shdev);
+ if (err) {
+ dev_err(&pdev->dev, "DMA device request_irq"
+ "error (irq %d) with return %d\n",
+ eirq[ecnt], err);
+ goto eirq_err;
+ }
+ }
+#endif /* CONFIG_CPU_SH4 */
+
+ /* Create DMA Channel */
+ for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
+ err = sh_dmae_chan_probe(shdev, cnt);
+ if (err)
+ goto chan_probe_err;
+ }
+
+ platform_set_drvdata(pdev, shdev);
+ dma_async_device_register(&shdev->common);
+
+ return err;
+
+chan_probe_err:
+ sh_dmae_chan_remove(shdev);
+
+eirq_err:
+ for (ecnt-- ; ecnt >= 0; ecnt--)
+ free_irq(eirq[ecnt], shdev);
+
+rst_err:
+ kfree(shdev);
+
+shdev_err:
+ return err;
+}
+
+static int __exit sh_dmae_remove(struct platform_device *pdev)
+{
+ struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+
+ dma_async_device_unregister(&shdev->common);
+
+ if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+ free_irq(DMTE0_IRQ, shdev);
+#if defined(DMTE6_IRQ)
+ free_irq(DMTE6_IRQ, shdev);
+#endif
+ }
+
+ /* channel data remove */
+ sh_dmae_chan_remove(shdev);
+
+ if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) {
+ free_irq(DMAE0_IRQ, shdev);
+#if defined(DMAE1_IRQ)
+ free_irq(DMAE1_IRQ, shdev);
+#endif
+ }
+ kfree(shdev);
+
+ return 0;
+}
+
+static void sh_dmae_shutdown(struct platform_device *pdev)
+{
+ struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+ sh_dmae_ctl_stop(0);
+ if (shdev->pdata.mode & SHDMA_DMAOR1)
+ sh_dmae_ctl_stop(1);
+}
+
+static struct platform_driver sh_dmae_driver = {
+ .remove = __exit_p(sh_dmae_remove),
+ .shutdown = sh_dmae_shutdown,
+ .driver = {
+ .name = "sh-dma-engine",
+ },
+};
+
+static int __init sh_dmae_init(void)
+{
+ return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void)
+{
+ platform_driver_unregister(&sh_dmae_driver);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
--- /dev/null
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_SHDMA_H
+#define __DMA_SHDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */
+
+struct sh_dmae_regs {
+ u32 sar; /* SAR / source address */
+ u32 dar; /* DAR / destination address */
+ u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_desc {
+ struct list_head tx_list;
+ struct sh_dmae_regs hw;
+ struct list_head node;
+ struct dma_async_tx_descriptor async_tx;
+ int mark;
+};
+
+struct sh_dmae_chan {
+ dma_cookie_t completed_cookie; /* The maximum cookie completed */
+ spinlock_t desc_lock; /* Descriptor operation lock */
+ struct list_head ld_queue; /* Link descriptors queue */
+ struct list_head ld_free; /* Link descriptors free */
+ struct dma_chan common; /* DMA common channel */
+ struct device *dev; /* Channel device */
+ struct tasklet_struct tasklet; /* Tasklet */
+ int descs_allocated; /* desc count */
+ int id; /* Raw id of this channel */
+ char dev_id[16]; /* unique name per DMAC of channel */
+
+ /* Set chcr */
+ int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs);
+ /* Set DMA resource */
+ int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res);
+};
+
+struct sh_dmae_device {
+ struct dma_device common;
+ struct sh_dmae_chan *chan[MAX_DMA_CHANNELS];
+ struct sh_dmae_pdata pdata;
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
+#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
+#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
+
+#endif /* __DMA_SHDMA_H */
static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
{
- if (!list_empty(&desc->txd.tx_list))
- desc = list_entry(desc->txd.tx_list.prev,
- struct txx9dmac_desc, desc_node);
+ if (!list_empty(&desc->tx_list))
+ desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
return desc;
}
desc = kzalloc(sizeof(*desc), flags);
if (!desc)
return NULL;
+ INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
desc->txd.tx_submit = txx9dmac_tx_submit;
/* txd.flags will be overwritten in prep funcs */
struct txx9dmac_dev *ddev = dc->ddev;
struct txx9dmac_desc *child;
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dma_sync_single_for_cpu(chan2parent(&dc->chan),
child->txd.phys, ddev->descsize,
DMA_TO_DEVICE);
txx9dmac_sync_desc_for_cpu(dc, desc);
spin_lock_bh(&dc->lock);
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&dc->chan),
"moving child desc %p to freelist\n",
child);
- list_splice_init(&desc->txd.tx_list, &dc->free_list);
+ list_splice_init(&desc->tx_list, &dc->free_list);
dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
desc);
list_add(&desc->desc_node, &dc->free_list);
param = txd->callback_param;
txx9dmac_sync_desc_for_cpu(dc, desc);
- list_splice_init(&txd->tx_list, &dc->free_list);
+ list_splice_init(&desc->tx_list, &dc->free_list);
list_move(&desc->desc_node, &dc->free_list);
if (!ds) {
"Bad descriptor submitted for DMA! (cookie: %d)\n",
bad_desc->txd.cookie);
txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
- list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
txx9dmac_dump_desc(dc, &child->hwdesc);
/* Pretend the descriptor completed successfully */
txx9dmac_descriptor_complete(dc, bad_desc);
return;
}
- list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ list_for_each_entry(child, &desc->tx_list, desc_node)
if (desc_read_CHAR(dc, child) == chain) {
/* Currently in progress */
if (csr & TXX9_DMA_CSR_ABCHC)
dma_sync_single_for_device(chan2parent(&dc->chan),
prev->txd.phys, ddev->descsize,
DMA_TO_DEVICE);
- list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ list_add_tail(&desc->desc_node, &first->tx_list);
}
prev = desc;
}
prev->txd.phys,
ddev->descsize,
DMA_TO_DEVICE);
- list_add_tail(&desc->desc_node,
- &first->txd.tx_list);
+ list_add_tail(&desc->desc_node, &first->tx_list);
}
prev = desc;
}
/* THEN values for driver housekeeping */
struct list_head desc_node ____cacheline_aligned;
+ struct list_head tx_list;
struct dma_async_tx_descriptor txd;
size_t len;
};
Support for error detection and correction on the Intel
3000 and 3010 server chipsets.
+config EDAC_I3200
+ tristate "Intel 3200"
+ depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL
+ help
+ Support for error detection and correction on the Intel
+ 3200 and 3210 server chipsets.
+
config EDAC_X38
tristate "Intel X38"
depends on EDAC_MM_EDAC && PCI && X86
San Clemente MCH.
config EDAC_MPC85XX
- tristate "Freescale MPC85xx"
- depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
+ tristate "Freescale MPC83xx / MPC85xx"
+ depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx)
help
Support for error detection and correction on the Freescale
- MPC8560, MPC8540, MPC8548
+ MPC8349, MPC8560, MPC8540, MPC8548
config EDAC_MV64X60
tristate "Marvell MV64x60"
obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o
obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o
obj-$(CONFIG_EDAC_I3000) += i3000_edac.o
+obj-$(CONFIG_EDAC_I3200) += i3200_edac.o
obj-$(CONFIG_EDAC_X38) += x38_edac.o
obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
+
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdev->name)) {
cpc925_printk(KERN_ERR, "Unable to request mem region\n");
res = -EBUSY;
goto err1;
}
- vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1);
+ vbase = devm_ioremap(&pdev->dev, r->start, resource_size(r));
if (!vbase) {
cpc925_printk(KERN_ERR, "Unable to ioremap device\n");
res = -ENOMEM;
cpc925_mc_exit(mci);
edac_mc_free(mci);
err2:
- devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1);
+ devm_release_mem_region(&pdev->dev, r->start, resource_size(r));
err1:
devres_release_group(&pdev->dev, cpc925_probe);
out:
edac_dev = container_of(head, struct edac_device_ctl_info, rcu);
INIT_LIST_HEAD(&edac_dev->link);
- complete(&edac_dev->removal_complete);
}
/*
*edac_device)
{
list_del_rcu(&edac_device->link);
-
- init_completion(&edac_device->removal_complete);
call_rcu(&edac_device->rcu, complete_edac_device_list_del);
- wait_for_completion(&edac_device->removal_complete);
+ rcu_barrier();
}
/*
mci = container_of(head, struct mem_ctl_info, rcu);
INIT_LIST_HEAD(&mci->link);
- complete(&mci->complete);
}
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
atomic_dec(&edac_handlers);
list_del_rcu(&mci->link);
- init_completion(&mci->complete);
call_rcu(&mci->rcu, complete_mc_list_del);
- wait_for_completion(&mci->complete);
+ rcu_barrier();
}
/**
pci = container_of(head, struct edac_pci_ctl_info, rcu);
INIT_LIST_HEAD(&pci->link);
- complete(&pci->complete);
}
/*
static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
{
list_del_rcu(&pci->link);
- init_completion(&pci->complete);
call_rcu(&pci->rcu, complete_edac_pci_list_del);
- wait_for_completion(&pci->complete);
+ rcu_barrier();
}
#if 0
--- /dev/null
+/*
+ * Intel 3200/3210 Memory Controller kernel module
+ * Copyright (C) 2008-2009 Akamai Technologies, Inc.
+ * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/io.h>
+#include "edac_core.h"
+
+#define I3200_REVISION "1.1"
+
+#define EDAC_MOD_STR "i3200_edac"
+
+#define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0
+
+#define I3200_RANKS 8
+#define I3200_RANKS_PER_CHANNEL 4
+#define I3200_CHANNELS 2
+
+/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */
+
+#define I3200_MCHBAR_LOW 0x48 /* MCH Memory Mapped Register BAR */
+#define I3200_MCHBAR_HIGH 0x4c
+#define I3200_MCHBAR_MASK 0xfffffc000ULL /* bits 35:14 */
+#define I3200_MMR_WINDOW_SIZE 16384
+
+#define I3200_TOM 0xa0 /* Top of Memory (16b)
+ *
+ * 15:10 reserved
+ * 9:0 total populated physical memory
+ */
+#define I3200_TOM_MASK 0x3ff /* bits 9:0 */
+#define I3200_TOM_SHIFT 26 /* 64MiB grain */
+
+#define I3200_ERRSTS 0xc8 /* Error Status Register (16b)
+ *
+ * 15 reserved
+ * 14 Isochronous TBWRR Run Behind FIFO Full
+ * (ITCV)
+ * 13 Isochronous TBWRR Run Behind FIFO Put
+ * (ITSTV)
+ * 12 reserved
+ * 11 MCH Thermal Sensor Event
+ * for SMI/SCI/SERR (GTSE)
+ * 10 reserved
+ * 9 LOCK to non-DRAM Memory Flag (LCKF)
+ * 8 reserved
+ * 7 DRAM Throttle Flag (DTF)
+ * 6:2 reserved
+ * 1 Multi-bit DRAM ECC Error Flag (DMERR)
+ * 0 Single-bit DRAM ECC Error Flag (DSERR)
+ */
+#define I3200_ERRSTS_UE 0x0002
+#define I3200_ERRSTS_CE 0x0001
+#define I3200_ERRSTS_BITS (I3200_ERRSTS_UE | I3200_ERRSTS_CE)
+
+
+/* Intel MMIO register space - device 0 function 0 - MMR space */
+
+#define I3200_C0DRB 0x200 /* Channel 0 DRAM Rank Boundary (16b x 4)
+ *
+ * 15:10 reserved
+ * 9:0 Channel 0 DRAM Rank Boundary Address
+ */
+#define I3200_C1DRB 0x600 /* Channel 1 DRAM Rank Boundary (16b x 4) */
+#define I3200_DRB_MASK 0x3ff /* bits 9:0 */
+#define I3200_DRB_SHIFT 26 /* 64MiB grain */
+
+#define I3200_C0ECCERRLOG 0x280 /* Channel 0 ECC Error Log (64b)
+ *
+ * 63:48 Error Column Address (ERRCOL)
+ * 47:32 Error Row Address (ERRROW)
+ * 31:29 Error Bank Address (ERRBANK)
+ * 28:27 Error Rank Address (ERRRANK)
+ * 26:24 reserved
+ * 23:16 Error Syndrome (ERRSYND)
+ * 15: 2 reserved
+ * 1 Multiple Bit Error Status (MERRSTS)
+ * 0 Correctable Error Status (CERRSTS)
+ */
+#define I3200_C1ECCERRLOG 0x680 /* Chan 1 ECC Error Log (64b) */
+#define I3200_ECCERRLOG_CE 0x1
+#define I3200_ECCERRLOG_UE 0x2
+#define I3200_ECCERRLOG_RANK_BITS 0x18000000
+#define I3200_ECCERRLOG_RANK_SHIFT 27
+#define I3200_ECCERRLOG_SYNDROME_BITS 0xff0000
+#define I3200_ECCERRLOG_SYNDROME_SHIFT 16
+#define I3200_CAPID0 0xe0 /* P.95 of spec for details */
+
+struct i3200_priv {
+ void __iomem *window;
+};
+
+static int nr_channels;
+
+static int how_many_channels(struct pci_dev *pdev)
+{
+ unsigned char capid0_8b; /* 8th byte of CAPID0 */
+
+ pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
+ if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
+ debugf0("In single channel mode.\n");
+ return 1;
+ } else {
+ debugf0("In dual channel mode.\n");
+ return 2;
+ }
+}
+
+static unsigned long eccerrlog_syndrome(u64 log)
+{
+ return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
+ I3200_ECCERRLOG_SYNDROME_SHIFT;
+}
+
+static int eccerrlog_row(int channel, u64 log)
+{
+ u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
+ I3200_ECCERRLOG_RANK_SHIFT);
+ return rank | (channel * I3200_RANKS_PER_CHANNEL);
+}
+
+enum i3200_chips {
+ I3200 = 0,
+};
+
+struct i3200_dev_info {
+ const char *ctl_name;
+};
+
+struct i3200_error_info {
+ u16 errsts;
+ u16 errsts2;
+ u64 eccerrlog[I3200_CHANNELS];
+};
+
+static const struct i3200_dev_info i3200_devs[] = {
+ [I3200] = {
+ .ctl_name = "i3200"
+ },
+};
+
+static struct pci_dev *mci_pdev;
+static int i3200_registered = 1;
+
+
+static void i3200_clear_error_info(struct mem_ctl_info *mci)
+{
+ struct pci_dev *pdev;
+
+ pdev = to_pci_dev(mci->dev);
+
+ /*
+ * Clear any error bits.
+ * (Yes, we really clear bits by writing 1 to them.)
+ */
+ pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
+ I3200_ERRSTS_BITS);
+}
+
+static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
+ struct i3200_error_info *info)
+{
+ struct pci_dev *pdev;
+ struct i3200_priv *priv = mci->pvt_info;
+ void __iomem *window = priv->window;
+
+ pdev = to_pci_dev(mci->dev);
+
+ /*
+ * This is a mess because there is no atomic way to read all the
+ * registers at once and the registers can transition from CE being
+ * overwritten by UE.
+ */
+ pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
+ if (!(info->errsts & I3200_ERRSTS_BITS))
+ return;
+
+ info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+ if (nr_channels == 2)
+ info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+
+ pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
+
+ /*
+ * If the error is the same for both reads then the first set
+ * of reads is valid. If there is a change then there is a CE
+ * with no info and the second set of reads is valid and
+ * should be UE info.
+ */
+ if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+ info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+ if (nr_channels == 2)
+ info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+ }
+
+ i3200_clear_error_info(mci);
+}
+
+static void i3200_process_error_info(struct mem_ctl_info *mci,
+ struct i3200_error_info *info)
+{
+ int channel;
+ u64 log;
+
+ if (!(info->errsts & I3200_ERRSTS_BITS))
+ return;
+
+ if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+ edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ info->errsts = info->errsts2;
+ }
+
+ for (channel = 0; channel < nr_channels; channel++) {
+ log = info->eccerrlog[channel];
+ if (log & I3200_ECCERRLOG_UE) {
+ edac_mc_handle_ue(mci, 0, 0,
+ eccerrlog_row(channel, log),
+ "i3200 UE");
+ } else if (log & I3200_ECCERRLOG_CE) {
+ edac_mc_handle_ce(mci, 0, 0,
+ eccerrlog_syndrome(log),
+ eccerrlog_row(channel, log), 0,
+ "i3200 CE");
+ }
+ }
+}
+
+static void i3200_check(struct mem_ctl_info *mci)
+{
+ struct i3200_error_info info;
+
+ debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+ i3200_get_and_clear_error_info(mci, &info);
+ i3200_process_error_info(mci, &info);
+}
+
+
+void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
+{
+ union {
+ u64 mchbar;
+ struct {
+ u32 mchbar_low;
+ u32 mchbar_high;
+ };
+ } u;
+ void __iomem *window;
+
+ pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
+ pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
+ u.mchbar &= I3200_MCHBAR_MASK;
+
+ if (u.mchbar != (resource_size_t)u.mchbar) {
+ printk(KERN_ERR
+ "i3200: mmio space beyond accessible range (0x%llx)\n",
+ (unsigned long long)u.mchbar);
+ return NULL;
+ }
+
+ window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
+ if (!window)
+ printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
+ (unsigned long long)u.mchbar);
+
+ return window;
+}
+
+
+static void i3200_get_drbs(void __iomem *window,
+ u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+ int i;
+
+ for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
+ drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
+ drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
+ }
+}
+
+static bool i3200_is_stacked(struct pci_dev *pdev,
+ u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+ u16 tom;
+
+ pci_read_config_word(pdev, I3200_TOM, &tom);
+ tom &= I3200_TOM_MASK;
+
+ return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
+}
+
+static unsigned long drb_to_nr_pages(
+ u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
+ int channel, int rank)
+{
+ int n;
+
+ n = drbs[channel][rank];
+ if (rank > 0)
+ n -= drbs[channel][rank - 1];
+ if (stacked && (channel == 1) &&
+ drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
+ n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
+
+ n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
+ return n;
+}
+
+static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
+{
+ int rc;
+ int i;
+ struct mem_ctl_info *mci = NULL;
+ unsigned long last_page;
+ u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
+ bool stacked;
+ void __iomem *window;
+ struct i3200_priv *priv;
+
+ debugf0("MC: %s()\n", __func__);
+
+ window = i3200_map_mchbar(pdev);
+ if (!window)
+ return -ENODEV;
+
+ i3200_get_drbs(window, drbs);
+ nr_channels = how_many_channels(pdev);
+
+ mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
+ nr_channels, 0);
+ if (!mci)
+ return -ENOMEM;
+
+ debugf3("MC: %s(): init mci\n", __func__);
+
+ mci->dev = &pdev->dev;
+ mci->mtype_cap = MEM_FLAG_DDR2;
+
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+ mci->edac_cap = EDAC_FLAG_SECDED;
+
+ mci->mod_name = EDAC_MOD_STR;
+ mci->mod_ver = I3200_REVISION;
+ mci->ctl_name = i3200_devs[dev_idx].ctl_name;
+ mci->dev_name = pci_name(pdev);
+ mci->edac_check = i3200_check;
+ mci->ctl_page_to_phys = NULL;
+ priv = mci->pvt_info;
+ priv->window = window;
+
+ stacked = i3200_is_stacked(pdev, drbs);
+
+ /*
+ * The dram rank boundary (DRB) reg values are boundary addresses
+ * for each DRAM rank with a granularity of 64MB. DRB regs are
+ * cumulative; the last one will contain the total memory
+ * contained in all ranks.
+ */
+ last_page = -1UL;
+ for (i = 0; i < mci->nr_csrows; i++) {
+ unsigned long nr_pages;
+ struct csrow_info *csrow = &mci->csrows[i];
+
+ nr_pages = drb_to_nr_pages(drbs, stacked,
+ i / I3200_RANKS_PER_CHANNEL,
+ i % I3200_RANKS_PER_CHANNEL);
+
+ if (nr_pages == 0) {
+ csrow->mtype = MEM_EMPTY;
+ continue;
+ }
+
+ csrow->first_page = last_page + 1;
+ last_page += nr_pages;
+ csrow->last_page = last_page;
+ csrow->nr_pages = nr_pages;
+
+ csrow->grain = nr_pages << PAGE_SHIFT;
+ csrow->mtype = MEM_DDR2;
+ csrow->dtype = DEV_UNKNOWN;
+ csrow->edac_mode = EDAC_UNKNOWN;
+ }
+
+ i3200_clear_error_info(mci);
+
+ rc = -ENODEV;
+ if (edac_mc_add_mc(mci)) {
+ debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+ goto fail;
+ }
+
+ /* get this far and it's successful */
+ debugf3("MC: %s(): success\n", __func__);
+ return 0;
+
+fail:
+ iounmap(window);
+ if (mci)
+ edac_mc_free(mci);
+
+ return rc;
+}
+
+static int __devinit i3200_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int rc;
+
+ debugf0("MC: %s()\n", __func__);
+
+ if (pci_enable_device(pdev) < 0)
+ return -EIO;
+
+ rc = i3200_probe1(pdev, ent->driver_data);
+ if (!mci_pdev)
+ mci_pdev = pci_dev_get(pdev);
+
+ return rc;
+}
+
+static void __devexit i3200_remove_one(struct pci_dev *pdev)
+{
+ struct mem_ctl_info *mci;
+ struct i3200_priv *priv;
+
+ debugf0("%s()\n", __func__);
+
+ mci = edac_mc_del_mc(&pdev->dev);
+ if (!mci)
+ return;
+
+ priv = mci->pvt_info;
+ iounmap(priv->window);
+
+ edac_mc_free(mci);
+}
+
+static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
+ {
+ PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ I3200},
+ {
+ 0,
+ } /* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
+
+static struct pci_driver i3200_driver = {
+ .name = EDAC_MOD_STR,
+ .probe = i3200_init_one,
+ .remove = __devexit_p(i3200_remove_one),
+ .id_table = i3200_pci_tbl,
+};
+
+static int __init i3200_init(void)
+{
+ int pci_rc;
+
+ debugf3("MC: %s()\n", __func__);
+
+ /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+ opstate_init();
+
+ pci_rc = pci_register_driver(&i3200_driver);
+ if (pci_rc < 0)
+ goto fail0;
+
+ if (!mci_pdev) {
+ i3200_registered = 0;
+ mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_3200_HB, NULL);
+ if (!mci_pdev) {
+ debugf0("i3200 pci_get_device fail\n");
+ pci_rc = -ENODEV;
+ goto fail1;
+ }
+
+ pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
+ if (pci_rc < 0) {
+ debugf0("i3200 init fail\n");
+ pci_rc = -ENODEV;
+ goto fail1;
+ }
+ }
+
+ return 0;
+
+fail1:
+ pci_unregister_driver(&i3200_driver);
+
+fail0:
+ if (mci_pdev)
+ pci_dev_put(mci_pdev);
+
+ return pci_rc;
+}
+
+static void __exit i3200_exit(void)
+{
+ debugf3("MC: %s()\n", __func__);
+
+ pci_unregister_driver(&i3200_driver);
+ if (!i3200_registered) {
+ i3200_remove_one(mci_pdev);
+ pci_dev_put(mci_pdev);
+ }
+}
+
+module_init(i3200_init);
+module_exit(i3200_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akamai Technologies, Inc.");
+MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
#endif
static u32 orig_l2_err_disable;
+#ifdef CONFIG_MPC85xx
static u32 orig_hid1[2];
+#endif
/************************ MC SYSFS parts ***********************************/
{ .compatible = "fsl,mpc8560-l2-cache-controller", },
{ .compatible = "fsl,mpc8568-l2-cache-controller", },
{ .compatible = "fsl,mpc8572-l2-cache-controller", },
+ { .compatible = "fsl,p2020-l2-cache-controller", },
{},
};
csrow = &mci->csrows[index];
cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
(index * MPC85XX_MC_CS_BNDS_OFS));
- start = (cs_bnds & 0xfff0000) << 4;
- end = ((cs_bnds & 0xfff) << 20);
- if (start)
- start |= 0xfffff;
- if (end)
- end |= 0xfffff;
+
+ start = (cs_bnds & 0xffff0000) >> 16;
+ end = (cs_bnds & 0x0000ffff);
if (start == end)
continue; /* not populated */
+ start <<= (24 - PAGE_SHIFT);
+ end <<= (24 - PAGE_SHIFT);
+ end |= (1 << (24 - PAGE_SHIFT)) - 1;
+
csrow->first_page = start >> PAGE_SHIFT;
csrow->last_page = end >> PAGE_SHIFT;
- csrow->nr_pages = csrow->last_page + 1 - csrow->first_page;
+ csrow->nr_pages = end + 1 - start;
csrow->grain = 8;
csrow->mtype = mtype;
csrow->dtype = DEV_UNKNOWN;
{ .compatible = "fsl,mpc8560-memory-controller", },
{ .compatible = "fsl,mpc8568-memory-controller", },
{ .compatible = "fsl,mpc8572-memory-controller", },
+ { .compatible = "fsl,mpc8349-memory-controller", },
+ { .compatible = "fsl,p2020-memory-controller", },
{},
};
},
};
-
+#ifdef CONFIG_MPC85xx
static void __init mpc85xx_mc_clear_rfxe(void *data)
{
orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1);
mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000));
}
-
+#endif
static int __init mpc85xx_mc_init(void)
{
printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
#endif
+#ifdef CONFIG_MPC85xx
/*
* need to clear HID1[RFXE] to disable machine check int
* so we can catch it
*/
if (edac_op_state == EDAC_OPSTATE_INT)
on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
+#endif
return 0;
}
module_init(mpc85xx_mc_init);
+#ifdef CONFIG_MPC85xx
static void __exit mpc85xx_mc_restore_hid1(void *data)
{
mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]);
}
+#endif
static void __exit mpc85xx_mc_exit(void)
{
+#ifdef CONFIG_MPC85xx
on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
+#endif
#ifdef CONFIG_PCI
of_unregister_platform_driver(&mpc85xx_pci_err_driver);
#endif
return -ENOENT;
}
- pci_serr = ioremap(r->start, r->end - r->start + 1);
+ pci_serr = ioremap(r->start, resource_size(r));
if (!pci_serr)
return -ENOMEM;
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
pdata->pci_vbase = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->pci_vbase) {
printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
res = -ENOMEM;
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while request mem region\n",
__func__);
pdata->sram_vbase = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->sram_vbase) {
printk(KERN_ERR "%s: Unable to setup SRAM err regs\n",
__func__);
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->cpu_vbase[0]) {
printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
res = -ENOMEM;
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->cpu_vbase[1]) {
printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
res = -ENOMEM;
if (!devm_request_mem_region(&pdev->dev,
r->start,
- r->end - r->start + 1,
+ resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
pdata->mc_vbase = devm_ioremap(&pdev->dev,
r->start,
- r->end - r->start + 1);
+ resource_size(r));
if (!pdata->mc_vbase) {
printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__);
res = -ENOMEM;
select BACKLIGHT_CLASS_DEVICE if ACPI
select INPUT if ACPI
select ACPI_VIDEO if ACPI
+ select ACPI_BUTTON if ACPI
help
Choose this option if you have a system that has Intel 830M, 845G,
852GM, 855GM 865G or 915G integrated graphics. If M is selected, the
if (IS_ERR(obj->filp))
goto free;
+ /* Basically we want to disable the OOM killer and handle ENOMEM
+ * ourselves by sacrificing pages from cached buffers.
+ * XXX shmem_file_[gs]et_gfp_mask()
+ */
+ mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping,
+ GFP_HIGHUSER |
+ __GFP_COLD |
+ __GFP_FS |
+ __GFP_RECLAIMABLE |
+ __GFP_NORETRY |
+ __GFP_NOWARN |
+ __GFP_NOMEMALLOC);
+
kref_init(&obj->refcount);
kref_init(&obj->handlecount);
obj->size = size;
i915_gem.o \
i915_gem_debug.o \
i915_gem_tiling.o \
+ i915_trace_points.o \
intel_display.o \
intel_crt.o \
intel_lvds.o \
{
struct drm_gem_object *obj = obj_priv->obj;
- seq_printf(m, " %p: %s %08x %08x %d",
+ seq_printf(m, " %p: %s %8zd %08x %08x %d %s",
obj,
get_pin_flag(obj_priv),
+ obj->size,
obj->read_domains, obj->write_domain,
- obj_priv->last_rendering_seqno);
+ obj_priv->last_rendering_seqno,
+ obj_priv->dirty ? "dirty" : "");
if (obj->name)
seq_printf(m, " (name: %d)", obj->name);
#include "intel_drv.h"
#include "i915_drm.h"
#include "i915_drv.h"
+#include "i915_trace.h"
#include <linux/vgaarb.h>
/* Really want an OS-independent resettable timer. Would like to have
u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
int i;
+ trace_i915_ring_wait_begin (dev);
+
for (i = 0; i < 100000; i++) {
ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
acthd = I915_READ(acthd_reg);
ring->space = ring->head - (ring->tail + 8);
if (ring->space < 0)
ring->space += ring->Size;
- if (ring->space >= n)
+ if (ring->space >= n) {
+ trace_i915_ring_wait_end (dev);
return 0;
+ }
if (dev->primary->master) {
struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
}
+ trace_i915_ring_wait_end (dev);
return -EBUSY;
}
* how much was set aside so we can use it for our own purposes.
*/
static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
- uint32_t *preallocated_size)
+ uint32_t *preallocated_size,
+ uint32_t *start)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u16 tmp = 0;
return -1;
}
*preallocated_size = stolen - overhead;
+ *start = overhead;
return 0;
}
+#define PTE_ADDRESS_MASK 0xfffff000
+#define PTE_ADDRESS_MASK_HIGH 0x000000f0 /* i915+ */
+#define PTE_MAPPING_TYPE_UNCACHED (0 << 1)
+#define PTE_MAPPING_TYPE_DCACHE (1 << 1) /* i830 only */
+#define PTE_MAPPING_TYPE_CACHED (3 << 1)
+#define PTE_MAPPING_TYPE_MASK (3 << 1)
+#define PTE_VALID (1 << 0)
+
+/**
+ * i915_gtt_to_phys - take a GTT address and turn it into a physical one
+ * @dev: drm device
+ * @gtt_addr: address to translate
+ *
+ * Some chip functions require allocations from stolen space but need the
+ * physical address of the memory in question. We use this routine
+ * to get a physical address suitable for register programming from a given
+ * GTT address.
+ */
+static unsigned long i915_gtt_to_phys(struct drm_device *dev,
+ unsigned long gtt_addr)
+{
+ unsigned long *gtt;
+ unsigned long entry, phys;
+ int gtt_bar = IS_I9XX(dev) ? 0 : 1;
+ int gtt_offset, gtt_size;
+
+ if (IS_I965G(dev)) {
+ if (IS_G4X(dev) || IS_IGDNG(dev)) {
+ gtt_offset = 2*1024*1024;
+ gtt_size = 2*1024*1024;
+ } else {
+ gtt_offset = 512*1024;
+ gtt_size = 512*1024;
+ }
+ } else {
+ gtt_bar = 3;
+ gtt_offset = 0;
+ gtt_size = pci_resource_len(dev->pdev, gtt_bar);
+ }
+
+ gtt = ioremap_wc(pci_resource_start(dev->pdev, gtt_bar) + gtt_offset,
+ gtt_size);
+ if (!gtt) {
+ DRM_ERROR("ioremap of GTT failed\n");
+ return 0;
+ }
+
+ entry = *(volatile u32 *)(gtt + (gtt_addr / 1024));
+
+ DRM_DEBUG("GTT addr: 0x%08lx, PTE: 0x%08lx\n", gtt_addr, entry);
+
+ /* Mask out these reserved bits on this hardware. */
+ if (!IS_I9XX(dev) || IS_I915G(dev) || IS_I915GM(dev) ||
+ IS_I945G(dev) || IS_I945GM(dev)) {
+ entry &= ~PTE_ADDRESS_MASK_HIGH;
+ }
+
+ /* If it's not a mapping type we know, then bail. */
+ if ((entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_UNCACHED &&
+ (entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_CACHED) {
+ iounmap(gtt);
+ return 0;
+ }
+
+ if (!(entry & PTE_VALID)) {
+ DRM_ERROR("bad GTT entry in stolen space\n");
+ iounmap(gtt);
+ return 0;
+ }
+
+ iounmap(gtt);
+
+ phys =(entry & PTE_ADDRESS_MASK) |
+ ((uint64_t)(entry & PTE_ADDRESS_MASK_HIGH) << (32 - 4));
+
+ DRM_DEBUG("GTT addr: 0x%08lx, phys addr: 0x%08lx\n", gtt_addr, phys);
+
+ return phys;
+}
+
+static void i915_warn_stolen(struct drm_device *dev)
+{
+ DRM_ERROR("not enough stolen space for compressed buffer, disabling\n");
+ DRM_ERROR("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
+}
+
+static void i915_setup_compression(struct drm_device *dev, int size)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_mm_node *compressed_fb, *compressed_llb;
+ unsigned long cfb_base, ll_base;
+
+ /* Leave 1M for line length buffer & misc. */
+ compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0);
+ if (!compressed_fb) {
+ i915_warn_stolen(dev);
+ return;
+ }
+
+ compressed_fb = drm_mm_get_block(compressed_fb, size, 4096);
+ if (!compressed_fb) {
+ i915_warn_stolen(dev);
+ return;
+ }
+
+ cfb_base = i915_gtt_to_phys(dev, compressed_fb->start);
+ if (!cfb_base) {
+ DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+ drm_mm_put_block(compressed_fb);
+ }
+
+ if (!IS_GM45(dev)) {
+ compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096,
+ 4096, 0);
+ if (!compressed_llb) {
+ i915_warn_stolen(dev);
+ return;
+ }
+
+ compressed_llb = drm_mm_get_block(compressed_llb, 4096, 4096);
+ if (!compressed_llb) {
+ i915_warn_stolen(dev);
+ return;
+ }
+
+ ll_base = i915_gtt_to_phys(dev, compressed_llb->start);
+ if (!ll_base) {
+ DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+ drm_mm_put_block(compressed_fb);
+ drm_mm_put_block(compressed_llb);
+ }
+ }
+
+ dev_priv->cfb_size = size;
+
+ if (IS_GM45(dev)) {
+ g4x_disable_fbc(dev);
+ I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
+ } else {
+ i8xx_disable_fbc(dev);
+ I915_WRITE(FBC_CFB_BASE, cfb_base);
+ I915_WRITE(FBC_LL_BASE, ll_base);
+ }
+
+ DRM_DEBUG("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n", cfb_base,
+ ll_base, size >> 20);
+}
+
/* true = enable decode, false = disable decoder */
static unsigned int i915_vga_set_decode(void *cookie, bool state)
{
}
static int i915_load_modeset_init(struct drm_device *dev,
+ unsigned long prealloc_start,
unsigned long prealloc_size,
unsigned long agp_size)
{
/* Basic memrange allocator for stolen space (aka vram) */
drm_mm_init(&dev_priv->vram, 0, prealloc_size);
+ DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024));
+
+ /* We're off and running w/KMS */
+ dev_priv->mm.suspended = 0;
/* Let GEM Manage from end of prealloc space to end of aperture.
*
*/
i915_gem_do_init(dev, prealloc_size, agp_size - 4096);
+ mutex_lock(&dev->struct_mutex);
ret = i915_gem_init_ringbuffer(dev);
+ mutex_unlock(&dev->struct_mutex);
if (ret)
goto out;
+ /* Try to set up FBC with a reasonable compressed buffer size */
+ if (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev) || IS_GM45(dev)) &&
+ i915_powersave) {
+ int cfb_size;
+
+ /* Try to get an 8M buffer... */
+ if (prealloc_size > (9*1024*1024))
+ cfb_size = 8*1024*1024;
+ else /* fall back to 7/8 of the stolen space */
+ cfb_size = prealloc_size * 7 / 8;
+ i915_setup_compression(dev, cfb_size);
+ }
+
/* Allow hardware batchbuffers unless told otherwise.
*/
dev_priv->allow_batchbuffer = 1;
struct drm_i915_private *dev_priv = dev->dev_private;
resource_size_t base, size;
int ret = 0, mmio_bar = IS_I9XX(dev) ? 0 : 1;
- uint32_t agp_size, prealloc_size;
+ uint32_t agp_size, prealloc_size, prealloc_start;
/* i915 has 4 more counters */
dev->counters += 4;
"performance may suffer.\n");
}
- ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
+ ret = i915_probe_agp(dev, &agp_size, &prealloc_size, &prealloc_start);
if (ret)
goto out_iomapfree;
return ret;
}
+ /* Start out suspended */
+ dev_priv->mm.suspended = 1;
+
if (drm_core_check_feature(dev, DRIVER_MODESET)) {
- ret = i915_load_modeset_init(dev, prealloc_size, agp_size);
+ ret = i915_load_modeset_init(dev, prealloc_start,
+ prealloc_size, agp_size);
if (ret < 0) {
DRM_ERROR("failed to init modeset\n");
goto out_workqueue_free;
if (!IS_IGDNG(dev))
intel_opregion_init(dev, 0);
+ setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
+ (unsigned long) dev);
return 0;
out_workqueue_free:
struct drm_i915_private *dev_priv = dev->dev_private;
destroy_workqueue(dev_priv->wq);
+ del_timer_sync(&dev_priv->hangcheck_timer);
io_mapping_free(dev_priv->mm.gtt_mapping);
if (dev_priv->mm.gtt_mtrr >= 0) {
DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0),
DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0),
DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0),
+ DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, 0),
};
int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
pci_set_power_state(dev->pdev, PCI_D3hot);
}
+ dev_priv->suspended = 1;
+
return 0;
}
struct drm_i915_private *dev_priv = dev->dev_private;
int ret = 0;
- pci_set_power_state(dev->pdev, PCI_D0);
- pci_restore_state(dev->pdev);
if (pci_enable_device(dev->pdev))
return -1;
pci_set_master(dev->pdev);
drm_helper_resume_force_mode(dev);
}
+ dev_priv->suspended = 0;
+
return ret;
}
+/**
+ * i965_reset - reset chip after a hang
+ * @dev: drm device to reset
+ * @flags: reset domains
+ *
+ * Reset the chip. Useful if a hang is detected. Returns zero on successful
+ * reset or otherwise an error code.
+ *
+ * Procedure is fairly simple:
+ * - reset the chip using the reset reg
+ * - re-init context state
+ * - re-init hardware status page
+ * - re-init ring buffer
+ * - re-init interrupt state
+ * - re-init display
+ */
+int i965_reset(struct drm_device *dev, u8 flags)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ unsigned long timeout;
+ u8 gdrst;
+ /*
+ * We really should only reset the display subsystem if we actually
+ * need to
+ */
+ bool need_display = true;
+
+ mutex_lock(&dev->struct_mutex);
+
+ /*
+ * Clear request list
+ */
+ i915_gem_retire_requests(dev);
+
+ if (need_display)
+ i915_save_display(dev);
+
+ if (IS_I965G(dev) || IS_G4X(dev)) {
+ /*
+ * Set the domains we want to reset, then the reset bit (bit 0).
+ * Clear the reset bit after a while and wait for hardware status
+ * bit (bit 1) to be set
+ */
+ pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+ pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0));
+ udelay(50);
+ pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe);
+
+ /* ...we don't want to loop forever though, 500ms should be plenty */
+ timeout = jiffies + msecs_to_jiffies(500);
+ do {
+ udelay(100);
+ pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+ } while ((gdrst & 0x1) && time_after(timeout, jiffies));
+
+ if (gdrst & 0x1) {
+ WARN(true, "i915: Failed to reset chip\n");
+ mutex_unlock(&dev->struct_mutex);
+ return -EIO;
+ }
+ } else {
+ DRM_ERROR("Error occurred. Don't know how to reset this chip.\n");
+ return -ENODEV;
+ }
+
+ /* Ok, now get things going again... */
+
+ /*
+ * Everything depends on having the GTT running, so we need to start
+ * there. Fortunately we don't need to do this unless we reset the
+ * chip at a PCI level.
+ *
+ * Next we need to restore the context, but we don't use those
+ * yet either...
+ *
+ * Ring buffer needs to be re-initialized in the KMS case, or if X
+ * was running at the time of the reset (i.e. we weren't VT
+ * switched away).
+ */
+ if (drm_core_check_feature(dev, DRIVER_MODESET) ||
+ !dev_priv->mm.suspended) {
+ drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+ struct drm_gem_object *obj = ring->ring_obj;
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ dev_priv->mm.suspended = 0;
+
+ /* Stop the ring if it's running. */
+ I915_WRITE(PRB0_CTL, 0);
+ I915_WRITE(PRB0_TAIL, 0);
+ I915_WRITE(PRB0_HEAD, 0);
+
+ /* Initialize the ring. */
+ I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+ I915_WRITE(PRB0_CTL,
+ ((obj->size - 4096) & RING_NR_PAGES) |
+ RING_NO_REPORT |
+ RING_VALID);
+ if (!drm_core_check_feature(dev, DRIVER_MODESET))
+ i915_kernel_lost_context(dev);
+ else {
+ ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+ ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
+ ring->space = ring->head - (ring->tail + 8);
+ if (ring->space < 0)
+ ring->space += ring->Size;
+ }
+
+ mutex_unlock(&dev->struct_mutex);
+ drm_irq_uninstall(dev);
+ drm_irq_install(dev);
+ mutex_lock(&dev->struct_mutex);
+ }
+
+ /*
+ * Display needs restore too...
+ */
+ if (need_display)
+ i915_restore_display(dev);
+
+ mutex_unlock(&dev->struct_mutex);
+ return 0;
+}
+
+
static int __devinit
i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
{
driver.num_ioctls = i915_max_ioctl;
+ i915_gem_shrinker_init();
+
/*
* If CONFIG_DRM_I915_KMS is set, default to KMS unless
* explicitly disabled with the module pararmeter.
static void __exit i915_exit(void)
{
+ i915_gem_shrinker_exit();
drm_exit(&driver);
}
PIPE_B,
};
+enum plane {
+ PLANE_A = 0,
+ PLANE_B,
+};
+
#define I915_NUM_PIPE 2
/* Interface history:
struct timeval time;
};
+struct drm_i915_display_funcs {
+ void (*dpms)(struct drm_crtc *crtc, int mode);
+ bool (*fbc_enabled)(struct drm_crtc *crtc);
+ void (*enable_fbc)(struct drm_crtc *crtc, unsigned long interval);
+ void (*disable_fbc)(struct drm_device *dev);
+ int (*get_display_clock_speed)(struct drm_device *dev);
+ int (*get_fifo_size)(struct drm_device *dev, int plane);
+ void (*update_wm)(struct drm_device *dev, int planea_clock,
+ int planeb_clock, int sr_hdisplay, int pixel_size);
+ /* clock updates for mode set */
+ /* cursor updates */
+ /* render clock increase/decrease */
+ /* display clock increase/decrease */
+ /* pll clock increase/decrease */
+ /* clock gating init */
+};
+
typedef struct drm_i915_private {
struct drm_device *dev;
unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds;
int vblank_pipe;
+ /* For hangcheck timer */
+#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */
+ struct timer_list hangcheck_timer;
+ int hangcheck_count;
+ uint32_t last_acthd;
+
bool cursor_needs_physical;
struct drm_mm vram;
+ unsigned long cfb_size;
+ unsigned long cfb_pitch;
+ int cfb_fence;
+ int cfb_plane;
+
int irq_enabled;
struct intel_opregion opregion;
unsigned int edp_support:1;
int lvds_ssc_freq;
+ struct notifier_block lid_notifier;
+
int crt_ddc_bus; /* -1 = unknown, else GPIO to use for CRT DDC */
struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */
int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
struct work_struct error_work;
struct workqueue_struct *wq;
+ /* Display functions */
+ struct drm_i915_display_funcs display;
+
/* Register state */
+ bool suspended;
u8 saveLBB;
u32 saveDSPACNTR;
u32 saveDSPBCNTR;
struct io_mapping *gtt_mapping;
int gtt_mtrr;
+ /**
+ * Membership on list of all loaded devices, used to evict
+ * inactive buffers under memory pressure.
+ *
+ * Modifications should only be done whilst holding the
+ * shrink_list_lock spinlock.
+ */
+ struct list_head shrink_list;
+
/**
* List of objects currently involved in rendering from the
* ringbuffer.
* It prevents command submission from occuring and makes
* every pending request fail
*/
- int wedged;
+ atomic_t wedged;
/** Bit 6 swizzling required for X tiling */
uint32_t bit_6_swizzle_x;
* This is the same as gtt_space->start
*/
uint32_t gtt_offset;
- /**
- * Required alignment for the object
- */
- uint32_t gtt_alignment;
+
/**
* Fake offset for use by mmap(2)
*/
* in an execbuffer object list.
*/
int in_execbuffer;
+
+ /**
+ * Advice: are the backing pages purgeable?
+ */
+ int madv;
};
/**
extern unsigned int i915_fbpercrtc;
extern unsigned int i915_powersave;
+extern void i915_save_display(struct drm_device *dev);
+extern void i915_restore_display(struct drm_device *dev);
extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master);
extern int i915_emit_box(struct drm_device *dev,
struct drm_clip_rect *boxes,
int i, int DR1, int DR4);
+extern int i965_reset(struct drm_device *dev, u8 flags);
/* i915_irq.c */
+void i915_hangcheck_elapsed(unsigned long data);
extern int i915_irq_emit(struct drm_device *dev, void *data,
struct drm_file *file_priv);
extern int i915_irq_wait(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
void i915_gem_release_mmap(struct drm_gem_object *obj);
void i915_gem_lastclose(struct drm_device *dev);
uint32_t i915_get_gem_seqno(struct drm_device *dev);
+bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
int i915_gem_object_put_fence_reg(struct drm_gem_object *obj);
void i915_gem_retire_requests(struct drm_device *dev);
void i915_gem_object_put_pages(struct drm_gem_object *obj);
void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv);
+void i915_gem_shrinker_init(void);
+void i915_gem_shrinker_exit(void);
+
/* i915_gem_tiling.c */
void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
extern void intel_modeset_init(struct drm_device *dev);
extern void intel_modeset_cleanup(struct drm_device *dev);
extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
+extern void i8xx_disable_fbc(struct drm_device *dev);
+extern void g4x_disable_fbc(struct drm_device *dev);
/**
* Lock test for when it's just for synchronization of ring access.
(dev)->pci_device == 0x2E12 || \
(dev)->pci_device == 0x2E22 || \
(dev)->pci_device == 0x2E32 || \
+ (dev)->pci_device == 0x2E42 || \
(dev)->pci_device == 0x0042 || \
(dev)->pci_device == 0x0046)
(dev)->pci_device == 0x2E12 || \
(dev)->pci_device == 0x2E22 || \
(dev)->pci_device == 0x2E32 || \
+ (dev)->pci_device == 0x2E42 || \
IS_GM45(dev))
#define IS_IGDG(dev) ((dev)->pci_device == 0xa001)
#define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev) || IS_IGDNG(dev))
#define SUPPORTS_INTEGRATED_DP(dev) (IS_G4X(dev) || IS_IGDNG(dev))
#define SUPPORTS_EDP(dev) (IS_IGDNG_M(dev))
-#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev))
+#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev) || IS_I965G(dev))
/* dsparb controlled by hw only */
#define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev))
#define HAS_FW_BLC(dev) (IS_I9XX(dev) || IS_G4X(dev) || IS_IGDNG(dev))
#define HAS_PIPE_CXSR(dev) (IS_G4X(dev) || IS_IGDNG(dev))
+#define I915_HAS_FBC(dev) (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev)))
#define PRIMARY_RINGBUFFER_SIZE (128*1024)
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
+#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/swap.h>
#include <linux/pci.h>
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
unsigned alignment);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
-static int i915_gem_evict_something(struct drm_device *dev);
+static int i915_gem_evict_something(struct drm_device *dev, int min_size);
+static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
struct drm_i915_gem_pwrite *args,
struct drm_file *file_priv);
+static LIST_HEAD(shrink_list);
+static DEFINE_SPINLOCK(shrink_list_lock);
+
int i915_gem_do_init(struct drm_device *dev, unsigned long start,
unsigned long end)
{
return ret;
}
+static inline gfp_t
+i915_gem_object_get_page_gfp_mask (struct drm_gem_object *obj)
+{
+ return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping);
+}
+
+static inline void
+i915_gem_object_set_page_gfp_mask (struct drm_gem_object *obj, gfp_t gfp)
+{
+ mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp);
+}
+
+static int
+i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
+{
+ int ret;
+
+ ret = i915_gem_object_get_pages(obj);
+
+ /* If we've insufficient memory to map in the pages, attempt
+ * to make some space by throwing out some old buffers.
+ */
+ if (ret == -ENOMEM) {
+ struct drm_device *dev = obj->dev;
+ gfp_t gfp;
+
+ ret = i915_gem_evict_something(dev, obj->size);
+ if (ret)
+ return ret;
+
+ gfp = i915_gem_object_get_page_gfp_mask(obj);
+ i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY);
+ ret = i915_gem_object_get_pages(obj);
+ i915_gem_object_set_page_gfp_mask (obj, gfp);
+ }
+
+ return ret;
+}
+
/**
* This is the fallback shmem pread path, which allocates temporary storage
* in kernel space to copy_to_user into outside of the struct_mutex, so we
mutex_lock(&dev->struct_mutex);
- ret = i915_gem_object_get_pages(obj);
- if (ret != 0)
+ ret = i915_gem_object_get_pages_or_evict(obj);
+ if (ret)
goto fail_unlock;
ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
mutex_lock(&dev->struct_mutex);
- ret = i915_gem_object_get_pages(obj);
- if (ret != 0)
+ ret = i915_gem_object_get_pages_or_evict(obj);
+ if (ret)
goto fail_unlock;
ret = i915_gem_object_set_to_cpu_domain(obj, 1);
/* Now bind it into the GTT if needed */
mutex_lock(&dev->struct_mutex);
if (!obj_priv->gtt_space) {
- ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
- if (ret) {
- mutex_unlock(&dev->struct_mutex);
- return VM_FAULT_SIGBUS;
- }
-
- ret = i915_gem_object_set_to_gtt_domain(obj, write);
- if (ret) {
- mutex_unlock(&dev->struct_mutex);
- return VM_FAULT_SIGBUS;
- }
+ ret = i915_gem_object_bind_to_gtt(obj, 0);
+ if (ret)
+ goto unlock;
list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, write);
+ if (ret)
+ goto unlock;
}
/* Need a new fence register? */
if (obj_priv->tiling_mode != I915_TILING_NONE) {
ret = i915_gem_object_get_fence_reg(obj);
- if (ret) {
- mutex_unlock(&dev->struct_mutex);
- return VM_FAULT_SIGBUS;
- }
+ if (ret)
+ goto unlock;
}
pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
/* Finally, remap it using the new GTT offset */
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
-
+unlock:
mutex_unlock(&dev->struct_mutex);
switch (ret) {
+ case 0:
+ case -ERESTARTSYS:
+ return VM_FAULT_NOPAGE;
case -ENOMEM:
case -EAGAIN:
return VM_FAULT_OOM;
- case -EFAULT:
- case -EINVAL:
- return VM_FAULT_SIGBUS;
default:
- return VM_FAULT_NOPAGE;
+ return VM_FAULT_SIGBUS;
}
}
obj_priv = obj->driver_private;
+ if (obj_priv->madv != I915_MADV_WILLNEED) {
+ DRM_ERROR("Attempting to mmap a purgeable buffer\n");
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return -EINVAL;
+ }
+
+
if (!obj_priv->mmap_offset) {
ret = i915_gem_create_mmap_offset(obj);
if (ret) {
args->offset = obj_priv->mmap_offset;
- obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);
-
- /* Make sure the alignment is correct for fence regs etc */
- if (obj_priv->agp_mem &&
- (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
- drm_gem_object_unreference(obj);
- mutex_unlock(&dev->struct_mutex);
- return -EINVAL;
- }
-
/*
* Pull it into the GTT so that we have a page list (makes the
* initial fault faster and any subsequent flushing possible).
*/
if (!obj_priv->agp_mem) {
- ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+ ret = i915_gem_object_bind_to_gtt(obj, 0);
if (ret) {
drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);
int i;
BUG_ON(obj_priv->pages_refcount == 0);
+ BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
if (--obj_priv->pages_refcount != 0)
return;
if (obj_priv->tiling_mode != I915_TILING_NONE)
i915_gem_object_save_bit_17_swizzle(obj);
- for (i = 0; i < page_count; i++)
- if (obj_priv->pages[i] != NULL) {
- if (obj_priv->dirty)
- set_page_dirty(obj_priv->pages[i]);
+ if (obj_priv->madv == I915_MADV_DONTNEED)
+ obj_priv->dirty = 0;
+
+ for (i = 0; i < page_count; i++) {
+ if (obj_priv->pages[i] == NULL)
+ break;
+
+ if (obj_priv->dirty)
+ set_page_dirty(obj_priv->pages[i]);
+
+ if (obj_priv->madv == I915_MADV_WILLNEED)
mark_page_accessed(obj_priv->pages[i]);
- page_cache_release(obj_priv->pages[i]);
- }
+
+ page_cache_release(obj_priv->pages[i]);
+ }
obj_priv->dirty = 0;
drm_free_large(obj_priv->pages);
obj_priv->last_rendering_seqno = 0;
}
+/* Immediately discard the backing storage */
+static void
+i915_gem_object_truncate(struct drm_gem_object *obj)
+{
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ struct inode *inode;
+
+ inode = obj->filp->f_path.dentry->d_inode;
+ if (inode->i_op->truncate)
+ inode->i_op->truncate (inode);
+
+ obj_priv->madv = __I915_MADV_PURGED;
+}
+
+static inline int
+i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
+{
+ return obj_priv->madv == I915_MADV_DONTNEED;
+}
+
static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
if ((obj->write_domain & flush_domains) ==
obj->write_domain) {
+ uint32_t old_write_domain = obj->write_domain;
+
obj->write_domain = 0;
i915_gem_object_move_to_active(obj, seqno);
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
}
}
- if (was_empty && !dev_priv->mm.suspended)
- queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+ if (!dev_priv->mm.suspended) {
+ mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+ if (was_empty)
+ queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+ }
return seqno;
}
{
drm_i915_private_t *dev_priv = dev->dev_private;
+ trace_i915_gem_request_retire(dev, request->seqno);
+
/* Move any buffers on the active list that are no longer referenced
* by the ringbuffer to the flushing/inactive lists as appropriate.
*/
/**
* Returns true if seq1 is later than seq2.
*/
-static int
+bool
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
return (int32_t)(seq1 - seq2) >= 0;
retiring_seqno = request->seqno;
if (i915_seqno_passed(seqno, retiring_seqno) ||
- dev_priv->mm.wedged) {
+ atomic_read(&dev_priv->mm.wedged)) {
i915_gem_retire_request(dev, request);
list_del(&request->list);
BUG_ON(seqno == 0);
+ if (atomic_read(&dev_priv->mm.wedged))
+ return -EIO;
+
if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
if (IS_IGDNG(dev))
ier = I915_READ(DEIER) | I915_READ(GTIER);
i915_driver_irq_postinstall(dev);
}
+ trace_i915_gem_request_wait_begin(dev, seqno);
+
dev_priv->mm.waiting_gem_seqno = seqno;
i915_user_irq_get(dev);
ret = wait_event_interruptible(dev_priv->irq_queue,
i915_seqno_passed(i915_get_gem_seqno(dev),
seqno) ||
- dev_priv->mm.wedged);
+ atomic_read(&dev_priv->mm.wedged));
i915_user_irq_put(dev);
dev_priv->mm.waiting_gem_seqno = 0;
+
+ trace_i915_gem_request_wait_end(dev, seqno);
}
- if (dev_priv->mm.wedged)
+ if (atomic_read(&dev_priv->mm.wedged))
ret = -EIO;
if (ret && ret != -ERESTARTSYS)
DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
invalidate_domains, flush_domains);
#endif
+ trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
+ invalidate_domains, flush_domains);
if (flush_domains & I915_GEM_DOMAIN_CPU)
drm_agp_chipset_flush(dev);
return -EINVAL;
}
+ /* blow away mappings if mapped through GTT */
+ i915_gem_release_mmap(obj);
+
+ if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
+ i915_gem_clear_fence_reg(obj);
+
/* Move the object to the CPU domain to ensure that
* any possible CPU writes while it's not in the GTT
* are flushed when we go to remap it. This will
return ret;
}
+ BUG_ON(obj_priv->active);
+
if (obj_priv->agp_mem != NULL) {
drm_unbind_agp(obj_priv->agp_mem);
drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
obj_priv->agp_mem = NULL;
}
- BUG_ON(obj_priv->active);
-
- /* blow away mappings if mapped through GTT */
- i915_gem_release_mmap(obj);
-
- if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
- i915_gem_clear_fence_reg(obj);
-
i915_gem_object_put_pages(obj);
+ BUG_ON(obj_priv->pages_refcount);
if (obj_priv->gtt_space) {
atomic_dec(&dev->gtt_count);
if (!list_empty(&obj_priv->list))
list_del_init(&obj_priv->list);
+ if (i915_gem_object_is_purgeable(obj_priv))
+ i915_gem_object_truncate(obj);
+
+ trace_i915_gem_object_unbind(obj);
+
return 0;
}
+static struct drm_gem_object *
+i915_gem_find_inactive_object(struct drm_device *dev, int min_size)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_i915_gem_object *obj_priv;
+ struct drm_gem_object *best = NULL;
+ struct drm_gem_object *first = NULL;
+
+ /* Try to find the smallest clean object */
+ list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
+ struct drm_gem_object *obj = obj_priv->obj;
+ if (obj->size >= min_size) {
+ if ((!obj_priv->dirty ||
+ i915_gem_object_is_purgeable(obj_priv)) &&
+ (!best || obj->size < best->size)) {
+ best = obj;
+ if (best->size == min_size)
+ return best;
+ }
+ if (!first)
+ first = obj;
+ }
+ }
+
+ return best ? best : first;
+}
+
static int
-i915_gem_evict_something(struct drm_device *dev)
+i915_gem_evict_everything(struct drm_device *dev)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ uint32_t seqno;
+ int ret;
+ bool lists_empty;
+
+ spin_lock(&dev_priv->mm.active_list_lock);
+ lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+ list_empty(&dev_priv->mm.flushing_list) &&
+ list_empty(&dev_priv->mm.active_list));
+ spin_unlock(&dev_priv->mm.active_list_lock);
+
+ if (lists_empty)
+ return -ENOSPC;
+
+ /* Flush everything (on to the inactive lists) and evict */
+ i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+ seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
+ if (seqno == 0)
+ return -ENOMEM;
+
+ ret = i915_wait_request(dev, seqno);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_evict_from_inactive_list(dev);
+ if (ret)
+ return ret;
+
+ spin_lock(&dev_priv->mm.active_list_lock);
+ lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+ list_empty(&dev_priv->mm.flushing_list) &&
+ list_empty(&dev_priv->mm.active_list));
+ spin_unlock(&dev_priv->mm.active_list_lock);
+ BUG_ON(!lists_empty);
+
+ return 0;
+}
+
+static int
+i915_gem_evict_something(struct drm_device *dev, int min_size)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_gem_object *obj;
- struct drm_i915_gem_object *obj_priv;
- int ret = 0;
+ int ret;
for (;;) {
+ i915_gem_retire_requests(dev);
+
/* If there's an inactive buffer available now, grab it
* and be done.
*/
- if (!list_empty(&dev_priv->mm.inactive_list)) {
- obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
- struct drm_i915_gem_object,
- list);
- obj = obj_priv->obj;
- BUG_ON(obj_priv->pin_count != 0);
+ obj = i915_gem_find_inactive_object(dev, min_size);
+ if (obj) {
+ struct drm_i915_gem_object *obj_priv;
+
#if WATCH_LRU
DRM_INFO("%s: evicting %p\n", __func__, obj);
#endif
+ obj_priv = obj->driver_private;
+ BUG_ON(obj_priv->pin_count != 0);
BUG_ON(obj_priv->active);
/* Wait on the rendering and unbind the buffer. */
- ret = i915_gem_object_unbind(obj);
- break;
+ return i915_gem_object_unbind(obj);
}
/* If we didn't get anything, but the ring is still processing
- * things, wait for one of those things to finish and hopefully
- * leave us a buffer to evict.
+ * things, wait for the next to finish and hopefully leave us
+ * a buffer to evict.
*/
if (!list_empty(&dev_priv->mm.request_list)) {
struct drm_i915_gem_request *request;
ret = i915_wait_request(dev, request->seqno);
if (ret)
- break;
+ return ret;
- /* if waiting caused an object to become inactive,
- * then loop around and wait for it. Otherwise, we
- * assume that waiting freed and unbound something,
- * so there should now be some space in the GTT
- */
- if (!list_empty(&dev_priv->mm.inactive_list))
- continue;
- break;
+ continue;
}
/* If we didn't have anything on the request list but there
* will get moved to inactive.
*/
if (!list_empty(&dev_priv->mm.flushing_list)) {
- obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
- struct drm_i915_gem_object,
- list);
- obj = obj_priv->obj;
+ struct drm_i915_gem_object *obj_priv;
- i915_gem_flush(dev,
- obj->write_domain,
- obj->write_domain);
- i915_add_request(dev, NULL, obj->write_domain);
+ /* Find an object that we can immediately reuse */
+ list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
+ obj = obj_priv->obj;
+ if (obj->size >= min_size)
+ break;
- obj = NULL;
- continue;
- }
+ obj = NULL;
+ }
- DRM_ERROR("inactive empty %d request empty %d "
- "flushing empty %d\n",
- list_empty(&dev_priv->mm.inactive_list),
- list_empty(&dev_priv->mm.request_list),
- list_empty(&dev_priv->mm.flushing_list));
- /* If we didn't do any of the above, there's nothing to be done
- * and we just can't fit it in.
- */
- return -ENOSPC;
- }
- return ret;
-}
+ if (obj != NULL) {
+ uint32_t seqno;
-static int
-i915_gem_evict_everything(struct drm_device *dev)
-{
- int ret;
+ i915_gem_flush(dev,
+ obj->write_domain,
+ obj->write_domain);
+ seqno = i915_add_request(dev, NULL, obj->write_domain);
+ if (seqno == 0)
+ return -ENOMEM;
- for (;;) {
- ret = i915_gem_evict_something(dev);
- if (ret != 0)
- break;
+ ret = i915_wait_request(dev, seqno);
+ if (ret)
+ return ret;
+
+ continue;
+ }
+ }
+
+ /* If we didn't do any of the above, there's no single buffer
+ * large enough to swap out for the new one, so just evict
+ * everything and start again. (This should be rare.)
+ */
+ if (!list_empty (&dev_priv->mm.inactive_list))
+ return i915_gem_evict_from_inactive_list(dev);
+ else
+ return i915_gem_evict_everything(dev);
}
- if (ret == -ENOSPC)
- return 0;
- return ret;
}
int
BUG_ON(obj_priv->pages != NULL);
obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
if (obj_priv->pages == NULL) {
- DRM_ERROR("Faled to allocate page list\n");
obj_priv->pages_refcount--;
return -ENOMEM;
}
page = read_mapping_page(mapping, i, NULL);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
- DRM_ERROR("read_mapping_page failed: %d\n", ret);
i915_gem_object_put_pages(obj);
return ret;
}
else
i830_write_fence_reg(reg);
+ trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
+
return 0;
}
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
struct drm_mm_node *free_space;
- int page_count, ret;
+ bool retry_alloc = false;
+ int ret;
if (dev_priv->mm.suspended)
return -EBUSY;
+
+ if (obj_priv->madv != I915_MADV_WILLNEED) {
+ DRM_ERROR("Attempting to bind a purgeable object\n");
+ return -EINVAL;
+ }
+
if (alignment == 0)
alignment = i915_gem_get_gtt_alignment(obj);
if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
}
}
if (obj_priv->gtt_space == NULL) {
- bool lists_empty;
-
/* If the gtt is empty and we're still having trouble
* fitting our object in, we're out of memory.
*/
#if WATCH_LRU
DRM_INFO("%s: GTT full, evicting something\n", __func__);
#endif
- spin_lock(&dev_priv->mm.active_list_lock);
- lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
- list_empty(&dev_priv->mm.flushing_list) &&
- list_empty(&dev_priv->mm.active_list));
- spin_unlock(&dev_priv->mm.active_list_lock);
- if (lists_empty) {
- DRM_ERROR("GTT full, but LRU list empty\n");
- return -ENOSPC;
- }
-
- ret = i915_gem_evict_something(dev);
- if (ret != 0) {
- if (ret != -ERESTARTSYS)
- DRM_ERROR("Failed to evict a buffer %d\n", ret);
+ ret = i915_gem_evict_something(dev, obj->size);
+ if (ret)
return ret;
- }
+
goto search_free;
}
DRM_INFO("Binding object of size %zd at 0x%08x\n",
obj->size, obj_priv->gtt_offset);
#endif
+ if (retry_alloc) {
+ i915_gem_object_set_page_gfp_mask (obj,
+ i915_gem_object_get_page_gfp_mask (obj) & ~__GFP_NORETRY);
+ }
ret = i915_gem_object_get_pages(obj);
+ if (retry_alloc) {
+ i915_gem_object_set_page_gfp_mask (obj,
+ i915_gem_object_get_page_gfp_mask (obj) | __GFP_NORETRY);
+ }
if (ret) {
drm_mm_put_block(obj_priv->gtt_space);
obj_priv->gtt_space = NULL;
+
+ if (ret == -ENOMEM) {
+ /* first try to clear up some space from the GTT */
+ ret = i915_gem_evict_something(dev, obj->size);
+ if (ret) {
+ /* now try to shrink everyone else */
+ if (! retry_alloc) {
+ retry_alloc = true;
+ goto search_free;
+ }
+
+ return ret;
+ }
+
+ goto search_free;
+ }
+
return ret;
}
- page_count = obj->size / PAGE_SIZE;
/* Create an AGP memory structure pointing at our pages, and bind it
* into the GTT.
*/
obj_priv->agp_mem = drm_agp_bind_pages(dev,
obj_priv->pages,
- page_count,
+ obj->size >> PAGE_SHIFT,
obj_priv->gtt_offset,
obj_priv->agp_type);
if (obj_priv->agp_mem == NULL) {
i915_gem_object_put_pages(obj);
drm_mm_put_block(obj_priv->gtt_space);
obj_priv->gtt_space = NULL;
- return -ENOMEM;
+
+ ret = i915_gem_evict_something(dev, obj->size);
+ if (ret)
+ return ret;
+
+ goto search_free;
}
atomic_inc(&dev->gtt_count);
atomic_add(obj->size, &dev->gtt_memory);
BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+ trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
+
return 0;
}
if (obj_priv->pages == NULL)
return;
- /* XXX: The 865 in particular appears to be weird in how it handles
- * cache flushing. We haven't figured it out, but the
- * clflush+agp_chipset_flush doesn't appear to successfully get the
- * data visible to the PGU, while wbinvd + agp_chipset_flush does.
- */
- if (IS_I865G(obj->dev)) {
- wbinvd();
- return;
- }
+ trace_i915_gem_object_clflush(obj);
drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
}
{
struct drm_device *dev = obj->dev;
uint32_t seqno;
+ uint32_t old_write_domain;
if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
return;
/* Queue the GPU write cache flushing we need. */
+ old_write_domain = obj->write_domain;
i915_gem_flush(dev, 0, obj->write_domain);
seqno = i915_add_request(dev, NULL, obj->write_domain);
obj->write_domain = 0;
i915_gem_object_move_to_active(obj, seqno);
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
{
+ uint32_t old_write_domain;
+
if (obj->write_domain != I915_GEM_DOMAIN_GTT)
return;
* to it immediately go to main memory as far as we know, so there's
* no chipset flush. It also doesn't land in render cache.
*/
+ old_write_domain = obj->write_domain;
obj->write_domain = 0;
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
/** Flushes the CPU write domain for the object if it's dirty. */
i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
+ uint32_t old_write_domain;
if (obj->write_domain != I915_GEM_DOMAIN_CPU)
return;
i915_gem_clflush_object(obj);
drm_agp_chipset_flush(dev);
+ old_write_domain = obj->write_domain;
obj->write_domain = 0;
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
/**
i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
{
struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ uint32_t old_write_domain, old_read_domains;
int ret;
/* Not valid to be called on unbound objects. */
if (ret != 0)
return ret;
+ old_write_domain = obj->write_domain;
+ old_read_domains = obj->read_domains;
+
/* If we're writing through the GTT domain, then CPU and GPU caches
* will need to be invalidated at next use.
*/
obj_priv->dirty = 1;
}
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ old_write_domain);
+
return 0;
}
static int
i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
{
+ uint32_t old_write_domain, old_read_domains;
int ret;
i915_gem_object_flush_gpu_write_domain(obj);
*/
i915_gem_object_set_to_full_cpu_read_domain(obj);
+ old_write_domain = obj->write_domain;
+ old_read_domains = obj->read_domains;
+
/* Flush the CPU cache if it's still invalid. */
if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
i915_gem_clflush_object(obj);
obj->write_domain = I915_GEM_DOMAIN_CPU;
}
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ old_write_domain);
+
return 0;
}
struct drm_i915_gem_object *obj_priv = obj->driver_private;
uint32_t invalidate_domains = 0;
uint32_t flush_domains = 0;
+ uint32_t old_read_domains;
BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
i915_gem_clflush_object(obj);
}
+ old_read_domains = obj->read_domains;
+
/* The actual obj->write_domain will be updated with
* pending_write_domain after we emit the accumulated flush for all
* of our domain changes in execbuffers (which clears objects'
obj->read_domains, obj->write_domain,
dev->invalidate_domains, dev->flush_domains);
#endif
+
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ obj->write_domain);
}
/**
uint64_t offset, uint64_t size)
{
struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ uint32_t old_read_domains;
int i, ret;
if (offset == 0 && size == obj->size)
*/
BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+ old_read_domains = obj->read_domains;
obj->read_domains |= I915_GEM_DOMAIN_CPU;
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ obj->write_domain);
+
return 0;
}
}
target_obj_priv = target_obj->driver_private;
+#if WATCH_RELOC
+ DRM_INFO("%s: obj %p offset %08x target %d "
+ "read %08x write %08x gtt %08x "
+ "presumed %08x delta %08x\n",
+ __func__,
+ obj,
+ (int) reloc->offset,
+ (int) reloc->target_handle,
+ (int) reloc->read_domains,
+ (int) reloc->write_domain,
+ (int) target_obj_priv->gtt_offset,
+ (int) reloc->presumed_offset,
+ reloc->delta);
+#endif
+
/* The target buffer should have appeared before us in the
* exec_object list, so it should have a GTT space bound by now.
*/
return -EINVAL;
}
- if (reloc->offset > obj->size - 4) {
- DRM_ERROR("Relocation beyond object bounds: "
- "obj %p target %d offset %d size %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset, (int) obj->size);
- drm_gem_object_unreference(target_obj);
- i915_gem_object_unpin(obj);
- return -EINVAL;
- }
- if (reloc->offset & 3) {
- DRM_ERROR("Relocation not 4-byte aligned: "
- "obj %p target %d offset %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset);
- drm_gem_object_unreference(target_obj);
- i915_gem_object_unpin(obj);
- return -EINVAL;
- }
-
+ /* Validate that the target is in a valid r/w GPU domain */
if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
reloc->read_domains & I915_GEM_DOMAIN_CPU) {
DRM_ERROR("reloc with read/write CPU domains: "
i915_gem_object_unpin(obj);
return -EINVAL;
}
-
if (reloc->write_domain && target_obj->pending_write_domain &&
reloc->write_domain != target_obj->pending_write_domain) {
DRM_ERROR("Write domain conflict: "
return -EINVAL;
}
-#if WATCH_RELOC
- DRM_INFO("%s: obj %p offset %08x target %d "
- "read %08x write %08x gtt %08x "
- "presumed %08x delta %08x\n",
- __func__,
- obj,
- (int) reloc->offset,
- (int) reloc->target_handle,
- (int) reloc->read_domains,
- (int) reloc->write_domain,
- (int) target_obj_priv->gtt_offset,
- (int) reloc->presumed_offset,
- reloc->delta);
-#endif
-
target_obj->pending_read_domains |= reloc->read_domains;
target_obj->pending_write_domain |= reloc->write_domain;
continue;
}
+ /* Check that the relocation address is valid... */
+ if (reloc->offset > obj->size - 4) {
+ DRM_ERROR("Relocation beyond object bounds: "
+ "obj %p target %d offset %d size %d.\n",
+ obj, reloc->target_handle,
+ (int) reloc->offset, (int) obj->size);
+ drm_gem_object_unreference(target_obj);
+ i915_gem_object_unpin(obj);
+ return -EINVAL;
+ }
+ if (reloc->offset & 3) {
+ DRM_ERROR("Relocation not 4-byte aligned: "
+ "obj %p target %d offset %d.\n",
+ obj, reloc->target_handle,
+ (int) reloc->offset);
+ drm_gem_object_unreference(target_obj);
+ i915_gem_object_unpin(obj);
+ return -EINVAL;
+ }
+
+ /* and points to somewhere within the target object. */
+ if (reloc->delta >= target_obj->size) {
+ DRM_ERROR("Relocation beyond target object bounds: "
+ "obj %p target %d delta %d size %d.\n",
+ obj, reloc->target_handle,
+ (int) reloc->delta, (int) target_obj->size);
+ drm_gem_object_unreference(target_obj);
+ i915_gem_object_unpin(obj);
+ return -EINVAL;
+ }
+
ret = i915_gem_object_set_to_gtt_domain(obj, 1);
if (ret != 0) {
drm_gem_object_unreference(target_obj);
exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
exec_len = (uint32_t) exec->batch_len;
+ trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno);
+
count = nbox ? nbox : 1;
for (i = 0; i < count; i++) {
i915_verify_inactive(dev, __FILE__, __LINE__);
- if (dev_priv->mm.wedged) {
+ if (atomic_read(&dev_priv->mm.wedged)) {
DRM_ERROR("Execbuf while wedged\n");
mutex_unlock(&dev->struct_mutex);
ret = -EIO;
/* error other than GTT full, or we've already tried again */
if (ret != -ENOSPC || pin_tries >= 1) {
- if (ret != -ERESTARTSYS)
- DRM_ERROR("Failed to pin buffers %d\n", ret);
+ if (ret != -ERESTARTSYS) {
+ unsigned long long total_size = 0;
+ for (i = 0; i < args->buffer_count; i++)
+ total_size += object_list[i]->size;
+ DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n",
+ pinned+1, args->buffer_count,
+ total_size, ret);
+ DRM_ERROR("%d objects [%d pinned], "
+ "%d object bytes [%d pinned], "
+ "%d/%d gtt bytes\n",
+ atomic_read(&dev->object_count),
+ atomic_read(&dev->pin_count),
+ atomic_read(&dev->object_memory),
+ atomic_read(&dev->pin_memory),
+ atomic_read(&dev->gtt_memory),
+ dev->gtt_total);
+ }
goto err;
}
/* evict everyone we can from the aperture */
ret = i915_gem_evict_everything(dev);
- if (ret)
+ if (ret && ret != -ENOSPC)
goto err;
}
for (i = 0; i < args->buffer_count; i++) {
struct drm_gem_object *obj = object_list[i];
+ uint32_t old_write_domain = obj->write_domain;
obj->write_domain = obj->pending_write_domain;
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
i915_verify_inactive(dev, __FILE__, __LINE__);
i915_verify_inactive(dev, __FILE__, __LINE__);
if (obj_priv->gtt_space == NULL) {
ret = i915_gem_object_bind_to_gtt(obj, alignment);
- if (ret != 0) {
- if (ret != -EBUSY && ret != -ERESTARTSYS)
- DRM_ERROR("Failure to bind: %d\n", ret);
+ if (ret)
return ret;
- }
}
/*
* Pre-965 chips need a fence register set up in order to
}
obj_priv = obj->driver_private;
+ if (obj_priv->madv != I915_MADV_WILLNEED) {
+ DRM_ERROR("Attempting to pin a purgeable buffer\n");
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return -EINVAL;
+ }
+
if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
args->handle);
return i915_gem_ring_throttle(dev, file_priv);
}
+int
+i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_i915_gem_madvise *args = data;
+ struct drm_gem_object *obj;
+ struct drm_i915_gem_object *obj_priv;
+
+ switch (args->madv) {
+ case I915_MADV_DONTNEED:
+ case I915_MADV_WILLNEED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+ if (obj == NULL) {
+ DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
+ args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ obj_priv = obj->driver_private;
+
+ if (obj_priv->pin_count) {
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
+ return -EINVAL;
+ }
+
+ if (obj_priv->madv != __I915_MADV_PURGED)
+ obj_priv->madv = args->madv;
+
+ /* if the object is no longer bound, discard its backing storage */
+ if (i915_gem_object_is_purgeable(obj_priv) &&
+ obj_priv->gtt_space == NULL)
+ i915_gem_object_truncate(obj);
+
+ args->retained = obj_priv->madv != __I915_MADV_PURGED;
+
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return 0;
+}
+
int i915_gem_init_object(struct drm_gem_object *obj)
{
struct drm_i915_gem_object *obj_priv;
obj_priv->fence_reg = I915_FENCE_REG_NONE;
INIT_LIST_HEAD(&obj_priv->list);
INIT_LIST_HEAD(&obj_priv->fence_list);
+ obj_priv->madv = I915_MADV_WILLNEED;
+
+ trace_i915_gem_object_create(obj);
return 0;
}
struct drm_device *dev = obj->dev;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ trace_i915_gem_object_destroy(obj);
+
while (obj_priv->pin_count > 0)
i915_gem_object_unpin(obj);
i915_gem_object_unbind(obj);
- i915_gem_free_mmap_offset(obj);
+ if (obj_priv->mmap_offset)
+ i915_gem_free_mmap_offset(obj);
kfree(obj_priv->page_cpu_valid);
kfree(obj_priv->bit_17);
kfree(obj->driver_private);
}
-/** Unbinds all objects that are on the given buffer list. */
+/** Unbinds all inactive objects. */
static int
-i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
+i915_gem_evict_from_inactive_list(struct drm_device *dev)
{
- struct drm_gem_object *obj;
- struct drm_i915_gem_object *obj_priv;
- int ret;
+ drm_i915_private_t *dev_priv = dev->dev_private;
- while (!list_empty(head)) {
- obj_priv = list_first_entry(head,
- struct drm_i915_gem_object,
- list);
- obj = obj_priv->obj;
+ while (!list_empty(&dev_priv->mm.inactive_list)) {
+ struct drm_gem_object *obj;
+ int ret;
- if (obj_priv->pin_count != 0) {
- DRM_ERROR("Pinned object in unbind list\n");
- mutex_unlock(&dev->struct_mutex);
- return -EINVAL;
- }
+ obj = list_first_entry(&dev_priv->mm.inactive_list,
+ struct drm_i915_gem_object,
+ list)->obj;
ret = i915_gem_object_unbind(obj);
if (ret != 0) {
- DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
- ret);
- mutex_unlock(&dev->struct_mutex);
+ DRM_ERROR("Error unbinding object: %d\n", ret);
return ret;
}
}
-
return 0;
}
* We need to replace this with a semaphore, or something.
*/
dev_priv->mm.suspended = 1;
+ del_timer(&dev_priv->hangcheck_timer);
/* Cancel the retire work handler, wait for it to finish if running
*/
if (last_seqno == cur_seqno) {
if (stuck++ > 100) {
DRM_ERROR("hardware wedged\n");
- dev_priv->mm.wedged = 1;
+ atomic_set(&dev_priv->mm.wedged, 1);
DRM_WAKEUP(&dev_priv->irq_queue);
break;
}
i915_gem_retire_requests(dev);
spin_lock(&dev_priv->mm.active_list_lock);
- if (!dev_priv->mm.wedged) {
+ if (!atomic_read(&dev_priv->mm.wedged)) {
/* Active and flushing should now be empty as we've
* waited for a sequence higher than any pending execbuffer
*/
* the GPU domains and just stuff them onto inactive.
*/
while (!list_empty(&dev_priv->mm.active_list)) {
- struct drm_i915_gem_object *obj_priv;
+ struct drm_gem_object *obj;
+ uint32_t old_write_domain;
- obj_priv = list_first_entry(&dev_priv->mm.active_list,
- struct drm_i915_gem_object,
- list);
- obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
- i915_gem_object_move_to_inactive(obj_priv->obj);
+ obj = list_first_entry(&dev_priv->mm.active_list,
+ struct drm_i915_gem_object,
+ list)->obj;
+ old_write_domain = obj->write_domain;
+ obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+ i915_gem_object_move_to_inactive(obj);
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
spin_unlock(&dev_priv->mm.active_list_lock);
while (!list_empty(&dev_priv->mm.flushing_list)) {
- struct drm_i915_gem_object *obj_priv;
+ struct drm_gem_object *obj;
+ uint32_t old_write_domain;
- obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
- struct drm_i915_gem_object,
- list);
- obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
- i915_gem_object_move_to_inactive(obj_priv->obj);
+ obj = list_first_entry(&dev_priv->mm.flushing_list,
+ struct drm_i915_gem_object,
+ list)->obj;
+ old_write_domain = obj->write_domain;
+ obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+ i915_gem_object_move_to_inactive(obj);
+
+ trace_i915_gem_object_change_domain(obj,
+ obj->read_domains,
+ old_write_domain);
}
/* Move all inactive buffers out of the GTT. */
- ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
+ ret = i915_gem_evict_from_inactive_list(dev);
WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
if (ret) {
mutex_unlock(&dev->struct_mutex);
if (drm_core_check_feature(dev, DRIVER_MODESET))
return 0;
- if (dev_priv->mm.wedged) {
+ if (atomic_read(&dev_priv->mm.wedged)) {
DRM_ERROR("Reenabling wedged hardware, good luck\n");
- dev_priv->mm.wedged = 0;
+ atomic_set(&dev_priv->mm.wedged, 0);
}
mutex_lock(&dev->struct_mutex);
i915_gem_retire_work_handler);
dev_priv->mm.next_gem_seqno = 1;
+ spin_lock(&shrink_list_lock);
+ list_add(&dev_priv->mm.shrink_list, &shrink_list);
+ spin_unlock(&shrink_list_lock);
+
/* Old X drivers will take 0-2 for front, back, depth buffers */
dev_priv->fence_reg_start = 3;
list_del_init(i915_file_priv->mm.request_list.next);
mutex_unlock(&dev->struct_mutex);
}
+
+static int
+i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
+{
+ drm_i915_private_t *dev_priv, *next_dev;
+ struct drm_i915_gem_object *obj_priv, *next_obj;
+ int cnt = 0;
+ int would_deadlock = 1;
+
+ /* "fast-path" to count number of available objects */
+ if (nr_to_scan == 0) {
+ spin_lock(&shrink_list_lock);
+ list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
+ struct drm_device *dev = dev_priv->dev;
+
+ if (mutex_trylock(&dev->struct_mutex)) {
+ list_for_each_entry(obj_priv,
+ &dev_priv->mm.inactive_list,
+ list)
+ cnt++;
+ mutex_unlock(&dev->struct_mutex);
+ }
+ }
+ spin_unlock(&shrink_list_lock);
+
+ return (cnt / 100) * sysctl_vfs_cache_pressure;
+ }
+
+ spin_lock(&shrink_list_lock);
+
+ /* first scan for clean buffers */
+ list_for_each_entry_safe(dev_priv, next_dev,
+ &shrink_list, mm.shrink_list) {
+ struct drm_device *dev = dev_priv->dev;
+
+ if (! mutex_trylock(&dev->struct_mutex))
+ continue;
+
+ spin_unlock(&shrink_list_lock);
+
+ i915_gem_retire_requests(dev);
+
+ list_for_each_entry_safe(obj_priv, next_obj,
+ &dev_priv->mm.inactive_list,
+ list) {
+ if (i915_gem_object_is_purgeable(obj_priv)) {
+ i915_gem_object_unbind(obj_priv->obj);
+ if (--nr_to_scan <= 0)
+ break;
+ }
+ }
+
+ spin_lock(&shrink_list_lock);
+ mutex_unlock(&dev->struct_mutex);
+
+ would_deadlock = 0;
+
+ if (nr_to_scan <= 0)
+ break;
+ }
+
+ /* second pass, evict/count anything still on the inactive list */
+ list_for_each_entry_safe(dev_priv, next_dev,
+ &shrink_list, mm.shrink_list) {
+ struct drm_device *dev = dev_priv->dev;
+
+ if (! mutex_trylock(&dev->struct_mutex))
+ continue;
+
+ spin_unlock(&shrink_list_lock);
+
+ list_for_each_entry_safe(obj_priv, next_obj,
+ &dev_priv->mm.inactive_list,
+ list) {
+ if (nr_to_scan > 0) {
+ i915_gem_object_unbind(obj_priv->obj);
+ nr_to_scan--;
+ } else
+ cnt++;
+ }
+
+ spin_lock(&shrink_list_lock);
+ mutex_unlock(&dev->struct_mutex);
+
+ would_deadlock = 0;
+ }
+
+ spin_unlock(&shrink_list_lock);
+
+ if (would_deadlock)
+ return -1;
+ else if (cnt > 0)
+ return (cnt / 100) * sysctl_vfs_cache_pressure;
+ else
+ return 0;
+}
+
+static struct shrinker shrinker = {
+ .shrink = i915_gem_shrink,
+ .seeks = DEFAULT_SEEKS,
+};
+
+__init void
+i915_gem_shrinker_init(void)
+{
+ register_shrinker(&shrinker);
+}
+
+__exit void
+i915_gem_shrinker_exit(void)
+{
+ unregister_shrinker(&shrinker);
+}
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
+#include "i915_trace.h"
#include "intel_drv.h"
#define MAX_NOPID ((u32)~0)
}
if (gt_iir & GT_USER_INTERRUPT) {
- dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+ u32 seqno = i915_get_gem_seqno(dev);
+ dev_priv->mm.irq_gem_seqno = seqno;
+ trace_i915_gem_request_complete(dev, seqno);
DRM_WAKEUP(&dev_priv->irq_queue);
}
drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
error_work);
struct drm_device *dev = dev_priv->dev;
- char *event_string = "ERROR=1";
- char *envp[] = { event_string, NULL };
+ char *error_event[] = { "ERROR=1", NULL };
+ char *reset_event[] = { "RESET=1", NULL };
+ char *reset_done_event[] = { "ERROR=0", NULL };
DRM_DEBUG("generating error event\n");
-
- kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
+ kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
+
+ if (atomic_read(&dev_priv->mm.wedged)) {
+ if (IS_I965G(dev)) {
+ DRM_DEBUG("resetting chip\n");
+ kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event);
+ if (!i965_reset(dev, GDRST_RENDER)) {
+ atomic_set(&dev_priv->mm.wedged, 0);
+ kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
+ }
+ } else {
+ printk("reboot required\n");
+ }
+ }
}
/**
* so userspace knows something bad happened (should trigger collection
* of a ring dump etc.).
*/
-static void i915_handle_error(struct drm_device *dev)
+static void i915_handle_error(struct drm_device *dev, bool wedged)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 eir = I915_READ(EIR);
I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
}
+ if (wedged) {
+ atomic_set(&dev_priv->mm.wedged, 1);
+
+ /*
+ * Wakeup waiting processes so they don't hang
+ */
+ printk("i915: Waking up sleeping processes\n");
+ DRM_WAKEUP(&dev_priv->irq_queue);
+ }
+
queue_work(dev_priv->wq, &dev_priv->error_work);
}
pipeb_stats = I915_READ(PIPEBSTAT);
if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
- i915_handle_error(dev);
+ i915_handle_error(dev, false);
/*
* Clear the PIPE(A|B)STAT regs before the IIR
}
if (iir & I915_USER_INTERRUPT) {
- dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+ u32 seqno = i915_get_gem_seqno(dev);
+ dev_priv->mm.irq_gem_seqno = seqno;
+ trace_i915_gem_request_complete(dev, seqno);
DRM_WAKEUP(&dev_priv->irq_queue);
+ dev_priv->hangcheck_count = 0;
+ mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
}
if (pipea_stats & vblank_status) {
return -EINVAL;
}
+struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) {
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list);
+}
+
+/**
+ * This is called when the chip hasn't reported back with completed
+ * batchbuffers in a long time. The first time this is called we simply record
+ * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses
+ * again, we assume the chip is wedged and try to fix it.
+ */
+void i915_hangcheck_elapsed(unsigned long data)
+{
+ struct drm_device *dev = (struct drm_device *)data;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ uint32_t acthd;
+
+ if (!IS_I965G(dev))
+ acthd = I915_READ(ACTHD);
+ else
+ acthd = I915_READ(ACTHD_I965);
+
+ /* If all work is done then ACTHD clearly hasn't advanced. */
+ if (list_empty(&dev_priv->mm.request_list) ||
+ i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) {
+ dev_priv->hangcheck_count = 0;
+ return;
+ }
+
+ if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) {
+ DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
+ i915_handle_error(dev, true);
+ return;
+ }
+
+ /* Reset timer case chip hangs without another request being added */
+ mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+
+ if (acthd != dev_priv->last_acthd)
+ dev_priv->hangcheck_count = 0;
+ else
+ dev_priv->hangcheck_count++;
+
+ dev_priv->last_acthd = acthd;
+}
+
/* drm_dma.h hooks
*/
static void igdng_irq_preinstall(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
struct opregion_asle *asle = dev_priv->opregion.asle;
u32 blc_pwm_ctl, blc_pwm_ctl2;
+ u32 max_backlight, level, shift;
if (!(bclp & ASLE_BCLP_VALID))
return ASLE_BACKLIGHT_FAIL;
return ASLE_BACKLIGHT_FAIL;
blc_pwm_ctl = I915_READ(BLC_PWM_CTL);
- blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
blc_pwm_ctl2 = I915_READ(BLC_PWM_CTL2);
- if (blc_pwm_ctl2 & BLM_COMBINATION_MODE)
+ if (IS_I965G(dev) && (blc_pwm_ctl2 & BLM_COMBINATION_MODE))
pci_write_config_dword(dev->pdev, PCI_LBPC, bclp);
- else
- I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | ((bclp * 0x101)-1));
-
+ else {
+ if (IS_IGD(dev)) {
+ blc_pwm_ctl &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
+ max_backlight = (blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >>
+ BACKLIGHT_MODULATION_FREQ_SHIFT;
+ shift = BACKLIGHT_DUTY_CYCLE_SHIFT + 1;
+ } else {
+ blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
+ max_backlight = ((blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >>
+ BACKLIGHT_MODULATION_FREQ_SHIFT) * 2;
+ shift = BACKLIGHT_DUTY_CYCLE_SHIFT;
+ }
+ level = (bclp * max_backlight) / 255;
+ I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | (level << shift));
+ }
asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID;
return 0;
#define I915_GC_RENDER_CLOCK_200_MHZ (1 << 0)
#define I915_GC_RENDER_CLOCK_333_MHZ (4 << 0)
#define LBB 0xf4
+#define GDRST 0xc0
+#define GDRST_FULL (0<<2)
+#define GDRST_RENDER (1<<2)
+#define GDRST_MEDIA (3<<2)
/* VGA stuff */
#define FBC_CTL_PLANEA (0<<0)
#define FBC_CTL_PLANEB (1<<0)
#define FBC_FENCE_OFF 0x0321b
+#define FBC_TAG 0x03300
#define FBC_LL_SIZE (1536)
+/* Framebuffer compression for GM45+ */
+#define DPFC_CB_BASE 0x3200
+#define DPFC_CONTROL 0x3208
+#define DPFC_CTL_EN (1<<31)
+#define DPFC_CTL_PLANEA (0<<30)
+#define DPFC_CTL_PLANEB (1<<30)
+#define DPFC_CTL_FENCE_EN (1<<29)
+#define DPFC_SR_EN (1<<10)
+#define DPFC_CTL_LIMIT_1X (0<<6)
+#define DPFC_CTL_LIMIT_2X (1<<6)
+#define DPFC_CTL_LIMIT_4X (2<<6)
+#define DPFC_RECOMP_CTL 0x320c
+#define DPFC_RECOMP_STALL_EN (1<<27)
+#define DPFC_RECOMP_STALL_WM_SHIFT (16)
+#define DPFC_RECOMP_STALL_WM_MASK (0x07ff0000)
+#define DPFC_RECOMP_TIMER_COUNT_SHIFT (0)
+#define DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f)
+#define DPFC_STATUS 0x3210
+#define DPFC_INVAL_SEG_SHIFT (16)
+#define DPFC_INVAL_SEG_MASK (0x07ff0000)
+#define DPFC_COMP_SEG_SHIFT (0)
+#define DPFC_COMP_SEG_MASK (0x000003ff)
+#define DPFC_STATUS2 0x3214
+#define DPFC_FENCE_YOFF 0x3218
+#define DPFC_CHICKEN 0x3224
+#define DPFC_HT_MODIFY (1<<31)
+
/*
* GPIO regs
*/
#define PF_ENABLE (1<<31)
#define PFA_WIN_SZ 0x68074
#define PFB_WIN_SZ 0x68874
+#define PFA_WIN_POS 0x68070
+#define PFB_WIN_POS 0x68870
/* legacy palette */
#define LGC_PALETTE_A 0x4a000
if (drm_core_check_feature(dev, DRIVER_MODESET))
return;
+
/* Pipe & plane A info */
dev_priv->savePIPEACONF = I915_READ(PIPEACONF);
dev_priv->savePIPEASRC = I915_READ(PIPEASRC);
dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT);
return;
}
+
static void i915_restore_modeset_reg(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
return;
}
-int i915_save_state(struct drm_device *dev)
+
+void i915_save_display(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- int i;
-
- pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
-
- /* Render Standby */
- if (IS_I965G(dev) && IS_MOBILE(dev))
- dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
-
- /* Hardware status page */
- dev_priv->saveHWS = I915_READ(HWS_PGA);
/* Display arbitration control */
dev_priv->saveDSPARB = I915_READ(DSPARB);
/* This is only meaningful in non-KMS mode */
/* Don't save them in KMS mode */
i915_save_modeset_reg(dev);
+
/* Cursor state */
dev_priv->saveCURACNTR = I915_READ(CURACNTR);
dev_priv->saveCURAPOS = I915_READ(CURAPOS);
dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2);
dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL);
- /* Interrupt state */
- dev_priv->saveIIR = I915_READ(IIR);
- dev_priv->saveIER = I915_READ(IER);
- dev_priv->saveIMR = I915_READ(IMR);
-
/* VGA state */
dev_priv->saveVGA0 = I915_READ(VGA0);
dev_priv->saveVGA1 = I915_READ(VGA1);
dev_priv->saveVGA_PD = I915_READ(VGA_PD);
dev_priv->saveVGACNTRL = I915_READ(VGACNTRL);
- /* Clock gating state */
- dev_priv->saveD_STATE = I915_READ(D_STATE);
- dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D);
-
- /* Cache mode state */
- dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
-
- /* Memory Arbitration state */
- dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
-
- /* Scratch space */
- for (i = 0; i < 16; i++) {
- dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
- dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
- }
- for (i = 0; i < 3; i++)
- dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
-
- /* Fences */
- if (IS_I965G(dev)) {
- for (i = 0; i < 16; i++)
- dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
- } else {
- for (i = 0; i < 8; i++)
- dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
-
- if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
- for (i = 0; i < 8; i++)
- dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
- }
i915_save_vga(dev);
-
- return 0;
}
-int i915_restore_state(struct drm_device *dev)
+void i915_restore_display(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- int i;
-
- pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
-
- /* Render Standby */
- if (IS_I965G(dev) && IS_MOBILE(dev))
- I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
-
- /* Hardware status page */
- I915_WRITE(HWS_PGA, dev_priv->saveHWS);
/* Display arbitration */
I915_WRITE(DSPARB, dev_priv->saveDSPARB);
- /* Fences */
- if (IS_I965G(dev)) {
- for (i = 0; i < 16; i++)
- I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
- } else {
- for (i = 0; i < 8; i++)
- I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
- if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
- for (i = 0; i < 8; i++)
- I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
- }
-
/* Display port ratios (must be done before clock is set) */
if (SUPPORTS_INTEGRATED_DP(dev)) {
I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M);
I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N);
I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N);
}
+
/* This is only meaningful in non-KMS mode */
/* Don't restore them in KMS mode */
i915_restore_modeset_reg(dev);
+
/* Cursor state */
I915_WRITE(CURAPOS, dev_priv->saveCURAPOS);
I915_WRITE(CURACNTR, dev_priv->saveCURACNTR);
I915_WRITE(VGA_PD, dev_priv->saveVGA_PD);
DRM_UDELAY(150);
+ i915_restore_vga(dev);
+}
+
+int i915_save_state(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ int i;
+
+ pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
+
+ /* Render Standby */
+ if (IS_I965G(dev) && IS_MOBILE(dev))
+ dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
+
+ /* Hardware status page */
+ dev_priv->saveHWS = I915_READ(HWS_PGA);
+
+ i915_save_display(dev);
+
+ /* Interrupt state */
+ dev_priv->saveIER = I915_READ(IER);
+ dev_priv->saveIMR = I915_READ(IMR);
+
+ /* Clock gating state */
+ dev_priv->saveD_STATE = I915_READ(D_STATE);
+ dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); /* Not sure about this */
+
+ /* Cache mode state */
+ dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
+
+ /* Memory Arbitration state */
+ dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
+
+ /* Scratch space */
+ for (i = 0; i < 16; i++) {
+ dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
+ dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
+ }
+ for (i = 0; i < 3; i++)
+ dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
+
+ /* Fences */
+ if (IS_I965G(dev)) {
+ for (i = 0; i < 16; i++)
+ dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
+ } else {
+ for (i = 0; i < 8; i++)
+ dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
+
+ if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+ for (i = 0; i < 8; i++)
+ dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
+ }
+
+ return 0;
+}
+
+int i915_restore_state(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ int i;
+
+ pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
+
+ /* Render Standby */
+ if (IS_I965G(dev) && IS_MOBILE(dev))
+ I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
+
+ /* Hardware status page */
+ I915_WRITE(HWS_PGA, dev_priv->saveHWS);
+
+ /* Fences */
+ if (IS_I965G(dev)) {
+ for (i = 0; i < 16; i++)
+ I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
+ } else {
+ for (i = 0; i < 8; i++)
+ I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
+ if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+ for (i = 0; i < 8; i++)
+ I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
+ }
+
+ i915_restore_display(dev);
+
+ /* Interrupt state */
+ I915_WRITE (IER, dev_priv->saveIER);
+ I915_WRITE (IMR, dev_priv->saveIMR);
+
/* Clock gating state */
I915_WRITE (D_STATE, dev_priv->saveD_STATE);
I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D);
for (i = 0; i < 3; i++)
I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]);
- i915_restore_vga(dev);
-
return 0;
}
--- /dev/null
+#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _I915_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <drm/drmP.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM i915
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE i915_trace
+
+/* object tracking */
+
+TRACE_EVENT(i915_gem_object_create,
+
+ TP_PROTO(struct drm_gem_object *obj),
+
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ __field(u32, size)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->size = obj->size;
+ ),
+
+ TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
+);
+
+TRACE_EVENT(i915_gem_object_bind,
+
+ TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset),
+
+ TP_ARGS(obj, gtt_offset),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ __field(u32, gtt_offset)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->gtt_offset = gtt_offset;
+ ),
+
+ TP_printk("obj=%p, gtt_offset=%08x",
+ __entry->obj, __entry->gtt_offset)
+);
+
+TRACE_EVENT(i915_gem_object_clflush,
+
+ TP_PROTO(struct drm_gem_object *obj),
+
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ ),
+
+ TP_printk("obj=%p", __entry->obj)
+);
+
+TRACE_EVENT(i915_gem_object_change_domain,
+
+ TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain),
+
+ TP_ARGS(obj, old_read_domains, old_write_domain),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ __field(u32, read_domains)
+ __field(u32, write_domain)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->read_domains = obj->read_domains | (old_read_domains << 16);
+ __entry->write_domain = obj->write_domain | (old_write_domain << 16);
+ ),
+
+ TP_printk("obj=%p, read=%04x, write=%04x",
+ __entry->obj,
+ __entry->read_domains, __entry->write_domain)
+);
+
+TRACE_EVENT(i915_gem_object_get_fence,
+
+ TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode),
+
+ TP_ARGS(obj, fence, tiling_mode),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ __field(int, fence)
+ __field(int, tiling_mode)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->fence = fence;
+ __entry->tiling_mode = tiling_mode;
+ ),
+
+ TP_printk("obj=%p, fence=%d, tiling=%d",
+ __entry->obj, __entry->fence, __entry->tiling_mode)
+);
+
+TRACE_EVENT(i915_gem_object_unbind,
+
+ TP_PROTO(struct drm_gem_object *obj),
+
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ ),
+
+ TP_printk("obj=%p", __entry->obj)
+);
+
+TRACE_EVENT(i915_gem_object_destroy,
+
+ TP_PROTO(struct drm_gem_object *obj),
+
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_gem_object *, obj)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ ),
+
+ TP_printk("obj=%p", __entry->obj)
+);
+
+/* batch tracing */
+
+TRACE_EVENT(i915_gem_request_submit,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_flush,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno,
+ u32 flush_domains, u32 invalidate_domains),
+
+ TP_ARGS(dev, seqno, flush_domains, invalidate_domains),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ __field(u32, flush_domains)
+ __field(u32, invalidate_domains)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ __entry->flush_domains = flush_domains;
+ __entry->invalidate_domains = invalidate_domains;
+ ),
+
+ TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x",
+ __entry->dev, __entry->seqno,
+ __entry->flush_domains, __entry->invalidate_domains)
+);
+
+
+TRACE_EVENT(i915_gem_request_complete,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_retire,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_wait_begin,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_wait_end,
+
+ TP_PROTO(struct drm_device *dev, u32 seqno),
+
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_ring_wait_begin,
+
+ TP_PROTO(struct drm_device *dev),
+
+ TP_ARGS(dev),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ ),
+
+ TP_printk("dev=%p", __entry->dev)
+);
+
+TRACE_EVENT(i915_ring_wait_end,
+
+ TP_PROTO(struct drm_device *dev),
+
+ TP_ARGS(dev),
+
+ TP_STRUCT__entry(
+ __field(struct drm_device *, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ ),
+
+ TP_printk("dev=%p", __entry->dev)
+);
+
+#endif /* _I915_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915
+#include <trace/define_trace.h>
--- /dev/null
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ */
+
+#include "i915_drv.h"
+
+#define CREATE_TRACE_POINTS
+#include "i915_trace.h"
if (IS_I85X(dev_priv->dev))
dev_priv->lvds_ssc_freq =
general->ssc_freq ? 66 : 48;
+ else if (IS_IGDNG(dev_priv->dev))
+ dev_priv->lvds_ssc_freq =
+ general->ssc_freq ? 100 : 120;
else
dev_priv->lvds_ssc_freq =
general->ssc_freq ? 100 : 96;
{
struct drm_device *dev = connector->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- u32 adpa, temp;
+ u32 adpa;
bool ret;
- temp = adpa = I915_READ(PCH_ADPA);
-
- adpa &= ~ADPA_DAC_ENABLE;
- I915_WRITE(PCH_ADPA, adpa);
+ adpa = I915_READ(PCH_ADPA);
adpa &= ~ADPA_CRT_HOTPLUG_MASK;
else
ret = false;
- /* restore origin register */
- I915_WRITE(PCH_ADPA, temp);
return ret;
}
* Eric Anholt <eric@anholt.net>
*/
+#include <linux/module.h>
+#include <linux/input.h>
#include <linux/i2c.h>
#include <linux/kernel.h>
#include "drmP.h"
refclk, best_clock);
if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
- if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
+ if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) ==
LVDS_CLKB_POWER_UP)
clock.p2 = limit->p2.p2_fast;
else
mdelay(20);
}
+/* Parameters have changed, update FBC info */
+static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_framebuffer *fb = crtc->fb;
+ struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+ struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int plane, i;
+ u32 fbc_ctl, fbc_ctl2;
+
+ dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
+
+ if (fb->pitch < dev_priv->cfb_pitch)
+ dev_priv->cfb_pitch = fb->pitch;
+
+ /* FBC_CTL wants 64B units */
+ dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
+ dev_priv->cfb_fence = obj_priv->fence_reg;
+ dev_priv->cfb_plane = intel_crtc->plane;
+ plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
+
+ /* Clear old tags */
+ for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
+ I915_WRITE(FBC_TAG + (i * 4), 0);
+
+ /* Set it up... */
+ fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane;
+ if (obj_priv->tiling_mode != I915_TILING_NONE)
+ fbc_ctl2 |= FBC_CTL_CPU_FENCE;
+ I915_WRITE(FBC_CONTROL2, fbc_ctl2);
+ I915_WRITE(FBC_FENCE_OFF, crtc->y);
+
+ /* enable it... */
+ fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
+ fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
+ fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
+ if (obj_priv->tiling_mode != I915_TILING_NONE)
+ fbc_ctl |= dev_priv->cfb_fence;
+ I915_WRITE(FBC_CONTROL, fbc_ctl);
+
+ DRM_DEBUG("enabled FBC, pitch %ld, yoff %d, plane %d, ",
+ dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane);
+}
+
+void i8xx_disable_fbc(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ u32 fbc_ctl;
+
+ if (!I915_HAS_FBC(dev))
+ return;
+
+ /* Disable compression */
+ fbc_ctl = I915_READ(FBC_CONTROL);
+ fbc_ctl &= ~FBC_CTL_EN;
+ I915_WRITE(FBC_CONTROL, fbc_ctl);
+
+ /* Wait for compressing bit to clear */
+ while (I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING)
+ ; /* nothing */
+
+ intel_wait_for_vblank(dev);
+
+ DRM_DEBUG("disabled FBC\n");
+}
+
+static bool i8xx_fbc_enabled(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
+}
+
+static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_framebuffer *fb = crtc->fb;
+ struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+ struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int plane = (intel_crtc->plane == 0 ? DPFC_CTL_PLANEA :
+ DPFC_CTL_PLANEB);
+ unsigned long stall_watermark = 200;
+ u32 dpfc_ctl;
+
+ dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
+ dev_priv->cfb_fence = obj_priv->fence_reg;
+ dev_priv->cfb_plane = intel_crtc->plane;
+
+ dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
+ if (obj_priv->tiling_mode != I915_TILING_NONE) {
+ dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence;
+ I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
+ } else {
+ I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY);
+ }
+
+ I915_WRITE(DPFC_CONTROL, dpfc_ctl);
+ I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
+ (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
+ (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
+ I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
+
+ /* enable it... */
+ I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
+
+ DRM_DEBUG("enabled fbc on plane %d\n", intel_crtc->plane);
+}
+
+void g4x_disable_fbc(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ u32 dpfc_ctl;
+
+ /* Disable compression */
+ dpfc_ctl = I915_READ(DPFC_CONTROL);
+ dpfc_ctl &= ~DPFC_CTL_EN;
+ I915_WRITE(DPFC_CONTROL, dpfc_ctl);
+ intel_wait_for_vblank(dev);
+
+ DRM_DEBUG("disabled FBC\n");
+}
+
+static bool g4x_fbc_enabled(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
+}
+
+/**
+ * intel_update_fbc - enable/disable FBC as needed
+ * @crtc: CRTC to point the compressor at
+ * @mode: mode in use
+ *
+ * Set up the framebuffer compression hardware at mode set time. We
+ * enable it if possible:
+ * - plane A only (on pre-965)
+ * - no pixel mulitply/line duplication
+ * - no alpha buffer discard
+ * - no dual wide
+ * - framebuffer <= 2048 in width, 1536 in height
+ *
+ * We can't assume that any compression will take place (worst case),
+ * so the compressed buffer has to be the same size as the uncompressed
+ * one. It also must reside (along with the line length buffer) in
+ * stolen memory.
+ *
+ * We need to enable/disable FBC on a global basis.
+ */
+static void intel_update_fbc(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_framebuffer *fb = crtc->fb;
+ struct intel_framebuffer *intel_fb;
+ struct drm_i915_gem_object *obj_priv;
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int plane = intel_crtc->plane;
+
+ if (!i915_powersave)
+ return;
+
+ if (!dev_priv->display.fbc_enabled ||
+ !dev_priv->display.enable_fbc ||
+ !dev_priv->display.disable_fbc)
+ return;
+
+ if (!crtc->fb)
+ return;
+
+ intel_fb = to_intel_framebuffer(fb);
+ obj_priv = intel_fb->obj->driver_private;
+
+ /*
+ * If FBC is already on, we just have to verify that we can
+ * keep it that way...
+ * Need to disable if:
+ * - changing FBC params (stride, fence, mode)
+ * - new fb is too large to fit in compressed buffer
+ * - going to an unsupported config (interlace, pixel multiply, etc.)
+ */
+ if (intel_fb->obj->size > dev_priv->cfb_size) {
+ DRM_DEBUG("framebuffer too large, disabling compression\n");
+ goto out_disable;
+ }
+ if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
+ (mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
+ DRM_DEBUG("mode incompatible with compression, disabling\n");
+ goto out_disable;
+ }
+ if ((mode->hdisplay > 2048) ||
+ (mode->vdisplay > 1536)) {
+ DRM_DEBUG("mode too large for compression, disabling\n");
+ goto out_disable;
+ }
+ if ((IS_I915GM(dev) || IS_I945GM(dev)) && plane != 0) {
+ DRM_DEBUG("plane not 0, disabling compression\n");
+ goto out_disable;
+ }
+ if (obj_priv->tiling_mode != I915_TILING_X) {
+ DRM_DEBUG("framebuffer not tiled, disabling compression\n");
+ goto out_disable;
+ }
+
+ if (dev_priv->display.fbc_enabled(crtc)) {
+ /* We can re-enable it in this case, but need to update pitch */
+ if (fb->pitch > dev_priv->cfb_pitch)
+ dev_priv->display.disable_fbc(dev);
+ if (obj_priv->fence_reg != dev_priv->cfb_fence)
+ dev_priv->display.disable_fbc(dev);
+ if (plane != dev_priv->cfb_plane)
+ dev_priv->display.disable_fbc(dev);
+ }
+
+ if (!dev_priv->display.fbc_enabled(crtc)) {
+ /* Now try to turn it back on if possible */
+ dev_priv->display.enable_fbc(crtc, 500);
+ }
+
+ return;
+
+out_disable:
+ DRM_DEBUG("unsupported config, disabling FBC\n");
+ /* Multiple disables should be harmless */
+ if (dev_priv->display.fbc_enabled(crtc))
+ dev_priv->display.disable_fbc(dev);
+}
+
static int
intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
struct drm_framebuffer *old_fb)
struct drm_i915_gem_object *obj_priv;
struct drm_gem_object *obj;
int pipe = intel_crtc->pipe;
+ int plane = intel_crtc->plane;
unsigned long Start, Offset;
- int dspbase = (pipe == 0 ? DSPAADDR : DSPBADDR);
- int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF);
- int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE;
- int dsptileoff = (pipe == 0 ? DSPATILEOFF : DSPBTILEOFF);
- int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+ int dspbase = (plane == 0 ? DSPAADDR : DSPBADDR);
+ int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF);
+ int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE;
+ int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF);
+ int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
u32 dspcntr, alignment;
int ret;
return 0;
}
- switch (pipe) {
+ switch (plane) {
case 0:
case 1:
break;
default:
- DRM_ERROR("Can't update pipe %d in SAREA\n", pipe);
+ DRM_ERROR("Can't update plane %d in SAREA\n", plane);
return -EINVAL;
}
I915_READ(dspbase);
}
+ if ((IS_I965G(dev) || plane == 0))
+ intel_update_fbc(crtc, &crtc->mode);
+
intel_wait_for_vblank(dev);
if (old_fb) {
int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF;
int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1;
int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ;
+ int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS;
int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B;
}
}
+ /* Enable panel fitting for LVDS */
+ if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
+ temp = I915_READ(pf_ctl_reg);
+ I915_WRITE(pf_ctl_reg, temp | PF_ENABLE);
+
+ /* currently full aspect */
+ I915_WRITE(pf_win_pos, 0);
+
+ I915_WRITE(pf_win_size,
+ (dev_priv->panel_fixed_mode->hdisplay << 16) |
+ (dev_priv->panel_fixed_mode->vdisplay));
+ }
+
/* Enable CPU pipe */
temp = I915_READ(pipeconf_reg);
if ((temp & PIPEACONF_ENABLE) == 0) {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_crtc->pipe;
+ int plane = intel_crtc->plane;
int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
- int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
- int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR;
+ int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
+ int dspbase_reg = (plane == 0) ? DSPAADDR : DSPBADDR;
int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
u32 temp;
intel_crtc_load_lut(crtc);
+ if ((IS_I965G(dev) || plane == 0))
+ intel_update_fbc(crtc, &crtc->mode);
+
/* Give the overlay scaler a chance to enable if it's on this pipe */
//intel_crtc_dpms_video(crtc, true); TODO
intel_update_watermarks(dev);
/* Give the overlay scaler a chance to disable if it's on this pipe */
//intel_crtc_dpms_video(crtc, FALSE); TODO
+ if (dev_priv->cfb_plane == plane &&
+ dev_priv->display.disable_fbc)
+ dev_priv->display.disable_fbc(dev);
+
/* Disable the VGA plane that we never use */
i915_disable_vga(dev);
static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
{
struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_master_private *master_priv;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_crtc->pipe;
bool enabled;
- if (IS_IGDNG(dev))
- igdng_crtc_dpms(crtc, mode);
- else
- i9xx_crtc_dpms(crtc, mode);
+ dev_priv->display.dpms(crtc, mode);
intel_crtc->dpms_mode = mode;
return true;
}
+static int i945_get_display_clock_speed(struct drm_device *dev)
+{
+ return 400000;
+}
-/** Returns the core display clock speed for i830 - i945 */
-static int intel_get_core_clock_speed(struct drm_device *dev)
+static int i915_get_display_clock_speed(struct drm_device *dev)
{
+ return 333000;
+}
- /* Core clock values taken from the published datasheets.
- * The 830 may go up to 166 Mhz, which we should check.
- */
- if (IS_I945G(dev))
- return 400000;
- else if (IS_I915G(dev))
- return 333000;
- else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
- return 200000;
- else if (IS_I915GM(dev)) {
- u16 gcfgc = 0;
+static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
+{
+ return 200000;
+}
- pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+static int i915gm_get_display_clock_speed(struct drm_device *dev)
+{
+ u16 gcfgc = 0;
- if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
- return 133000;
- else {
- switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
- case GC_DISPLAY_CLOCK_333_MHZ:
- return 333000;
- default:
- case GC_DISPLAY_CLOCK_190_200_MHZ:
- return 190000;
- }
- }
- } else if (IS_I865G(dev))
- return 266000;
- else if (IS_I855(dev)) {
- u16 hpllcc = 0;
- /* Assume that the hardware is in the high speed state. This
- * should be the default.
- */
- switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
- case GC_CLOCK_133_200:
- case GC_CLOCK_100_200:
- return 200000;
- case GC_CLOCK_166_250:
- return 250000;
- case GC_CLOCK_100_133:
- return 133000;
+ pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+
+ if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
+ return 133000;
+ else {
+ switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
+ case GC_DISPLAY_CLOCK_333_MHZ:
+ return 333000;
+ default:
+ case GC_DISPLAY_CLOCK_190_200_MHZ:
+ return 190000;
}
- } else /* 852, 830 */
+ }
+}
+
+static int i865_get_display_clock_speed(struct drm_device *dev)
+{
+ return 266000;
+}
+
+static int i855_get_display_clock_speed(struct drm_device *dev)
+{
+ u16 hpllcc = 0;
+ /* Assume that the hardware is in the high speed state. This
+ * should be the default.
+ */
+ switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
+ case GC_CLOCK_133_200:
+ case GC_CLOCK_100_200:
+ return 200000;
+ case GC_CLOCK_166_250:
+ return 250000;
+ case GC_CLOCK_100_133:
return 133000;
+ }
+
+ /* Shouldn't happen */
+ return 0;
+}
- return 0; /* Silence gcc warning */
+static int i830_get_display_clock_speed(struct drm_device *dev)
+{
+ return 133000;
}
/**
{
long entries_required, wm_size;
- entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000;
+ /*
+ * Note: we need to make sure we don't overflow for various clock &
+ * latency values.
+ * clocks go from a few thousand to several hundred thousand.
+ * latency is usually a few thousand
+ */
+ entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
+ 1000;
entries_required /= wm->cacheline_size;
DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required);
for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
latency = &cxsr_latency_table[i];
if (is_desktop == latency->is_desktop &&
- fsb == latency->fsb_freq && mem == latency->mem_freq)
- break;
+ fsb == latency->fsb_freq && mem == latency->mem_freq)
+ return latency;
}
- if (i >= ARRAY_SIZE(cxsr_latency_table)) {
- DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
- return NULL;
- }
- return latency;
+
+ DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
+
+ return NULL;
}
static void igd_disable_cxsr(struct drm_device *dev)
*/
const static int latency_ns = 5000;
-static int intel_get_fifo_size(struct drm_device *dev, int plane)
+static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t dsparb = I915_READ(DSPARB);
int size;
- if (IS_I9XX(dev)) {
- if (plane == 0)
- size = dsparb & 0x7f;
- else
- size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
- (dsparb & 0x7f);
- } else if (IS_I85X(dev)) {
- if (plane == 0)
- size = dsparb & 0x1ff;
- else
- size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
- (dsparb & 0x1ff);
- size >>= 1; /* Convert to cachelines */
- } else if (IS_845G(dev)) {
+ if (plane == 0)
size = dsparb & 0x7f;
- size >>= 2; /* Convert to cachelines */
- } else {
- size = dsparb & 0x7f;
- size >>= 1; /* Convert to cachelines */
- }
+ else
+ size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
+ (dsparb & 0x7f);
+
+ DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+ size);
+
+ return size;
+}
+
+static int i85x_get_fifo_size(struct drm_device *dev, int plane)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ uint32_t dsparb = I915_READ(DSPARB);
+ int size;
+
+ if (plane == 0)
+ size = dsparb & 0x1ff;
+ else
+ size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
+ (dsparb & 0x1ff);
+ size >>= 1; /* Convert to cachelines */
DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
size);
return size;
}
-static void g4x_update_wm(struct drm_device *dev)
+static int i845_get_fifo_size(struct drm_device *dev, int plane)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ uint32_t dsparb = I915_READ(DSPARB);
+ int size;
+
+ size = dsparb & 0x7f;
+ size >>= 2; /* Convert to cachelines */
+
+ DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+ size);
+
+ return size;
+}
+
+static int i830_get_fifo_size(struct drm_device *dev, int plane)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ uint32_t dsparb = I915_READ(DSPARB);
+ int size;
+
+ size = dsparb & 0x7f;
+ size >>= 1; /* Convert to cachelines */
+
+ DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+ size);
+
+ return size;
+}
+
+static void g4x_update_wm(struct drm_device *dev, int unused, int unused2,
+ int unused3, int unused4)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 fw_blc_self = I915_READ(FW_BLC_SELF);
I915_WRITE(FW_BLC_SELF, fw_blc_self);
}
-static void i965_update_wm(struct drm_device *dev)
+static void i965_update_wm(struct drm_device *dev, int unused, int unused2,
+ int unused3, int unused4)
{
struct drm_i915_private *dev_priv = dev->dev_private;
cacheline_size = planea_params.cacheline_size;
/* Update per-plane FIFO sizes */
- planea_params.fifo_size = intel_get_fifo_size(dev, 0);
- planeb_params.fifo_size = intel_get_fifo_size(dev, 1);
+ planea_params.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
+ planeb_params.fifo_size = dev_priv->display.get_fifo_size(dev, 1);
planea_wm = intel_calculate_wm(planea_clock, &planea_params,
pixel_size, latency_ns);
I915_WRITE(FW_BLC2, fwater_hi);
}
-static void i830_update_wm(struct drm_device *dev, int planea_clock,
- int pixel_size)
+static void i830_update_wm(struct drm_device *dev, int planea_clock, int unused,
+ int unused2, int pixel_size)
{
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff;
int planea_wm;
- i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0);
+ i830_wm_info.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info,
pixel_size, latency_ns);
*/
static void intel_update_watermarks(struct drm_device *dev)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_crtc *crtc;
struct intel_crtc *intel_crtc;
int sr_hdisplay = 0;
else if (IS_IGD(dev))
igd_disable_cxsr(dev);
- if (IS_G4X(dev))
- g4x_update_wm(dev);
- else if (IS_I965G(dev))
- i965_update_wm(dev);
- else if (IS_I9XX(dev) || IS_MOBILE(dev))
- i9xx_update_wm(dev, planea_clock, planeb_clock, sr_hdisplay,
- pixel_size);
- else
- i830_update_wm(dev, planea_clock, pixel_size);
+ dev_priv->display.update_wm(dev, planea_clock, planeb_clock,
+ sr_hdisplay, pixel_size);
}
static int intel_crtc_mode_set(struct drm_crtc *crtc,
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_crtc->pipe;
+ int plane = intel_crtc->plane;
int fp_reg = (pipe == 0) ? FPA0 : FPB0;
int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD;
- int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+ int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B;
int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B;
int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B;
- int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE;
- int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS;
+ int dspsize_reg = (plane == 0) ? DSPASIZE : DSPBSIZE;
+ int dsppos_reg = (plane == 0) ? DSPAPOS : DSPBPOS;
int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC;
int refclk, num_outputs = 0;
intel_clock_t clock, reduced_clock;
enable color space conversion */
if (!IS_IGDNG(dev)) {
if (pipe == 0)
- dspcntr |= DISPPLANE_SEL_PIPE_A;
+ dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
else
dspcntr |= DISPPLANE_SEL_PIPE_B;
}
* XXX: No double-wide on 915GM pipe B. Is that the only reason for the
* pipe == 0 check?
*/
- if (mode->clock > intel_get_core_clock_speed(dev) * 9 / 10)
+ if (mode->clock >
+ dev_priv->display.get_display_clock_speed(dev) * 9 / 10)
pipeconf |= PIPEACONF_DOUBLE_WIDE;
else
pipeconf &= ~PIPEACONF_DOUBLE_WIDE;
udelay(150);
if (IS_I965G(dev) && !IS_IGDNG(dev)) {
- sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
- I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
+ if (is_sdvo) {
+ sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+ I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT));
+ } else
+ I915_WRITE(dpll_md_reg, 0);
} else {
/* write it again -- the BIOS does, after all */
I915_WRITE(dpll_reg, dpll);
/* Flush the plane changes */
ret = intel_pipe_set_base(crtc, x, y, old_fb);
+ if ((IS_I965G(dev) || plane == 0))
+ intel_update_fbc(crtc, &crtc->mode);
+
intel_update_watermarks(dev);
drm_vblank_post_modeset(dev, pipe);
struct drm_gem_object *bo;
struct drm_i915_gem_object *obj_priv;
int pipe = intel_crtc->pipe;
+ int plane = intel_crtc->plane;
uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
uint32_t temp = I915_READ(control);
i915_gem_object_unpin(intel_crtc->cursor_bo);
drm_gem_object_unreference(intel_crtc->cursor_bo);
}
+
+ if ((IS_I965G(dev) || plane == 0))
+ intel_update_fbc(crtc, &crtc->mode);
+
mutex_unlock(&dev->struct_mutex);
intel_crtc->cursor_addr = addr;
intel_crtc->lut_b[i] = i;
}
+ /* Swap pipes & planes for FBC on pre-965 */
+ intel_crtc->pipe = pipe;
+ intel_crtc->plane = pipe;
+ if (IS_MOBILE(dev) && (IS_I9XX(dev) && !IS_I965G(dev))) {
+ DRM_DEBUG("swapping pipes & planes for FBC\n");
+ intel_crtc->plane = ((pipe == 0) ? 1 : 0);
+ }
+
intel_crtc->cursor_addr = 0;
intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF;
drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
}
}
+/* Set up chip specific display functions */
+static void intel_init_display(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /* We always want a DPMS function */
+ if (IS_IGDNG(dev))
+ dev_priv->display.dpms = igdng_crtc_dpms;
+ else
+ dev_priv->display.dpms = i9xx_crtc_dpms;
+
+ /* Only mobile has FBC, leave pointers NULL for other chips */
+ if (IS_MOBILE(dev)) {
+ if (IS_GM45(dev)) {
+ dev_priv->display.fbc_enabled = g4x_fbc_enabled;
+ dev_priv->display.enable_fbc = g4x_enable_fbc;
+ dev_priv->display.disable_fbc = g4x_disable_fbc;
+ } else if (IS_I965GM(dev) || IS_I945GM(dev) || IS_I915GM(dev)) {
+ dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
+ dev_priv->display.enable_fbc = i8xx_enable_fbc;
+ dev_priv->display.disable_fbc = i8xx_disable_fbc;
+ }
+ /* 855GM needs testing */
+ }
+
+ /* Returns the core display clock speed */
+ if (IS_I945G(dev))
+ dev_priv->display.get_display_clock_speed =
+ i945_get_display_clock_speed;
+ else if (IS_I915G(dev))
+ dev_priv->display.get_display_clock_speed =
+ i915_get_display_clock_speed;
+ else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
+ dev_priv->display.get_display_clock_speed =
+ i9xx_misc_get_display_clock_speed;
+ else if (IS_I915GM(dev))
+ dev_priv->display.get_display_clock_speed =
+ i915gm_get_display_clock_speed;
+ else if (IS_I865G(dev))
+ dev_priv->display.get_display_clock_speed =
+ i865_get_display_clock_speed;
+ else if (IS_I855(dev))
+ dev_priv->display.get_display_clock_speed =
+ i855_get_display_clock_speed;
+ else /* 852, 830 */
+ dev_priv->display.get_display_clock_speed =
+ i830_get_display_clock_speed;
+
+ /* For FIFO watermark updates */
+ if (IS_G4X(dev))
+ dev_priv->display.update_wm = g4x_update_wm;
+ else if (IS_I965G(dev))
+ dev_priv->display.update_wm = i965_update_wm;
+ else if (IS_I9XX(dev) || IS_MOBILE(dev)) {
+ dev_priv->display.update_wm = i9xx_update_wm;
+ dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
+ } else {
+ if (IS_I85X(dev))
+ dev_priv->display.get_fifo_size = i85x_get_fifo_size;
+ else if (IS_845G(dev))
+ dev_priv->display.get_fifo_size = i845_get_fifo_size;
+ else
+ dev_priv->display.get_fifo_size = i830_get_fifo_size;
+ dev_priv->display.update_wm = i830_update_wm;
+ }
+}
+
void intel_modeset_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
dev->mode_config.funcs = (void *)&intel_mode_funcs;
+ intel_init_display(dev);
+
if (IS_I965G(dev)) {
dev->mode_config.max_width = 8192;
dev->mode_config.max_height = 8192;
mutex_unlock(&dev->struct_mutex);
+ if (dev_priv->display.disable_fbc)
+ dev_priv->display.disable_fbc(dev);
+
drm_mode_config_cleanup(dev);
}
#include <linux/i2c.h>
#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
+#include "i915_drv.h"
#include "drm_crtc.h"
#include "drm_crtc_helper.h"
struct intel_crtc {
struct drm_crtc base;
- int pipe;
- int plane;
+ enum pipe pipe;
+ enum plane plane;
struct drm_gem_object *cursor_bo;
uint32_t cursor_addr;
u8 lut_r[256], lut_g[256], lut_b[256];
* Jesse Barnes <jesse.barnes@intel.com>
*/
+#include <acpi/button.h>
#include <linux/dmi.h>
#include <linux/i2c.h>
#include "drmP.h"
goto out;
}
+ /* full screen scale for now */
+ if (IS_IGDNG(dev))
+ goto out;
+
/* 965+ wants fuzzy fitting */
if (IS_I965G(dev))
pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) |
* to register description and PRM.
* Change the value here to see the borders for debugging
*/
- I915_WRITE(BCLRPAT_A, 0);
- I915_WRITE(BCLRPAT_B, 0);
+ if (!IS_IGDNG(dev)) {
+ I915_WRITE(BCLRPAT_A, 0);
+ I915_WRITE(BCLRPAT_B, 0);
+ }
switch (lvds_priv->fitting_mode) {
case DRM_MODE_SCALE_CENTER:
* settings.
*/
- /* No panel fitting yet, fixme */
if (IS_IGDNG(dev))
return;
I915_WRITE(PFIT_CONTROL, lvds_priv->pfit_control);
}
+/* Some lid devices report incorrect lid status, assume they're connected */
+static const struct dmi_system_id bad_lid_status[] = {
+ {
+ .ident = "Aspire One",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"),
+ },
+ },
+ { }
+};
+
/**
* Detect the LVDS connection.
*
- * This always returns CONNECTOR_STATUS_CONNECTED. This connector should only have
- * been set up if the LVDS was actually connected anyway.
+ * Since LVDS doesn't have hotlug, we use the lid as a proxy. Open means
+ * connected and closed means disconnected. We also send hotplug events as
+ * needed, using lid status notification from the input layer.
*/
static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector)
{
- return connector_status_connected;
+ enum drm_connector_status status = connector_status_connected;
+
+ if (!acpi_lid_open() && !dmi_check_system(bad_lid_status))
+ status = connector_status_disconnected;
+
+ return status;
}
/**
return 0;
}
+static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
+ void *unused)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(nb, struct drm_i915_private, lid_notifier);
+ struct drm_device *dev = dev_priv->dev;
+
+ if (acpi_lid_open() && !dev_priv->suspended) {
+ mutex_lock(&dev->mode_config.mutex);
+ drm_helper_resume_force_mode(dev);
+ mutex_unlock(&dev->mode_config.mutex);
+ }
+
+ drm_sysfs_hotplug_event(dev_priv->dev);
+
+ return NOTIFY_OK;
+}
+
/**
* intel_lvds_destroy - unregister and free LVDS structures
* @connector: connector to free
*/
static void intel_lvds_destroy(struct drm_connector *connector)
{
+ struct drm_device *dev = connector->dev;
struct intel_output *intel_output = to_intel_output(connector);
+ struct drm_i915_private *dev_priv = dev->dev_private;
if (intel_output->ddc_bus)
intel_i2c_destroy(intel_output->ddc_bus);
+ if (dev_priv->lid_notifier.notifier_call)
+ acpi_lid_notifier_unregister(&dev_priv->lid_notifier);
drm_sysfs_connector_remove(connector);
drm_connector_cleanup(connector);
kfree(connector);
pwm |= PWM_PCH_ENABLE;
I915_WRITE(BLC_PWM_PCH_CTL1, pwm);
}
+ dev_priv->lid_notifier.notifier_call = intel_lid_notify;
+ if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) {
+ DRM_DEBUG("lid notifier registration failed\n");
+ dev_priv->lid_notifier.notifier_call = NULL;
+ }
drm_sysfs_connector_add(connector);
return;
struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2;
struct intel_sdvo_dtd save_output_dtd[16];
u32 save_SDVOX;
+ /* add the property for the SDVO-TV */
+ struct drm_property *left_property;
+ struct drm_property *right_property;
+ struct drm_property *top_property;
+ struct drm_property *bottom_property;
+ struct drm_property *hpos_property;
+ struct drm_property *vpos_property;
+
+ /* add the property for the SDVO-TV/LVDS */
+ struct drm_property *brightness_property;
+ struct drm_property *contrast_property;
+ struct drm_property *saturation_property;
+ struct drm_property *hue_property;
+
+ /* Add variable to record current setting for the above property */
+ u32 left_margin, right_margin, top_margin, bottom_margin;
+ /* this is to get the range of margin.*/
+ u32 max_hscan, max_vscan;
+ u32 max_hpos, cur_hpos;
+ u32 max_vpos, cur_vpos;
+ u32 cur_brightness, max_brightness;
+ u32 cur_contrast, max_contrast;
+ u32 cur_saturation, max_saturation;
+ u32 cur_hue, max_hue;
};
static bool
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
+ /* Add the op code for SDVO enhancements */
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
+ SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
/* HDMI op code */
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
status = intel_sdvo_read_response(output, NULL, 0);
if (status != SDVO_CMD_STATUS_SUCCESS)
- DRM_DEBUG("%s: Failed to set TV format\n",
+ DRM_DEBUG_KMS("%s: Failed to set TV format\n",
SDVO_NAME(sdvo_priv));
}
return 1;
}
+static
+void intel_sdvo_destroy_enhance_property(struct drm_connector *connector)
+{
+ struct intel_output *intel_output = to_intel_output(connector);
+ struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+ struct drm_device *dev = connector->dev;
+
+ if (sdvo_priv->is_tv) {
+ if (sdvo_priv->left_property)
+ drm_property_destroy(dev, sdvo_priv->left_property);
+ if (sdvo_priv->right_property)
+ drm_property_destroy(dev, sdvo_priv->right_property);
+ if (sdvo_priv->top_property)
+ drm_property_destroy(dev, sdvo_priv->top_property);
+ if (sdvo_priv->bottom_property)
+ drm_property_destroy(dev, sdvo_priv->bottom_property);
+ if (sdvo_priv->hpos_property)
+ drm_property_destroy(dev, sdvo_priv->hpos_property);
+ if (sdvo_priv->vpos_property)
+ drm_property_destroy(dev, sdvo_priv->vpos_property);
+ }
+ if (sdvo_priv->is_tv) {
+ if (sdvo_priv->saturation_property)
+ drm_property_destroy(dev,
+ sdvo_priv->saturation_property);
+ if (sdvo_priv->contrast_property)
+ drm_property_destroy(dev,
+ sdvo_priv->contrast_property);
+ if (sdvo_priv->hue_property)
+ drm_property_destroy(dev, sdvo_priv->hue_property);
+ }
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+ if (sdvo_priv->brightness_property)
+ drm_property_destroy(dev,
+ sdvo_priv->brightness_property);
+ }
+ return;
+}
+
static void intel_sdvo_destroy(struct drm_connector *connector)
{
struct intel_output *intel_output = to_intel_output(connector);
drm_property_destroy(connector->dev,
sdvo_priv->tv_format_property);
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
+ intel_sdvo_destroy_enhance_property(connector);
+
drm_sysfs_connector_remove(connector);
drm_connector_cleanup(connector);
struct drm_crtc *crtc = encoder->crtc;
int ret = 0;
bool changed = false;
+ uint8_t cmd, status;
+ uint16_t temp_value;
ret = drm_connector_property_set_value(connector, property, val);
if (ret < 0)
sdvo_priv->tv_format_name = sdvo_priv->tv_format_supported[val];
changed = true;
- } else {
- ret = -EINVAL;
- goto out;
}
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+ cmd = 0;
+ temp_value = val;
+ if (sdvo_priv->left_property == property) {
+ drm_connector_property_set_value(connector,
+ sdvo_priv->right_property, val);
+ if (sdvo_priv->left_margin == temp_value)
+ goto out;
+
+ sdvo_priv->left_margin = temp_value;
+ sdvo_priv->right_margin = temp_value;
+ temp_value = sdvo_priv->max_hscan -
+ sdvo_priv->left_margin;
+ cmd = SDVO_CMD_SET_OVERSCAN_H;
+ } else if (sdvo_priv->right_property == property) {
+ drm_connector_property_set_value(connector,
+ sdvo_priv->left_property, val);
+ if (sdvo_priv->right_margin == temp_value)
+ goto out;
+
+ sdvo_priv->left_margin = temp_value;
+ sdvo_priv->right_margin = temp_value;
+ temp_value = sdvo_priv->max_hscan -
+ sdvo_priv->left_margin;
+ cmd = SDVO_CMD_SET_OVERSCAN_H;
+ } else if (sdvo_priv->top_property == property) {
+ drm_connector_property_set_value(connector,
+ sdvo_priv->bottom_property, val);
+ if (sdvo_priv->top_margin == temp_value)
+ goto out;
+
+ sdvo_priv->top_margin = temp_value;
+ sdvo_priv->bottom_margin = temp_value;
+ temp_value = sdvo_priv->max_vscan -
+ sdvo_priv->top_margin;
+ cmd = SDVO_CMD_SET_OVERSCAN_V;
+ } else if (sdvo_priv->bottom_property == property) {
+ drm_connector_property_set_value(connector,
+ sdvo_priv->top_property, val);
+ if (sdvo_priv->bottom_margin == temp_value)
+ goto out;
+ sdvo_priv->top_margin = temp_value;
+ sdvo_priv->bottom_margin = temp_value;
+ temp_value = sdvo_priv->max_vscan -
+ sdvo_priv->top_margin;
+ cmd = SDVO_CMD_SET_OVERSCAN_V;
+ } else if (sdvo_priv->hpos_property == property) {
+ if (sdvo_priv->cur_hpos == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_POSITION_H;
+ sdvo_priv->cur_hpos = temp_value;
+ } else if (sdvo_priv->vpos_property == property) {
+ if (sdvo_priv->cur_vpos == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_POSITION_V;
+ sdvo_priv->cur_vpos = temp_value;
+ } else if (sdvo_priv->saturation_property == property) {
+ if (sdvo_priv->cur_saturation == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_SATURATION;
+ sdvo_priv->cur_saturation = temp_value;
+ } else if (sdvo_priv->contrast_property == property) {
+ if (sdvo_priv->cur_contrast == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_CONTRAST;
+ sdvo_priv->cur_contrast = temp_value;
+ } else if (sdvo_priv->hue_property == property) {
+ if (sdvo_priv->cur_hue == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_HUE;
+ sdvo_priv->cur_hue = temp_value;
+ } else if (sdvo_priv->brightness_property == property) {
+ if (sdvo_priv->cur_brightness == temp_value)
+ goto out;
+
+ cmd = SDVO_CMD_SET_BRIGHTNESS;
+ sdvo_priv->cur_brightness = temp_value;
+ }
+ if (cmd) {
+ intel_sdvo_write_cmd(intel_output, cmd, &temp_value, 2);
+ status = intel_sdvo_read_response(intel_output,
+ NULL, 0);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO command \n");
+ return -EINVAL;
+ }
+ changed = true;
+ }
+ }
if (changed && crtc)
drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x,
crtc->y, crtc->fb);
sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1;
encoder->encoder_type = DRM_MODE_ENCODER_DAC;
connector->connector_type = DRM_MODE_CONNECTOR_VGA;
+ intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
+ (1 << INTEL_ANALOG_CLONE_BIT);
} else if (flags & SDVO_OUTPUT_LVDS0) {
sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0;
}
+static void intel_sdvo_create_enhance_property(struct drm_connector *connector)
+{
+ struct intel_output *intel_output = to_intel_output(connector);
+ struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+ struct intel_sdvo_enhancements_reply sdvo_data;
+ struct drm_device *dev = connector->dev;
+ uint8_t status;
+ uint16_t response, data_value[2];
+
+ intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS,
+ NULL, 0);
+ status = intel_sdvo_read_response(intel_output, &sdvo_data,
+ sizeof(sdvo_data));
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS(" incorrect response is returned\n");
+ return;
+ }
+ response = *((uint16_t *)&sdvo_data);
+ if (!response) {
+ DRM_DEBUG_KMS("No enhancement is supported\n");
+ return;
+ }
+ if (sdvo_priv->is_tv) {
+ /* when horizontal overscan is supported, Add the left/right
+ * property
+ */
+ if (sdvo_data.overscan_h) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_OVERSCAN_H, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO max "
+ "h_overscan\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_OVERSCAN_H, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO h_overscan\n");
+ return;
+ }
+ sdvo_priv->max_hscan = data_value[0];
+ sdvo_priv->left_margin = data_value[0] - response;
+ sdvo_priv->right_margin = sdvo_priv->left_margin;
+ sdvo_priv->left_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "left_margin", 2);
+ sdvo_priv->left_property->values[0] = 0;
+ sdvo_priv->left_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->left_property,
+ sdvo_priv->left_margin);
+ sdvo_priv->right_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "right_margin", 2);
+ sdvo_priv->right_property->values[0] = 0;
+ sdvo_priv->right_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->right_property,
+ sdvo_priv->right_margin);
+ DRM_DEBUG_KMS("h_overscan: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.overscan_v) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_OVERSCAN_V, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO max "
+ "v_overscan\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_OVERSCAN_V, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO v_overscan\n");
+ return;
+ }
+ sdvo_priv->max_vscan = data_value[0];
+ sdvo_priv->top_margin = data_value[0] - response;
+ sdvo_priv->bottom_margin = sdvo_priv->top_margin;
+ sdvo_priv->top_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "top_margin", 2);
+ sdvo_priv->top_property->values[0] = 0;
+ sdvo_priv->top_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->top_property,
+ sdvo_priv->top_margin);
+ sdvo_priv->bottom_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "bottom_margin", 2);
+ sdvo_priv->bottom_property->values[0] = 0;
+ sdvo_priv->bottom_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->bottom_property,
+ sdvo_priv->bottom_margin);
+ DRM_DEBUG_KMS("v_overscan: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.position_h) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_POSITION_H, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max h_pos\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_POSITION_H, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get h_postion\n");
+ return;
+ }
+ sdvo_priv->max_hpos = data_value[0];
+ sdvo_priv->cur_hpos = response;
+ sdvo_priv->hpos_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "hpos", 2);
+ sdvo_priv->hpos_property->values[0] = 0;
+ sdvo_priv->hpos_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->hpos_property,
+ sdvo_priv->cur_hpos);
+ DRM_DEBUG_KMS("h_position: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.position_v) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_POSITION_V, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max v_pos\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_POSITION_V, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get v_postion\n");
+ return;
+ }
+ sdvo_priv->max_vpos = data_value[0];
+ sdvo_priv->cur_vpos = response;
+ sdvo_priv->vpos_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "vpos", 2);
+ sdvo_priv->vpos_property->values[0] = 0;
+ sdvo_priv->vpos_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->vpos_property,
+ sdvo_priv->cur_vpos);
+ DRM_DEBUG_KMS("v_position: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ }
+ if (sdvo_priv->is_tv) {
+ if (sdvo_data.saturation) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_SATURATION, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max sat\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_SATURATION, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get sat\n");
+ return;
+ }
+ sdvo_priv->max_saturation = data_value[0];
+ sdvo_priv->cur_saturation = response;
+ sdvo_priv->saturation_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "saturation", 2);
+ sdvo_priv->saturation_property->values[0] = 0;
+ sdvo_priv->saturation_property->values[1] =
+ data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->saturation_property,
+ sdvo_priv->cur_saturation);
+ DRM_DEBUG_KMS("saturation: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.contrast) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_CONTRAST, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max contrast\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_CONTRAST, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get contrast\n");
+ return;
+ }
+ sdvo_priv->max_contrast = data_value[0];
+ sdvo_priv->cur_contrast = response;
+ sdvo_priv->contrast_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "contrast", 2);
+ sdvo_priv->contrast_property->values[0] = 0;
+ sdvo_priv->contrast_property->values[1] = data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->contrast_property,
+ sdvo_priv->cur_contrast);
+ DRM_DEBUG_KMS("contrast: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ if (sdvo_data.hue) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_HUE, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max hue\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_HUE, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get hue\n");
+ return;
+ }
+ sdvo_priv->max_hue = data_value[0];
+ sdvo_priv->cur_hue = response;
+ sdvo_priv->hue_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "hue", 2);
+ sdvo_priv->hue_property->values[0] = 0;
+ sdvo_priv->hue_property->values[1] =
+ data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->hue_property,
+ sdvo_priv->cur_hue);
+ DRM_DEBUG_KMS("hue: max %d, default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ }
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+ if (sdvo_data.brightness) {
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_MAX_BRIGHTNESS, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &data_value, 4);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO Max bright\n");
+ return;
+ }
+ intel_sdvo_write_cmd(intel_output,
+ SDVO_CMD_GET_BRIGHTNESS, NULL, 0);
+ status = intel_sdvo_read_response(intel_output,
+ &response, 2);
+ if (status != SDVO_CMD_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("Incorrect SDVO get brigh\n");
+ return;
+ }
+ sdvo_priv->max_brightness = data_value[0];
+ sdvo_priv->cur_brightness = response;
+ sdvo_priv->brightness_property =
+ drm_property_create(dev, DRM_MODE_PROP_RANGE,
+ "brightness", 2);
+ sdvo_priv->brightness_property->values[0] = 0;
+ sdvo_priv->brightness_property->values[1] =
+ data_value[0];
+ drm_connector_attach_property(connector,
+ sdvo_priv->brightness_property,
+ sdvo_priv->cur_brightness);
+ DRM_DEBUG_KMS("brightness: max %d, "
+ "default %d, current %d\n",
+ data_value[0], data_value[1], response);
+ }
+ }
+ return;
+}
+
bool intel_sdvo_init(struct drm_device *dev, int output_device)
{
struct drm_connector *connector;
drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
if (sdvo_priv->is_tv)
intel_sdvo_tv_create_property(connector);
+
+ if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
+ intel_sdvo_create_enhance_property(connector);
+
drm_sysfs_connector_add(connector);
intel_sdvo_select_ddc_bus(sdvo_priv);
This driver can also be built as a module. If so, the module
will be called f75375s.
-config SENSORS_FSCHER
- tristate "FSC Hermes (DEPRECATED)"
- depends on X86 && I2C
- help
- This driver is DEPRECATED please use the new merged fschmd
- ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver
- instead.
-
- If you say yes here you get support for Fujitsu Siemens
- Computers Hermes sensor chips.
-
- This driver can also be built as a module. If so, the module
- will be called fscher.
-
-config SENSORS_FSCPOS
- tristate "FSC Poseidon (DEPRECATED)"
- depends on X86 && I2C
- help
- This driver is DEPRECATED please use the new merged fschmd
- ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver
- instead.
-
- If you say yes here you get support for Fujitsu Siemens
- Computers Poseidon sensor chips.
-
- This driver can also be built as a module. If so, the module
- will be called fscpos.
-
config SENSORS_FSCHMD
tristate "Fujitsu Siemens Computers sensor chips"
depends on X86 && I2C
will be called gl520sm.
config SENSORS_CORETEMP
- tristate "Intel Core (2) Duo/Solo temperature sensor"
+ tristate "Intel Core/Core2/Atom temperature sensor"
depends on X86 && EXPERIMENTAL
help
If you say yes here you get support for the temperature
- sensor inside your CPU. Supported all are all known variants
- of Intel Core family.
+ sensor inside your CPU. Most of the family 6 CPUs
+ are supported. Check documentation/driver for details.
config SENSORS_IBMAEM
tristate "IBM Active Energy Manager temperature/power sensors and control"
obj-$(CONFIG_SENSORS_F71805F) += f71805f.o
obj-$(CONFIG_SENSORS_F71882FG) += f71882fg.o
obj-$(CONFIG_SENSORS_F75375S) += f75375s.o
-obj-$(CONFIG_SENSORS_FSCHER) += fscher.o
obj-$(CONFIG_SENSORS_FSCHMD) += fschmd.o
-obj-$(CONFIG_SENSORS_FSCPOS) += fscpos.o
obj-$(CONFIG_SENSORS_G760A) += g760a.o
obj-$(CONFIG_SENSORS_GL518SM) += gl518sm.o
obj-$(CONFIG_SENSORS_GL520SM) += gl520sm.o
#define ADM1031_REG_PWM (0x22)
#define ADM1031_REG_FAN_MIN(nr) (0x10 + (nr))
+#define ADM1031_REG_TEMP_OFFSET(nr) (0x0d + (nr))
#define ADM1031_REG_TEMP_MAX(nr) (0x14 + 4 * (nr))
#define ADM1031_REG_TEMP_MIN(nr) (0x15 + 4 * (nr))
#define ADM1031_REG_TEMP_CRIT(nr) (0x16 + 4 * (nr))
u8 auto_temp_min[3];
u8 auto_temp_off[3];
u8 auto_temp_max[3];
+ s8 temp_offset[3];
s8 temp_min[3];
s8 temp_max[3];
s8 temp_crit[3];
#define TEMP_FROM_REG_EXT(val, ext) (TEMP_FROM_REG(val) + (ext) * 125)
+#define TEMP_OFFSET_TO_REG(val) (TEMP_TO_REG(val) & 0x8f)
+#define TEMP_OFFSET_FROM_REG(val) TEMP_FROM_REG((val) < 0 ? \
+ (val) | 0x70 : (val))
+
#define FAN_FROM_REG(reg, div) ((reg) ? (11250 * 60) / ((reg) * (div)) : 0)
static int FAN_TO_REG(int reg, int div)
(((data->ext_temp[nr] >> ((nr - 1) * 3)) & 7));
return sprintf(buf, "%d\n", TEMP_FROM_REG_EXT(data->temp[nr], ext));
}
+static ssize_t show_temp_offset(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int nr = to_sensor_dev_attr(attr)->index;
+ struct adm1031_data *data = adm1031_update_device(dev);
+ return sprintf(buf, "%d\n",
+ TEMP_OFFSET_FROM_REG(data->temp_offset[nr]));
+}
static ssize_t show_temp_min(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct adm1031_data *data = adm1031_update_device(dev);
return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_crit[nr]));
}
+static ssize_t set_temp_offset(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct adm1031_data *data = i2c_get_clientdata(client);
+ int nr = to_sensor_dev_attr(attr)->index;
+ int val;
+
+ val = simple_strtol(buf, NULL, 10);
+ val = SENSORS_LIMIT(val, -15000, 15000);
+ mutex_lock(&data->update_lock);
+ data->temp_offset[nr] = TEMP_OFFSET_TO_REG(val);
+ adm1031_write_value(client, ADM1031_REG_TEMP_OFFSET(nr),
+ data->temp_offset[nr]);
+ mutex_unlock(&data->update_lock);
+ return count;
+}
static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
#define temp_reg(offset) \
static SENSOR_DEVICE_ATTR(temp##offset##_input, S_IRUGO, \
show_temp, NULL, offset - 1); \
+static SENSOR_DEVICE_ATTR(temp##offset##_offset, S_IRUGO | S_IWUSR, \
+ show_temp_offset, set_temp_offset, offset - 1); \
static SENSOR_DEVICE_ATTR(temp##offset##_min, S_IRUGO | S_IWUSR, \
show_temp_min, set_temp_min, offset - 1); \
static SENSOR_DEVICE_ATTR(temp##offset##_max, S_IRUGO | S_IWUSR, \
&sensor_dev_attr_pwm1.dev_attr.attr,
&sensor_dev_attr_auto_fan1_channel.dev_attr.attr,
&sensor_dev_attr_temp1_input.dev_attr.attr,
+ &sensor_dev_attr_temp1_offset.dev_attr.attr,
&sensor_dev_attr_temp1_min.dev_attr.attr,
&sensor_dev_attr_temp1_min_alarm.dev_attr.attr,
&sensor_dev_attr_temp1_max.dev_attr.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
&sensor_dev_attr_temp2_input.dev_attr.attr,
+ &sensor_dev_attr_temp2_offset.dev_attr.attr,
&sensor_dev_attr_temp2_min.dev_attr.attr,
&sensor_dev_attr_temp2_min_alarm.dev_attr.attr,
&sensor_dev_attr_temp2_max.dev_attr.attr,
&sensor_dev_attr_pwm2.dev_attr.attr,
&sensor_dev_attr_auto_fan2_channel.dev_attr.attr,
&sensor_dev_attr_temp3_input.dev_attr.attr,
+ &sensor_dev_attr_temp3_offset.dev_attr.attr,
&sensor_dev_attr_temp3_min.dev_attr.attr,
&sensor_dev_attr_temp3_min_alarm.dev_attr.attr,
&sensor_dev_attr_temp3_max.dev_attr.attr,
}
data->temp[chan] = newh;
+ data->temp_offset[chan] =
+ adm1031_read_value(client,
+ ADM1031_REG_TEMP_OFFSET(chan));
data->temp_min[chan] =
adm1031_read_value(client,
ADM1031_REG_TEMP_MIN(chan));
/* The 100C is default for both mobile and non mobile CPUs */
int tjmax = 100000;
- int ismobile = 1;
+ int tjmax_ee = 85000;
+ int usemsr_ee = 1;
int err;
u32 eax, edx;
/* Early chips have no MSR for TjMax */
if ((c->x86_model == 0xf) && (c->x86_mask < 4)) {
- ismobile = 0;
+ usemsr_ee = 0;
}
- if ((c->x86_model > 0xe) && (ismobile)) {
+ /* Atoms seems to have TjMax at 90C */
+
+ if (c->x86_model == 0x1c) {
+ usemsr_ee = 0;
+ tjmax = 90000;
+ }
+
+ if ((c->x86_model > 0xe) && (usemsr_ee)) {
+ u8 platform_id;
/* Now we can detect the mobile CPU using Intel provided table
http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
dev_warn(dev,
"Unable to access MSR 0x17, assuming desktop"
" CPU\n");
- ismobile = 0;
- } else if (!(eax & 0x10000000)) {
- ismobile = 0;
+ usemsr_ee = 0;
+ } else if (c->x86_model < 0x17 && !(eax & 0x10000000)) {
+ /* Trust bit 28 up to Penryn, I could not find any
+ documentation on that; if you happen to know
+ someone at Intel please ask */
+ usemsr_ee = 0;
+ } else {
+ /* Platform ID bits 52:50 (EDX starts at bit 32) */
+ platform_id = (edx >> 18) & 0x7;
+
+ /* Mobile Penryn CPU seems to be platform ID 7 or 5
+ (guesswork) */
+ if ((c->x86_model == 0x17) &&
+ ((platform_id == 5) || (platform_id == 7))) {
+ /* If MSR EE bit is set, set it to 90 degrees C,
+ otherwise 105 degrees C */
+ tjmax_ee = 90000;
+ tjmax = 105000;
+ }
}
}
- if (ismobile || c->x86_model == 0x1c) {
+ if (usemsr_ee) {
err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx);
if (err) {
"Unable to access MSR 0xEE, for Tjmax, left"
" at default");
} else if (eax & 0x40000000) {
- tjmax = 85000;
+ tjmax = tjmax_ee;
}
- } else {
+ /* if we dont use msr EE it means we are desktop CPU (with exeception
+ of Atom) */
+ } else if (tjmax == 100000) {
dev_warn(dev, "Using relative temperature scale!\n");
}
platform_set_drvdata(pdev, data);
/* read the still undocumented IA32_TEMPERATURE_TARGET it exists
- on older CPUs but not in this register */
+ on older CPUs but not in this register, Atoms don't have it either */
- if (c->x86_model > 0xe) {
+ if ((c->x86_model > 0xe) && (c->x86_model != 0x1c)) {
err = rdmsr_safe_on_cpu(data->id, 0x1a2, &eax, &edx);
if (err) {
dev_warn(&pdev->dev, "Unable to read"
for_each_online_cpu(i) {
struct cpuinfo_x86 *c = &cpu_data(i);
- /* check if family 6, models 0xe, 0xf, 0x16, 0x17, 0x1A */
+ /* check if family 6, models 0xe (Pentium M DC),
+ 0xf (Core 2 DC 65nm), 0x16 (Core 2 SC 65nm),
+ 0x17 (Penryn 45nm), 0x1a (Nehalem), 0x1c (Atom),
+ 0x1e (Lynnfield) */
if ((c->cpuid_level < 0) || (c->x86 != 0x6) ||
!((c->x86_model == 0xe) || (c->x86_model == 0xf) ||
(c->x86_model == 0x16) || (c->x86_model == 0x17) ||
- (c->x86_model == 0x1A) || (c->x86_model == 0x1c))) {
+ (c->x86_model == 0x1a) || (c->x86_model == 0x1c) ||
+ (c->x86_model == 0x1e))) {
/* supported CPU not found, but report the unknown
family 6 CPU */
+++ /dev/null
-/*
- * fscher.c - Part of lm_sensors, Linux kernel modules for hardware
- * monitoring
- * Copyright (C) 2003, 2004 Reinhard Nissl <rnissl@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * fujitsu siemens hermes chip,
- * module based on fscpos.c
- * Copyright (C) 2000 Hermann Jung <hej@odn.de>
- * Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
- * and Philip Edelbrock <phil@netroedge.com>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/hwmon.h>
-#include <linux/err.h>
-#include <linux/mutex.h>
-#include <linux/sysfs.h>
-
-/*
- * Addresses to scan
- */
-
-static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END };
-
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_1(fscher);
-
-/*
- * The FSCHER registers
- */
-
-/* chip identification */
-#define FSCHER_REG_IDENT_0 0x00
-#define FSCHER_REG_IDENT_1 0x01
-#define FSCHER_REG_IDENT_2 0x02
-#define FSCHER_REG_REVISION 0x03
-
-/* global control and status */
-#define FSCHER_REG_EVENT_STATE 0x04
-#define FSCHER_REG_CONTROL 0x05
-
-/* watchdog */
-#define FSCHER_REG_WDOG_PRESET 0x28
-#define FSCHER_REG_WDOG_STATE 0x23
-#define FSCHER_REG_WDOG_CONTROL 0x21
-
-/* fan 0 */
-#define FSCHER_REG_FAN0_MIN 0x55
-#define FSCHER_REG_FAN0_ACT 0x0e
-#define FSCHER_REG_FAN0_STATE 0x0d
-#define FSCHER_REG_FAN0_RIPPLE 0x0f
-
-/* fan 1 */
-#define FSCHER_REG_FAN1_MIN 0x65
-#define FSCHER_REG_FAN1_ACT 0x6b
-#define FSCHER_REG_FAN1_STATE 0x62
-#define FSCHER_REG_FAN1_RIPPLE 0x6f
-
-/* fan 2 */
-#define FSCHER_REG_FAN2_MIN 0xb5
-#define FSCHER_REG_FAN2_ACT 0xbb
-#define FSCHER_REG_FAN2_STATE 0xb2
-#define FSCHER_REG_FAN2_RIPPLE 0xbf
-
-/* voltage supervision */
-#define FSCHER_REG_VOLT_12 0x45
-#define FSCHER_REG_VOLT_5 0x42
-#define FSCHER_REG_VOLT_BATT 0x48
-
-/* temperature 0 */
-#define FSCHER_REG_TEMP0_ACT 0x64
-#define FSCHER_REG_TEMP0_STATE 0x71
-
-/* temperature 1 */
-#define FSCHER_REG_TEMP1_ACT 0x32
-#define FSCHER_REG_TEMP1_STATE 0x81
-
-/* temperature 2 */
-#define FSCHER_REG_TEMP2_ACT 0x35
-#define FSCHER_REG_TEMP2_STATE 0x91
-
-/*
- * Functions declaration
- */
-
-static int fscher_probe(struct i2c_client *client,
- const struct i2c_device_id *id);
-static int fscher_detect(struct i2c_client *client, int kind,
- struct i2c_board_info *info);
-static int fscher_remove(struct i2c_client *client);
-static struct fscher_data *fscher_update_device(struct device *dev);
-static void fscher_init_client(struct i2c_client *client);
-
-static int fscher_read_value(struct i2c_client *client, u8 reg);
-static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value);
-
-/*
- * Driver data (common to all clients)
- */
-
-static const struct i2c_device_id fscher_id[] = {
- { "fscher", fscher },
- { }
-};
-
-static struct i2c_driver fscher_driver = {
- .class = I2C_CLASS_HWMON,
- .driver = {
- .name = "fscher",
- },
- .probe = fscher_probe,
- .remove = fscher_remove,
- .id_table = fscher_id,
- .detect = fscher_detect,
- .address_data = &addr_data,
-};
-
-/*
- * Client data (each client gets its own)
- */
-
-struct fscher_data {
- struct device *hwmon_dev;
- struct mutex update_lock;
- char valid; /* zero until following fields are valid */
- unsigned long last_updated; /* in jiffies */
-
- /* register values */
- u8 revision; /* revision of chip */
- u8 global_event; /* global event status */
- u8 global_control; /* global control register */
- u8 watchdog[3]; /* watchdog */
- u8 volt[3]; /* 12, 5, battery voltage */
- u8 temp_act[3]; /* temperature */
- u8 temp_status[3]; /* status of sensor */
- u8 fan_act[3]; /* fans revolutions per second */
- u8 fan_status[3]; /* fan status */
- u8 fan_min[3]; /* fan min value for rps */
- u8 fan_ripple[3]; /* divider for rps */
-};
-
-/*
- * Sysfs stuff
- */
-
-#define sysfs_r(kind, sub, offset, reg) \
-static ssize_t show_##kind##sub (struct fscher_data *, char *, int); \
-static ssize_t show_##kind##offset##sub (struct device *, struct device_attribute *attr, char *); \
-static ssize_t show_##kind##offset##sub (struct device *dev, struct device_attribute *attr, char *buf) \
-{ \
- struct fscher_data *data = fscher_update_device(dev); \
- return show_##kind##sub(data, buf, (offset)); \
-}
-
-#define sysfs_w(kind, sub, offset, reg) \
-static ssize_t set_##kind##sub (struct i2c_client *, struct fscher_data *, const char *, size_t, int, int); \
-static ssize_t set_##kind##offset##sub (struct device *, struct device_attribute *attr, const char *, size_t); \
-static ssize_t set_##kind##offset##sub (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) \
-{ \
- struct i2c_client *client = to_i2c_client(dev); \
- struct fscher_data *data = i2c_get_clientdata(client); \
- return set_##kind##sub(client, data, buf, count, (offset), reg); \
-}
-
-#define sysfs_rw_n(kind, sub, offset, reg) \
-sysfs_r(kind, sub, offset, reg) \
-sysfs_w(kind, sub, offset, reg) \
-static DEVICE_ATTR(kind##offset##sub, S_IRUGO | S_IWUSR, show_##kind##offset##sub, set_##kind##offset##sub);
-
-#define sysfs_rw(kind, sub, reg) \
-sysfs_r(kind, sub, 0, reg) \
-sysfs_w(kind, sub, 0, reg) \
-static DEVICE_ATTR(kind##sub, S_IRUGO | S_IWUSR, show_##kind##0##sub, set_##kind##0##sub);
-
-#define sysfs_ro_n(kind, sub, offset, reg) \
-sysfs_r(kind, sub, offset, reg) \
-static DEVICE_ATTR(kind##offset##sub, S_IRUGO, show_##kind##offset##sub, NULL);
-
-#define sysfs_ro(kind, sub, reg) \
-sysfs_r(kind, sub, 0, reg) \
-static DEVICE_ATTR(kind, S_IRUGO, show_##kind##0##sub, NULL);
-
-#define sysfs_fan(offset, reg_status, reg_min, reg_ripple, reg_act) \
-sysfs_rw_n(pwm, , offset, reg_min) \
-sysfs_rw_n(fan, _status, offset, reg_status) \
-sysfs_rw_n(fan, _div , offset, reg_ripple) \
-sysfs_ro_n(fan, _input , offset, reg_act)
-
-#define sysfs_temp(offset, reg_status, reg_act) \
-sysfs_rw_n(temp, _status, offset, reg_status) \
-sysfs_ro_n(temp, _input , offset, reg_act)
-
-#define sysfs_in(offset, reg_act) \
-sysfs_ro_n(in, _input, offset, reg_act)
-
-#define sysfs_revision(reg_revision) \
-sysfs_ro(revision, , reg_revision)
-
-#define sysfs_alarms(reg_events) \
-sysfs_ro(alarms, , reg_events)
-
-#define sysfs_control(reg_control) \
-sysfs_rw(control, , reg_control)
-
-#define sysfs_watchdog(reg_control, reg_status, reg_preset) \
-sysfs_rw(watchdog, _control, reg_control) \
-sysfs_rw(watchdog, _status , reg_status) \
-sysfs_rw(watchdog, _preset , reg_preset)
-
-sysfs_fan(1, FSCHER_REG_FAN0_STATE, FSCHER_REG_FAN0_MIN,
- FSCHER_REG_FAN0_RIPPLE, FSCHER_REG_FAN0_ACT)
-sysfs_fan(2, FSCHER_REG_FAN1_STATE, FSCHER_REG_FAN1_MIN,
- FSCHER_REG_FAN1_RIPPLE, FSCHER_REG_FAN1_ACT)
-sysfs_fan(3, FSCHER_REG_FAN2_STATE, FSCHER_REG_FAN2_MIN,
- FSCHER_REG_FAN2_RIPPLE, FSCHER_REG_FAN2_ACT)
-
-sysfs_temp(1, FSCHER_REG_TEMP0_STATE, FSCHER_REG_TEMP0_ACT)
-sysfs_temp(2, FSCHER_REG_TEMP1_STATE, FSCHER_REG_TEMP1_ACT)
-sysfs_temp(3, FSCHER_REG_TEMP2_STATE, FSCHER_REG_TEMP2_ACT)
-
-sysfs_in(0, FSCHER_REG_VOLT_12)
-sysfs_in(1, FSCHER_REG_VOLT_5)
-sysfs_in(2, FSCHER_REG_VOLT_BATT)
-
-sysfs_revision(FSCHER_REG_REVISION)
-sysfs_alarms(FSCHER_REG_EVENTS)
-sysfs_control(FSCHER_REG_CONTROL)
-sysfs_watchdog(FSCHER_REG_WDOG_CONTROL, FSCHER_REG_WDOG_STATE, FSCHER_REG_WDOG_PRESET)
-
-static struct attribute *fscher_attributes[] = {
- &dev_attr_revision.attr,
- &dev_attr_alarms.attr,
- &dev_attr_control.attr,
-
- &dev_attr_watchdog_status.attr,
- &dev_attr_watchdog_control.attr,
- &dev_attr_watchdog_preset.attr,
-
- &dev_attr_in0_input.attr,
- &dev_attr_in1_input.attr,
- &dev_attr_in2_input.attr,
-
- &dev_attr_fan1_status.attr,
- &dev_attr_fan1_div.attr,
- &dev_attr_fan1_input.attr,
- &dev_attr_pwm1.attr,
- &dev_attr_fan2_status.attr,
- &dev_attr_fan2_div.attr,
- &dev_attr_fan2_input.attr,
- &dev_attr_pwm2.attr,
- &dev_attr_fan3_status.attr,
- &dev_attr_fan3_div.attr,
- &dev_attr_fan3_input.attr,
- &dev_attr_pwm3.attr,
-
- &dev_attr_temp1_status.attr,
- &dev_attr_temp1_input.attr,
- &dev_attr_temp2_status.attr,
- &dev_attr_temp2_input.attr,
- &dev_attr_temp3_status.attr,
- &dev_attr_temp3_input.attr,
- NULL
-};
-
-static const struct attribute_group fscher_group = {
- .attrs = fscher_attributes,
-};
-
-/*
- * Real code
- */
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int fscher_detect(struct i2c_client *new_client, int kind,
- struct i2c_board_info *info)
-{
- struct i2c_adapter *adapter = new_client->adapter;
-
- if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
- return -ENODEV;
-
- /* Do the remaining detection unless force or force_fscher parameter */
- if (kind < 0) {
- if ((i2c_smbus_read_byte_data(new_client,
- FSCHER_REG_IDENT_0) != 0x48) /* 'H' */
- || (i2c_smbus_read_byte_data(new_client,
- FSCHER_REG_IDENT_1) != 0x45) /* 'E' */
- || (i2c_smbus_read_byte_data(new_client,
- FSCHER_REG_IDENT_2) != 0x52)) /* 'R' */
- return -ENODEV;
- }
-
- strlcpy(info->type, "fscher", I2C_NAME_SIZE);
-
- return 0;
-}
-
-static int fscher_probe(struct i2c_client *new_client,
- const struct i2c_device_id *id)
-{
- struct fscher_data *data;
- int err;
-
- data = kzalloc(sizeof(struct fscher_data), GFP_KERNEL);
- if (!data) {
- err = -ENOMEM;
- goto exit;
- }
-
- i2c_set_clientdata(new_client, data);
- data->valid = 0;
- mutex_init(&data->update_lock);
-
- fscher_init_client(new_client);
-
- /* Register sysfs hooks */
- if ((err = sysfs_create_group(&new_client->dev.kobj, &fscher_group)))
- goto exit_free;
-
- data->hwmon_dev = hwmon_device_register(&new_client->dev);
- if (IS_ERR(data->hwmon_dev)) {
- err = PTR_ERR(data->hwmon_dev);
- goto exit_remove_files;
- }
-
- return 0;
-
-exit_remove_files:
- sysfs_remove_group(&new_client->dev.kobj, &fscher_group);
-exit_free:
- kfree(data);
-exit:
- return err;
-}
-
-static int fscher_remove(struct i2c_client *client)
-{
- struct fscher_data *data = i2c_get_clientdata(client);
-
- hwmon_device_unregister(data->hwmon_dev);
- sysfs_remove_group(&client->dev.kobj, &fscher_group);
-
- kfree(data);
- return 0;
-}
-
-static int fscher_read_value(struct i2c_client *client, u8 reg)
-{
- dev_dbg(&client->dev, "read reg 0x%02x\n", reg);
-
- return i2c_smbus_read_byte_data(client, reg);
-}
-
-static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value)
-{
- dev_dbg(&client->dev, "write reg 0x%02x, val 0x%02x\n",
- reg, value);
-
- return i2c_smbus_write_byte_data(client, reg, value);
-}
-
-/* Called when we have found a new FSC Hermes. */
-static void fscher_init_client(struct i2c_client *client)
-{
- struct fscher_data *data = i2c_get_clientdata(client);
-
- /* Read revision from chip */
- data->revision = fscher_read_value(client, FSCHER_REG_REVISION);
-}
-
-static struct fscher_data *fscher_update_device(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct fscher_data *data = i2c_get_clientdata(client);
-
- mutex_lock(&data->update_lock);
-
- if (time_after(jiffies, data->last_updated + 2 * HZ) || !data->valid) {
-
- dev_dbg(&client->dev, "Starting fscher update\n");
-
- data->temp_act[0] = fscher_read_value(client, FSCHER_REG_TEMP0_ACT);
- data->temp_act[1] = fscher_read_value(client, FSCHER_REG_TEMP1_ACT);
- data->temp_act[2] = fscher_read_value(client, FSCHER_REG_TEMP2_ACT);
- data->temp_status[0] = fscher_read_value(client, FSCHER_REG_TEMP0_STATE);
- data->temp_status[1] = fscher_read_value(client, FSCHER_REG_TEMP1_STATE);
- data->temp_status[2] = fscher_read_value(client, FSCHER_REG_TEMP2_STATE);
-
- data->volt[0] = fscher_read_value(client, FSCHER_REG_VOLT_12);
- data->volt[1] = fscher_read_value(client, FSCHER_REG_VOLT_5);
- data->volt[2] = fscher_read_value(client, FSCHER_REG_VOLT_BATT);
-
- data->fan_act[0] = fscher_read_value(client, FSCHER_REG_FAN0_ACT);
- data->fan_act[1] = fscher_read_value(client, FSCHER_REG_FAN1_ACT);
- data->fan_act[2] = fscher_read_value(client, FSCHER_REG_FAN2_ACT);
- data->fan_status[0] = fscher_read_value(client, FSCHER_REG_FAN0_STATE);
- data->fan_status[1] = fscher_read_value(client, FSCHER_REG_FAN1_STATE);
- data->fan_status[2] = fscher_read_value(client, FSCHER_REG_FAN2_STATE);
- data->fan_min[0] = fscher_read_value(client, FSCHER_REG_FAN0_MIN);
- data->fan_min[1] = fscher_read_value(client, FSCHER_REG_FAN1_MIN);
- data->fan_min[2] = fscher_read_value(client, FSCHER_REG_FAN2_MIN);
- data->fan_ripple[0] = fscher_read_value(client, FSCHER_REG_FAN0_RIPPLE);
- data->fan_ripple[1] = fscher_read_value(client, FSCHER_REG_FAN1_RIPPLE);
- data->fan_ripple[2] = fscher_read_value(client, FSCHER_REG_FAN2_RIPPLE);
-
- data->watchdog[0] = fscher_read_value(client, FSCHER_REG_WDOG_PRESET);
- data->watchdog[1] = fscher_read_value(client, FSCHER_REG_WDOG_STATE);
- data->watchdog[2] = fscher_read_value(client, FSCHER_REG_WDOG_CONTROL);
-
- data->global_event = fscher_read_value(client, FSCHER_REG_EVENT_STATE);
- data->global_control = fscher_read_value(client,
- FSCHER_REG_CONTROL);
-
- data->last_updated = jiffies;
- data->valid = 1;
- }
-
- mutex_unlock(&data->update_lock);
-
- return data;
-}
-
-
-
-#define FAN_INDEX_FROM_NUM(nr) ((nr) - 1)
-
-static ssize_t set_fan_status(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* bits 0..1, 3..7 reserved => mask with 0x04 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x04;
-
- mutex_lock(&data->update_lock);
- data->fan_status[FAN_INDEX_FROM_NUM(nr)] &= ~v;
- fscher_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_fan_status(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 0..1, 3..7 reserved => mask with 0x04 */
- return sprintf(buf, "%u\n", data->fan_status[FAN_INDEX_FROM_NUM(nr)] & 0x04);
-}
-
-static ssize_t set_pwm(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10);
-
- mutex_lock(&data->update_lock);
- data->fan_min[FAN_INDEX_FROM_NUM(nr)] = v > 0xff ? 0xff : v;
- fscher_write_value(client, reg, data->fan_min[FAN_INDEX_FROM_NUM(nr)]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_pwm(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", data->fan_min[FAN_INDEX_FROM_NUM(nr)]);
-}
-
-static ssize_t set_fan_div(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* supported values: 2, 4, 8 */
- unsigned long v = simple_strtoul(buf, NULL, 10);
-
- switch (v) {
- case 2: v = 1; break;
- case 4: v = 2; break;
- case 8: v = 3; break;
- default:
- dev_err(&client->dev, "fan_div value %ld not "
- "supported. Choose one of 2, 4 or 8!\n", v);
- return -EINVAL;
- }
-
- mutex_lock(&data->update_lock);
-
- /* bits 2..7 reserved => mask with 0x03 */
- data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] &= ~0x03;
- data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] |= v;
-
- fscher_write_value(client, reg, data->fan_ripple[FAN_INDEX_FROM_NUM(nr)]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_fan_div(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 2..7 reserved => mask with 0x03 */
- return sprintf(buf, "%u\n", 1 << (data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] & 0x03));
-}
-
-#define RPM_FROM_REG(val) (val*60)
-
-static ssize_t show_fan_input (struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", RPM_FROM_REG(data->fan_act[FAN_INDEX_FROM_NUM(nr)]));
-}
-
-
-
-#define TEMP_INDEX_FROM_NUM(nr) ((nr) - 1)
-
-static ssize_t set_temp_status(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* bits 2..7 reserved, 0 read only => mask with 0x02 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
- mutex_lock(&data->update_lock);
- data->temp_status[TEMP_INDEX_FROM_NUM(nr)] &= ~v;
- fscher_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_temp_status(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 2..7 reserved => mask with 0x03 */
- return sprintf(buf, "%u\n", data->temp_status[TEMP_INDEX_FROM_NUM(nr)] & 0x03);
-}
-
-#define TEMP_FROM_REG(val) (((val) - 128) * 1000)
-
-static ssize_t show_temp_input(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_act[TEMP_INDEX_FROM_NUM(nr)]));
-}
-
-/*
- * The final conversion is specified in sensors.conf, as it depends on
- * mainboard specific values. We export the registers contents as
- * pseudo-hundredths-of-Volts (range 0V - 2.55V). Not that it makes much
- * sense per se, but it minimizes the conversions count and keeps the
- * values within a usual range.
- */
-#define VOLT_FROM_REG(val) ((val) * 10)
-
-static ssize_t show_in_input(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[nr]));
-}
-
-
-
-static ssize_t show_revision(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", data->revision);
-}
-
-
-
-static ssize_t show_alarms(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 2, 5..6 reserved => mask with 0x9b */
- return sprintf(buf, "%u\n", data->global_event & 0x9b);
-}
-
-
-
-static ssize_t set_control(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* bits 1..7 reserved => mask with 0x01 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x01;
-
- mutex_lock(&data->update_lock);
- data->global_control = v;
- fscher_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_control(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 1..7 reserved => mask with 0x01 */
- return sprintf(buf, "%u\n", data->global_control & 0x01);
-}
-
-
-
-static ssize_t set_watchdog_control(struct i2c_client *client, struct
- fscher_data *data, const char *buf, size_t count,
- int nr, int reg)
-{
- /* bits 0..3 reserved => mask with 0xf0 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0xf0;
-
- mutex_lock(&data->update_lock);
- data->watchdog[2] &= ~0xf0;
- data->watchdog[2] |= v;
- fscher_write_value(client, reg, data->watchdog[2]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_watchdog_control(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 0..3 reserved, bit 5 write only => mask with 0xd0 */
- return sprintf(buf, "%u\n", data->watchdog[2] & 0xd0);
-}
-
-static ssize_t set_watchdog_status(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- /* bits 0, 2..7 reserved => mask with 0x02 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
- mutex_lock(&data->update_lock);
- data->watchdog[1] &= ~v;
- fscher_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_watchdog_status(struct fscher_data *data, char *buf, int nr)
-{
- /* bits 0, 2..7 reserved => mask with 0x02 */
- return sprintf(buf, "%u\n", data->watchdog[1] & 0x02);
-}
-
-static ssize_t set_watchdog_preset(struct i2c_client *client, struct fscher_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0xff;
-
- mutex_lock(&data->update_lock);
- data->watchdog[0] = v;
- fscher_write_value(client, reg, data->watchdog[0]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_watchdog_preset(struct fscher_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", data->watchdog[0]);
-}
-
-static int __init sensors_fscher_init(void)
-{
- return i2c_add_driver(&fscher_driver);
-}
-
-static void __exit sensors_fscher_exit(void)
-{
- i2c_del_driver(&fscher_driver);
-}
-
-MODULE_AUTHOR("Reinhard Nissl <rnissl@gmx.de>");
-MODULE_DESCRIPTION("FSC Hermes driver");
-MODULE_LICENSE("GPL");
-
-module_init(sensors_fscher_init);
-module_exit(sensors_fscher_exit);
+++ /dev/null
-/*
- fscpos.c - Kernel module for hardware monitoring with FSC Poseidon chips
- Copyright (C) 2004, 2005 Stefan Ott <stefan@desire.ch>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-/*
- fujitsu siemens poseidon chip,
- module based on the old fscpos module by Hermann Jung <hej@odn.de> and
- the fscher module by Reinhard Nissl <rnissl@gmx.de>
-
- original module based on lm80.c
- Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
- and Philip Edelbrock <phil@netroedge.com>
-
- Thanks to Jean Delvare for reviewing my code and suggesting a lot of
- improvements.
-*/
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/init.h>
-#include <linux/hwmon.h>
-#include <linux/err.h>
-#include <linux/mutex.h>
-#include <linux/sysfs.h>
-
-/*
- * Addresses to scan
- */
-static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END };
-
-/*
- * Insmod parameters
- */
-I2C_CLIENT_INSMOD_1(fscpos);
-
-/*
- * The FSCPOS registers
- */
-
-/* chip identification */
-#define FSCPOS_REG_IDENT_0 0x00
-#define FSCPOS_REG_IDENT_1 0x01
-#define FSCPOS_REG_IDENT_2 0x02
-#define FSCPOS_REG_REVISION 0x03
-
-/* global control and status */
-#define FSCPOS_REG_EVENT_STATE 0x04
-#define FSCPOS_REG_CONTROL 0x05
-
-/* watchdog */
-#define FSCPOS_REG_WDOG_PRESET 0x28
-#define FSCPOS_REG_WDOG_STATE 0x23
-#define FSCPOS_REG_WDOG_CONTROL 0x21
-
-/* voltages */
-#define FSCPOS_REG_VOLT_12 0x45
-#define FSCPOS_REG_VOLT_5 0x42
-#define FSCPOS_REG_VOLT_BATT 0x48
-
-/* fans - the chip does not support minimum speed for fan2 */
-static u8 FSCPOS_REG_PWM[] = { 0x55, 0x65 };
-static u8 FSCPOS_REG_FAN_ACT[] = { 0x0e, 0x6b, 0xab };
-static u8 FSCPOS_REG_FAN_STATE[] = { 0x0d, 0x62, 0xa2 };
-static u8 FSCPOS_REG_FAN_RIPPLE[] = { 0x0f, 0x6f, 0xaf };
-
-/* temperatures */
-static u8 FSCPOS_REG_TEMP_ACT[] = { 0x64, 0x32, 0x35 };
-static u8 FSCPOS_REG_TEMP_STATE[] = { 0x71, 0x81, 0x91 };
-
-/*
- * Functions declaration
- */
-static int fscpos_probe(struct i2c_client *client,
- const struct i2c_device_id *id);
-static int fscpos_detect(struct i2c_client *client, int kind,
- struct i2c_board_info *info);
-static int fscpos_remove(struct i2c_client *client);
-
-static int fscpos_read_value(struct i2c_client *client, u8 reg);
-static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value);
-static struct fscpos_data *fscpos_update_device(struct device *dev);
-static void fscpos_init_client(struct i2c_client *client);
-
-static void reset_fan_alarm(struct i2c_client *client, int nr);
-
-/*
- * Driver data (common to all clients)
- */
-static const struct i2c_device_id fscpos_id[] = {
- { "fscpos", fscpos },
- { }
-};
-
-static struct i2c_driver fscpos_driver = {
- .class = I2C_CLASS_HWMON,
- .driver = {
- .name = "fscpos",
- },
- .probe = fscpos_probe,
- .remove = fscpos_remove,
- .id_table = fscpos_id,
- .detect = fscpos_detect,
- .address_data = &addr_data,
-};
-
-/*
- * Client data (each client gets its own)
- */
-struct fscpos_data {
- struct device *hwmon_dev;
- struct mutex update_lock;
- char valid; /* 0 until following fields are valid */
- unsigned long last_updated; /* In jiffies */
-
- /* register values */
- u8 revision; /* revision of chip */
- u8 global_event; /* global event status */
- u8 global_control; /* global control register */
- u8 wdog_control; /* watchdog control */
- u8 wdog_state; /* watchdog status */
- u8 wdog_preset; /* watchdog preset */
- u8 volt[3]; /* 12, 5, battery current */
- u8 temp_act[3]; /* temperature */
- u8 temp_status[3]; /* status of sensor */
- u8 fan_act[3]; /* fans revolutions per second */
- u8 fan_status[3]; /* fan status */
- u8 pwm[2]; /* fan min value for rps */
- u8 fan_ripple[3]; /* divider for rps */
-};
-
-/* Temperature */
-#define TEMP_FROM_REG(val) (((val) - 128) * 1000)
-
-static ssize_t show_temp_input(struct fscpos_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_act[nr - 1]));
-}
-
-static ssize_t show_temp_status(struct fscpos_data *data, char *buf, int nr)
-{
- /* bits 2..7 reserved => mask with 0x03 */
- return sprintf(buf, "%u\n", data->temp_status[nr - 1] & 0x03);
-}
-
-static ssize_t show_temp_reset(struct fscpos_data *data, char *buf, int nr)
-{
- return sprintf(buf, "1\n");
-}
-
-static ssize_t set_temp_reset(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int nr, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10);
- if (v != 1) {
- dev_err(&client->dev, "temp_reset value %ld not supported. "
- "Use 1 to reset the alarm!\n", v);
- return -EINVAL;
- }
-
- dev_info(&client->dev, "You used the temp_reset feature which has not "
- "been proplerly tested. Please report your "
- "experience to the module author.\n");
-
- /* Supported value: 2 (clears the status) */
- fscpos_write_value(client, FSCPOS_REG_TEMP_STATE[nr - 1], 2);
- return count;
-}
-
-/* Fans */
-#define RPM_FROM_REG(val) ((val) * 60)
-
-static ssize_t show_fan_status(struct fscpos_data *data, char *buf, int nr)
-{
- /* bits 0..1, 3..7 reserved => mask with 0x04 */
- return sprintf(buf, "%u\n", data->fan_status[nr - 1] & 0x04);
-}
-
-static ssize_t show_fan_input(struct fscpos_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", RPM_FROM_REG(data->fan_act[nr - 1]));
-}
-
-static ssize_t show_fan_ripple(struct fscpos_data *data, char *buf, int nr)
-{
- /* bits 2..7 reserved => mask with 0x03 */
- return sprintf(buf, "%u\n", data->fan_ripple[nr - 1] & 0x03);
-}
-
-static ssize_t set_fan_ripple(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int nr, int reg)
-{
- /* supported values: 2, 4, 8 */
- unsigned long v = simple_strtoul(buf, NULL, 10);
-
- switch (v) {
- case 2: v = 1; break;
- case 4: v = 2; break;
- case 8: v = 3; break;
- default:
- dev_err(&client->dev, "fan_ripple value %ld not supported. "
- "Must be one of 2, 4 or 8!\n", v);
- return -EINVAL;
- }
-
- mutex_lock(&data->update_lock);
- /* bits 2..7 reserved => mask with 0x03 */
- data->fan_ripple[nr - 1] &= ~0x03;
- data->fan_ripple[nr - 1] |= v;
-
- fscpos_write_value(client, reg, data->fan_ripple[nr - 1]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_pwm(struct fscpos_data *data, char *buf, int nr)
-{
- return sprintf(buf, "%u\n", data->pwm[nr - 1]);
-}
-
-static ssize_t set_pwm(struct i2c_client *client, struct fscpos_data *data,
- const char *buf, size_t count, int nr, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10);
-
- /* Range: 0..255 */
- if (v < 0) v = 0;
- if (v > 255) v = 255;
-
- mutex_lock(&data->update_lock);
- data->pwm[nr - 1] = v;
- fscpos_write_value(client, reg, data->pwm[nr - 1]);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static void reset_fan_alarm(struct i2c_client *client, int nr)
-{
- fscpos_write_value(client, FSCPOS_REG_FAN_STATE[nr], 4);
-}
-
-/* Volts */
-#define VOLT_FROM_REG(val, mult) ((val) * (mult) / 255)
-
-static ssize_t show_volt_12(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct fscpos_data *data = fscpos_update_device(dev);
- return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[0], 14200));
-}
-
-static ssize_t show_volt_5(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct fscpos_data *data = fscpos_update_device(dev);
- return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[1], 6600));
-}
-
-static ssize_t show_volt_batt(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct fscpos_data *data = fscpos_update_device(dev);
- return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[2], 3300));
-}
-
-/* Watchdog */
-static ssize_t show_wdog_control(struct fscpos_data *data, char *buf)
-{
- /* bits 0..3 reserved, bit 6 write only => mask with 0xb0 */
- return sprintf(buf, "%u\n", data->wdog_control & 0xb0);
-}
-
-static ssize_t set_wdog_control(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int reg)
-{
- /* bits 0..3 reserved => mask with 0xf0 */
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0xf0;
-
- mutex_lock(&data->update_lock);
- data->wdog_control &= ~0xf0;
- data->wdog_control |= v;
- fscpos_write_value(client, reg, data->wdog_control);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_wdog_state(struct fscpos_data *data, char *buf)
-{
- /* bits 0, 2..7 reserved => mask with 0x02 */
- return sprintf(buf, "%u\n", data->wdog_state & 0x02);
-}
-
-static ssize_t set_wdog_state(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02;
-
- /* Valid values: 2 (clear) */
- if (v != 2) {
- dev_err(&client->dev, "wdog_state value %ld not supported. "
- "Must be 2 to clear the state!\n", v);
- return -EINVAL;
- }
-
- mutex_lock(&data->update_lock);
- data->wdog_state &= ~v;
- fscpos_write_value(client, reg, v);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-static ssize_t show_wdog_preset(struct fscpos_data *data, char *buf)
-{
- return sprintf(buf, "%u\n", data->wdog_preset);
-}
-
-static ssize_t set_wdog_preset(struct i2c_client *client, struct fscpos_data
- *data, const char *buf, size_t count, int reg)
-{
- unsigned long v = simple_strtoul(buf, NULL, 10) & 0xff;
-
- mutex_lock(&data->update_lock);
- data->wdog_preset = v;
- fscpos_write_value(client, reg, data->wdog_preset);
- mutex_unlock(&data->update_lock);
- return count;
-}
-
-/* Event */
-static ssize_t show_event(struct device *dev, struct device_attribute *attr, char *buf)
-{
- /* bits 5..7 reserved => mask with 0x1f */
- struct fscpos_data *data = fscpos_update_device(dev);
- return sprintf(buf, "%u\n", data->global_event & 0x9b);
-}
-
-/*
- * Sysfs stuff
- */
-#define create_getter(kind, sub) \
- static ssize_t sysfs_show_##kind##sub(struct device *dev, struct device_attribute *attr, char *buf) \
- { \
- struct fscpos_data *data = fscpos_update_device(dev); \
- return show_##kind##sub(data, buf); \
- }
-
-#define create_getter_n(kind, offset, sub) \
- static ssize_t sysfs_show_##kind##offset##sub(struct device *dev, struct device_attribute *attr, char\
- *buf) \
- { \
- struct fscpos_data *data = fscpos_update_device(dev); \
- return show_##kind##sub(data, buf, offset); \
- }
-
-#define create_setter(kind, sub, reg) \
- static ssize_t sysfs_set_##kind##sub (struct device *dev, struct device_attribute *attr, const char \
- *buf, size_t count) \
- { \
- struct i2c_client *client = to_i2c_client(dev); \
- struct fscpos_data *data = i2c_get_clientdata(client); \
- return set_##kind##sub(client, data, buf, count, reg); \
- }
-
-#define create_setter_n(kind, offset, sub, reg) \
- static ssize_t sysfs_set_##kind##offset##sub (struct device *dev, struct device_attribute *attr, \
- const char *buf, size_t count) \
- { \
- struct i2c_client *client = to_i2c_client(dev); \
- struct fscpos_data *data = i2c_get_clientdata(client); \
- return set_##kind##sub(client, data, buf, count, offset, reg);\
- }
-
-#define create_sysfs_device_ro(kind, sub, offset) \
- static DEVICE_ATTR(kind##offset##sub, S_IRUGO, \
- sysfs_show_##kind##offset##sub, NULL);
-
-#define create_sysfs_device_rw(kind, sub, offset) \
- static DEVICE_ATTR(kind##offset##sub, S_IRUGO | S_IWUSR, \
- sysfs_show_##kind##offset##sub, sysfs_set_##kind##offset##sub);
-
-#define sysfs_ro_n(kind, sub, offset) \
- create_getter_n(kind, offset, sub); \
- create_sysfs_device_ro(kind, sub, offset);
-
-#define sysfs_rw_n(kind, sub, offset, reg) \
- create_getter_n(kind, offset, sub); \
- create_setter_n(kind, offset, sub, reg); \
- create_sysfs_device_rw(kind, sub, offset);
-
-#define sysfs_rw(kind, sub, reg) \
- create_getter(kind, sub); \
- create_setter(kind, sub, reg); \
- create_sysfs_device_rw(kind, sub,);
-
-#define sysfs_fan_with_min(offset, reg_status, reg_ripple, reg_min) \
- sysfs_fan(offset, reg_status, reg_ripple); \
- sysfs_rw_n(pwm,, offset, reg_min);
-
-#define sysfs_fan(offset, reg_status, reg_ripple) \
- sysfs_ro_n(fan, _input, offset); \
- sysfs_ro_n(fan, _status, offset); \
- sysfs_rw_n(fan, _ripple, offset, reg_ripple);
-
-#define sysfs_temp(offset, reg_status) \
- sysfs_ro_n(temp, _input, offset); \
- sysfs_ro_n(temp, _status, offset); \
- sysfs_rw_n(temp, _reset, offset, reg_status);
-
-#define sysfs_watchdog(reg_wdog_preset, reg_wdog_state, reg_wdog_control) \
- sysfs_rw(wdog, _control, reg_wdog_control); \
- sysfs_rw(wdog, _preset, reg_wdog_preset); \
- sysfs_rw(wdog, _state, reg_wdog_state);
-
-sysfs_fan_with_min(1, FSCPOS_REG_FAN_STATE[0], FSCPOS_REG_FAN_RIPPLE[0],
- FSCPOS_REG_PWM[0]);
-sysfs_fan_with_min(2, FSCPOS_REG_FAN_STATE[1], FSCPOS_REG_FAN_RIPPLE[1],
- FSCPOS_REG_PWM[1]);
-sysfs_fan(3, FSCPOS_REG_FAN_STATE[2], FSCPOS_REG_FAN_RIPPLE[2]);
-
-sysfs_temp(1, FSCPOS_REG_TEMP_STATE[0]);
-sysfs_temp(2, FSCPOS_REG_TEMP_STATE[1]);
-sysfs_temp(3, FSCPOS_REG_TEMP_STATE[2]);
-
-sysfs_watchdog(FSCPOS_REG_WDOG_PRESET, FSCPOS_REG_WDOG_STATE,
- FSCPOS_REG_WDOG_CONTROL);
-
-static DEVICE_ATTR(event, S_IRUGO, show_event, NULL);
-static DEVICE_ATTR(in0_input, S_IRUGO, show_volt_12, NULL);
-static DEVICE_ATTR(in1_input, S_IRUGO, show_volt_5, NULL);
-static DEVICE_ATTR(in2_input, S_IRUGO, show_volt_batt, NULL);
-
-static struct attribute *fscpos_attributes[] = {
- &dev_attr_event.attr,
- &dev_attr_in0_input.attr,
- &dev_attr_in1_input.attr,
- &dev_attr_in2_input.attr,
-
- &dev_attr_wdog_control.attr,
- &dev_attr_wdog_preset.attr,
- &dev_attr_wdog_state.attr,
-
- &dev_attr_temp1_input.attr,
- &dev_attr_temp1_status.attr,
- &dev_attr_temp1_reset.attr,
- &dev_attr_temp2_input.attr,
- &dev_attr_temp2_status.attr,
- &dev_attr_temp2_reset.attr,
- &dev_attr_temp3_input.attr,
- &dev_attr_temp3_status.attr,
- &dev_attr_temp3_reset.attr,
-
- &dev_attr_fan1_input.attr,
- &dev_attr_fan1_status.attr,
- &dev_attr_fan1_ripple.attr,
- &dev_attr_pwm1.attr,
- &dev_attr_fan2_input.attr,
- &dev_attr_fan2_status.attr,
- &dev_attr_fan2_ripple.attr,
- &dev_attr_pwm2.attr,
- &dev_attr_fan3_input.attr,
- &dev_attr_fan3_status.attr,
- &dev_attr_fan3_ripple.attr,
- NULL
-};
-
-static const struct attribute_group fscpos_group = {
- .attrs = fscpos_attributes,
-};
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int fscpos_detect(struct i2c_client *new_client, int kind,
- struct i2c_board_info *info)
-{
- struct i2c_adapter *adapter = new_client->adapter;
-
- if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
- return -ENODEV;
-
- /* Do the remaining detection unless force or force_fscpos parameter */
- if (kind < 0) {
- if ((fscpos_read_value(new_client, FSCPOS_REG_IDENT_0)
- != 0x50) /* 'P' */
- || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_1)
- != 0x45) /* 'E' */
- || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_2)
- != 0x47))/* 'G' */
- return -ENODEV;
- }
-
- strlcpy(info->type, "fscpos", I2C_NAME_SIZE);
-
- return 0;
-}
-
-static int fscpos_probe(struct i2c_client *new_client,
- const struct i2c_device_id *id)
-{
- struct fscpos_data *data;
- int err;
-
- data = kzalloc(sizeof(struct fscpos_data), GFP_KERNEL);
- if (!data) {
- err = -ENOMEM;
- goto exit;
- }
-
- i2c_set_clientdata(new_client, data);
- data->valid = 0;
- mutex_init(&data->update_lock);
-
- /* Inizialize the fscpos chip */
- fscpos_init_client(new_client);
-
- /* Announce that the chip was found */
- dev_info(&new_client->dev, "Found fscpos chip, rev %u\n", data->revision);
-
- /* Register sysfs hooks */
- if ((err = sysfs_create_group(&new_client->dev.kobj, &fscpos_group)))
- goto exit_free;
-
- data->hwmon_dev = hwmon_device_register(&new_client->dev);
- if (IS_ERR(data->hwmon_dev)) {
- err = PTR_ERR(data->hwmon_dev);
- goto exit_remove_files;
- }
-
- return 0;
-
-exit_remove_files:
- sysfs_remove_group(&new_client->dev.kobj, &fscpos_group);
-exit_free:
- kfree(data);
-exit:
- return err;
-}
-
-static int fscpos_remove(struct i2c_client *client)
-{
- struct fscpos_data *data = i2c_get_clientdata(client);
-
- hwmon_device_unregister(data->hwmon_dev);
- sysfs_remove_group(&client->dev.kobj, &fscpos_group);
-
- kfree(data);
- return 0;
-}
-
-static int fscpos_read_value(struct i2c_client *client, u8 reg)
-{
- dev_dbg(&client->dev, "Read reg 0x%02x\n", reg);
- return i2c_smbus_read_byte_data(client, reg);
-}
-
-static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value)
-{
- dev_dbg(&client->dev, "Write reg 0x%02x, val 0x%02x\n", reg, value);
- return i2c_smbus_write_byte_data(client, reg, value);
-}
-
-/* Called when we have found a new FSCPOS chip */
-static void fscpos_init_client(struct i2c_client *client)
-{
- struct fscpos_data *data = i2c_get_clientdata(client);
-
- /* read revision from chip */
- data->revision = fscpos_read_value(client, FSCPOS_REG_REVISION);
-}
-
-static struct fscpos_data *fscpos_update_device(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct fscpos_data *data = i2c_get_clientdata(client);
-
- mutex_lock(&data->update_lock);
-
- if (time_after(jiffies, data->last_updated + 2 * HZ) || !data->valid) {
- int i;
-
- dev_dbg(&client->dev, "Starting fscpos update\n");
-
- for (i = 0; i < 3; i++) {
- data->temp_act[i] = fscpos_read_value(client,
- FSCPOS_REG_TEMP_ACT[i]);
- data->temp_status[i] = fscpos_read_value(client,
- FSCPOS_REG_TEMP_STATE[i]);
- data->fan_act[i] = fscpos_read_value(client,
- FSCPOS_REG_FAN_ACT[i]);
- data->fan_status[i] = fscpos_read_value(client,
- FSCPOS_REG_FAN_STATE[i]);
- data->fan_ripple[i] = fscpos_read_value(client,
- FSCPOS_REG_FAN_RIPPLE[i]);
- if (i < 2) {
- /* fan2_min is not supported by the chip */
- data->pwm[i] = fscpos_read_value(client,
- FSCPOS_REG_PWM[i]);
- }
- /* reset fan status if speed is back to > 0 */
- if (data->fan_status[i] != 0 && data->fan_act[i] > 0) {
- reset_fan_alarm(client, i);
- }
- }
-
- data->volt[0] = fscpos_read_value(client, FSCPOS_REG_VOLT_12);
- data->volt[1] = fscpos_read_value(client, FSCPOS_REG_VOLT_5);
- data->volt[2] = fscpos_read_value(client, FSCPOS_REG_VOLT_BATT);
-
- data->wdog_preset = fscpos_read_value(client,
- FSCPOS_REG_WDOG_PRESET);
- data->wdog_state = fscpos_read_value(client,
- FSCPOS_REG_WDOG_STATE);
- data->wdog_control = fscpos_read_value(client,
- FSCPOS_REG_WDOG_CONTROL);
-
- data->global_event = fscpos_read_value(client,
- FSCPOS_REG_EVENT_STATE);
-
- data->last_updated = jiffies;
- data->valid = 1;
- }
- mutex_unlock(&data->update_lock);
- return data;
-}
-
-static int __init sm_fscpos_init(void)
-{
- return i2c_add_driver(&fscpos_driver);
-}
-
-static void __exit sm_fscpos_exit(void)
-{
- i2c_del_driver(&fscpos_driver);
-}
-
-MODULE_AUTHOR("Stefan Ott <stefan@desire.ch> based on work from Hermann Jung "
- "<hej@odn.de>, Frodo Looijaard <frodol@dds.nl>"
- " and Philip Edelbrock <phil@netroedge.com>");
-MODULE_DESCRIPTION("fujitsu siemens poseidon chip driver");
-MODULE_LICENSE("GPL");
-
-module_init(sm_fscpos_init);
-module_exit(sm_fscpos_exit);
mutex_init(&data->update_lock);
/* Initialize the LTC4215 chip */
- /* TODO */
+ i2c_smbus_write_byte_data(client, LTC4215_FAULT, 0x00);
/* Register sysfs hooks */
ret = sysfs_create_group(&client->dev.kobj, <c4215_group);
mutex_init(&data->update_lock);
/* Initialize the LTC4245 chip */
- /* TODO */
+ i2c_smbus_write_byte_data(client, LTC4245_FAULT1, 0x00);
+ i2c_smbus_write_byte_data(client, LTC4245_FAULT2, 0x00);
/* Register sysfs hooks */
ret = sysfs_create_group(&client->dev.kobj, <c4245_group);
#include <asm/idle.h>
-#include "../dma/ioatdma_hw.h"
-#include "../dma/ioatdma_registers.h"
+#include "../dma/ioat/hw.h"
+#include "../dma/ioat/registers.h"
#define I7300_IDLE_DRIVER_VERSION "1.55"
#define I7300_PRINT "i7300_idle:"
udelay(10);
sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
- IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+ IOAT_CHANSTS_STATUS;
- if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE)
+ if (sts != IOAT_CHANSTS_ACTIVE)
break;
}
udelay(1000);
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
- IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+ IOAT_CHANSTS_STATUS;
- if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) {
+ if (chan_sts != IOAT_CHANSTS_DONE) {
/* Not complete, reset the channel */
writeb(IOAT_CHANCMD_RESET,
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
- IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+ IOAT_CHANSTS_STATUS;
- if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
+ if (chan_sts != IOAT_CHANSTS_ACTIVE) {
writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
break;
}
}
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
- IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+ IOAT_CHANSTS_STATUS;
/*
* We tried to reset multiple times. If IO A/T channel is still active
* flag an error and return without cleanup. Memory leak is better
* than random corruption in that extreme error situation.
*/
- if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
+ if (chan_sts == IOAT_CHANSTS_ACTIVE) {
printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
" Not freeing resources\n");
return;
*/
#include <linux/init.h>
+#include <linux/types.h>
#include <linux/input.h>
#include <linux/module.h>
#include <linux/random.h>
* that there are no threads in the middle of input_open_device()
*/
mutex_lock(&dev->mutex);
- dev->going_away = 1;
+ dev->going_away = true;
mutex_unlock(&dev->mutex);
spin_lock_irq(&dev->event_lock);
return 0;
}
+#define INPUT_DO_TOGGLE(dev, type, bits, on) \
+ do { \
+ int i; \
+ if (!test_bit(EV_##type, dev->evbit)) \
+ break; \
+ for (i = 0; i < type##_MAX; i++) { \
+ if (!test_bit(i, dev->bits##bit) || \
+ !test_bit(i, dev->bits)) \
+ continue; \
+ dev->event(dev, EV_##type, i, on); \
+ } \
+ } while (0)
+
+static void input_dev_reset(struct input_dev *dev, bool activate)
+{
+ if (!dev->event)
+ return;
+
+ INPUT_DO_TOGGLE(dev, LED, led, activate);
+ INPUT_DO_TOGGLE(dev, SND, snd, activate);
+
+ if (activate && test_bit(EV_REP, dev->evbit)) {
+ dev->event(dev, EV_REP, REP_PERIOD, dev->rep[REP_PERIOD]);
+ dev->event(dev, EV_REP, REP_DELAY, dev->rep[REP_DELAY]);
+ }
+}
+
+#ifdef CONFIG_PM
+static int input_dev_suspend(struct device *dev)
+{
+ struct input_dev *input_dev = to_input_dev(dev);
+
+ mutex_lock(&input_dev->mutex);
+ input_dev_reset(input_dev, false);
+ mutex_unlock(&input_dev->mutex);
+
+ return 0;
+}
+
+static int input_dev_resume(struct device *dev)
+{
+ struct input_dev *input_dev = to_input_dev(dev);
+
+ mutex_lock(&input_dev->mutex);
+ input_dev_reset(input_dev, true);
+ mutex_unlock(&input_dev->mutex);
+
+ return 0;
+}
+
+static const struct dev_pm_ops input_dev_pm_ops = {
+ .suspend = input_dev_suspend,
+ .resume = input_dev_resume,
+ .poweroff = input_dev_suspend,
+ .restore = input_dev_resume,
+};
+#endif /* CONFIG_PM */
+
static struct device_type input_dev_type = {
.groups = input_dev_attr_groups,
.release = input_dev_release,
.uevent = input_dev_uevent,
+#ifdef CONFIG_PM
+ .pm = &input_dev_pm_ops,
+#endif
};
static char *input_devnode(struct device *dev, mode_t *mode)
To compile this driver as a module, choose M here: the
module will be called aaed2000_kbd.
+config KEYBOARD_ADP5588
+ tristate "ADP5588 I2C QWERTY Keypad and IO Expander"
+ depends on I2C
+ help
+ Say Y here if you want to use a ADP5588 attached to your
+ system I2C bus.
+
+ To compile this driver as a module, choose M here: the
+ module will be called adp5588-keys.
+
config KEYBOARD_AMIGA
tristate "Amiga keyboard"
depends on AMIGA
right-hand column will be interpreted as the key shown in the
left-hand column.
+config QT2160
+ tristate "Atmel AT42QT2160 Touch Sensor Chip"
+ depends on I2C && EXPERIMENTAL
+ help
+ If you say yes here you get support for Atmel AT42QT2160 Touch
+ Sensor chip as a keyboard input.
+
+ This driver can also be built as a module. If so, the module
+ will be called qt2160.
+
config KEYBOARD_BFIN
tristate "Blackfin BF54x keypad support"
depends on (BF54x && !BF544)
To compile this driver as a module, choose M here: the
module will be called maple_keyb.
+config KEYBOARD_MAX7359
+ tristate "Maxim MAX7359 Key Switch Controller"
+ depends on I2C
+ help
+ If you say yes here you get support for the Maxim MAX7359 Key
+ Switch Controller chip. This providers microprocessors with
+ management of up to 64 key switches
+
+ To compile this driver as a module, choose M here: the
+ module will be called max7359_keypad.
+
config KEYBOARD_NEWTON
tristate "Newton keyboard"
select SERIO
To compile this driver as a module, choose M here: the
module will be called newtonkbd.
+config KEYBOARD_OPENCORES
+ tristate "OpenCores Keyboard Controller"
+ help
+ Say Y here if you want to use the OpenCores Keyboard Controller
+ http://www.opencores.org/project,keyboardcontroller
+
+ To compile this driver as a module, choose M here; the
+ module will be called opencores-kbd.
+
config KEYBOARD_PXA27x
tristate "PXA27x/PXA3xx keypad support"
depends on PXA27x || PXA3xx
# Each configuration option enables a list of files.
obj-$(CONFIG_KEYBOARD_AAED2000) += aaed2000_kbd.o
+obj-$(CONFIG_KEYBOARD_ADP5588) += adp5588-keys.o
obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o
obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o
obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o
obj-$(CONFIG_KEYBOARD_LOCOMO) += locomokbd.o
obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o
obj-$(CONFIG_KEYBOARD_MATRIX) += matrix_keypad.o
+obj-$(CONFIG_KEYBOARD_MAX7359) += max7359_keypad.o
obj-$(CONFIG_KEYBOARD_NEWTON) += newtonkbd.o
obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o
+obj-$(CONFIG_KEYBOARD_OPENCORES) += opencores-kbd.o
obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keypad.o
obj-$(CONFIG_KEYBOARD_PXA930_ROTARY) += pxa930_rotary.o
+obj-$(CONFIG_KEYBOARD_QT2160) += qt2160.o
obj-$(CONFIG_KEYBOARD_SH_KEYSC) += sh_keysc.o
obj-$(CONFIG_KEYBOARD_SPITZ) += spitzkbd.o
obj-$(CONFIG_KEYBOARD_STOWAWAY) += stowaway.o
--- /dev/null
+/*
+ * File: drivers/input/keyboard/adp5588_keys.c
+ * Description: keypad driver for ADP5588 I2C QWERTY Keypad and IO Expander
+ * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (C) 2008-2009 Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/input.h>
+#include <linux/i2c.h>
+
+#include <linux/i2c/adp5588.h>
+
+ /* Configuration Register1 */
+#define AUTO_INC (1 << 7)
+#define GPIEM_CFG (1 << 6)
+#define OVR_FLOW_M (1 << 5)
+#define INT_CFG (1 << 4)
+#define OVR_FLOW_IEN (1 << 3)
+#define K_LCK_IM (1 << 2)
+#define GPI_IEN (1 << 1)
+#define KE_IEN (1 << 0)
+
+/* Interrupt Status Register */
+#define CMP2_INT (1 << 5)
+#define CMP1_INT (1 << 4)
+#define OVR_FLOW_INT (1 << 3)
+#define K_LCK_INT (1 << 2)
+#define GPI_INT (1 << 1)
+#define KE_INT (1 << 0)
+
+/* Key Lock and Event Counter Register */
+#define K_LCK_EN (1 << 6)
+#define LCK21 0x30
+#define KEC 0xF
+
+/* Key Event Register xy */
+#define KEY_EV_PRESSED (1 << 7)
+#define KEY_EV_MASK (0x7F)
+
+#define KP_SEL(x) (0xFFFF >> (16 - x)) /* 2^x-1 */
+
+#define KEYP_MAX_EVENT 10
+
+/*
+ * Early pre 4.0 Silicon required to delay readout by at least 25ms,
+ * since the Event Counter Register updated 25ms after the interrupt
+ * asserted.
+ */
+#define WA_DELAYED_READOUT_REVID(rev) ((rev) < 4)
+
+struct adp5588_kpad {
+ struct i2c_client *client;
+ struct input_dev *input;
+ struct delayed_work work;
+ unsigned long delay;
+ unsigned short keycode[ADP5588_KEYMAPSIZE];
+};
+
+static int adp5588_read(struct i2c_client *client, u8 reg)
+{
+ int ret = i2c_smbus_read_byte_data(client, reg);
+
+ if (ret < 0)
+ dev_err(&client->dev, "Read Error\n");
+
+ return ret;
+}
+
+static int adp5588_write(struct i2c_client *client, u8 reg, u8 val)
+{
+ return i2c_smbus_write_byte_data(client, reg, val);
+}
+
+static void adp5588_work(struct work_struct *work)
+{
+ struct adp5588_kpad *kpad = container_of(work,
+ struct adp5588_kpad, work.work);
+ struct i2c_client *client = kpad->client;
+ int i, key, status, ev_cnt;
+
+ status = adp5588_read(client, INT_STAT);
+
+ if (status & OVR_FLOW_INT) /* Unlikely and should never happen */
+ dev_err(&client->dev, "Event Overflow Error\n");
+
+ if (status & KE_INT) {
+ ev_cnt = adp5588_read(client, KEY_LCK_EC_STAT) & KEC;
+ if (ev_cnt) {
+ for (i = 0; i < ev_cnt; i++) {
+ key = adp5588_read(client, Key_EVENTA + i);
+ input_report_key(kpad->input,
+ kpad->keycode[(key & KEY_EV_MASK) - 1],
+ key & KEY_EV_PRESSED);
+ }
+ input_sync(kpad->input);
+ }
+ }
+ adp5588_write(client, INT_STAT, status); /* Status is W1C */
+}
+
+static irqreturn_t adp5588_irq(int irq, void *handle)
+{
+ struct adp5588_kpad *kpad = handle;
+
+ /*
+ * use keventd context to read the event fifo registers
+ * Schedule readout at least 25ms after notification for
+ * REVID < 4
+ */
+
+ schedule_delayed_work(&kpad->work, kpad->delay);
+
+ return IRQ_HANDLED;
+}
+
+static int __devinit adp5588_setup(struct i2c_client *client)
+{
+ struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+ int i, ret;
+
+ ret = adp5588_write(client, KP_GPIO1, KP_SEL(pdata->rows));
+ ret |= adp5588_write(client, KP_GPIO2, KP_SEL(pdata->cols) & 0xFF);
+ ret |= adp5588_write(client, KP_GPIO3, KP_SEL(pdata->cols) >> 8);
+
+ if (pdata->en_keylock) {
+ ret |= adp5588_write(client, UNLOCK1, pdata->unlock_key1);
+ ret |= adp5588_write(client, UNLOCK2, pdata->unlock_key2);
+ ret |= adp5588_write(client, KEY_LCK_EC_STAT, K_LCK_EN);
+ }
+
+ for (i = 0; i < KEYP_MAX_EVENT; i++)
+ ret |= adp5588_read(client, Key_EVENTA);
+
+ ret |= adp5588_write(client, INT_STAT, CMP2_INT | CMP1_INT |
+ OVR_FLOW_INT | K_LCK_INT |
+ GPI_INT | KE_INT); /* Status is W1C */
+
+ ret |= adp5588_write(client, CFG, INT_CFG | OVR_FLOW_IEN | KE_IEN);
+
+ if (ret < 0) {
+ dev_err(&client->dev, "Write Error\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int __devinit adp5588_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct adp5588_kpad *kpad;
+ struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+ struct input_dev *input;
+ unsigned int revid;
+ int ret, i;
+ int error;
+
+ if (!i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_BYTE_DATA)) {
+ dev_err(&client->dev, "SMBUS Byte Data not Supported\n");
+ return -EIO;
+ }
+
+ if (!pdata) {
+ dev_err(&client->dev, "no platform data?\n");
+ return -EINVAL;
+ }
+
+ if (!pdata->rows || !pdata->cols || !pdata->keymap) {
+ dev_err(&client->dev, "no rows, cols or keymap from pdata\n");
+ return -EINVAL;
+ }
+
+ if (pdata->keymapsize != ADP5588_KEYMAPSIZE) {
+ dev_err(&client->dev, "invalid keymapsize\n");
+ return -EINVAL;
+ }
+
+ if (!client->irq) {
+ dev_err(&client->dev, "no IRQ?\n");
+ return -EINVAL;
+ }
+
+ kpad = kzalloc(sizeof(*kpad), GFP_KERNEL);
+ input = input_allocate_device();
+ if (!kpad || !input) {
+ error = -ENOMEM;
+ goto err_free_mem;
+ }
+
+ kpad->client = client;
+ kpad->input = input;
+ INIT_DELAYED_WORK(&kpad->work, adp5588_work);
+
+ ret = adp5588_read(client, DEV_ID);
+ if (ret < 0) {
+ error = ret;
+ goto err_free_mem;
+ }
+
+ revid = (u8) ret & ADP5588_DEVICE_ID_MASK;
+ if (WA_DELAYED_READOUT_REVID(revid))
+ kpad->delay = msecs_to_jiffies(30);
+
+ input->name = client->name;
+ input->phys = "adp5588-keys/input0";
+ input->dev.parent = &client->dev;
+
+ input_set_drvdata(input, kpad);
+
+ input->id.bustype = BUS_I2C;
+ input->id.vendor = 0x0001;
+ input->id.product = 0x0001;
+ input->id.version = revid;
+
+ input->keycodesize = sizeof(kpad->keycode[0]);
+ input->keycodemax = pdata->keymapsize;
+ input->keycode = kpad->keycode;
+
+ memcpy(kpad->keycode, pdata->keymap,
+ pdata->keymapsize * input->keycodesize);
+
+ /* setup input device */
+ __set_bit(EV_KEY, input->evbit);
+
+ if (pdata->repeat)
+ __set_bit(EV_REP, input->evbit);
+
+ for (i = 0; i < input->keycodemax; i++)
+ __set_bit(kpad->keycode[i] & KEY_MAX, input->keybit);
+ __clear_bit(KEY_RESERVED, input->keybit);
+
+ error = input_register_device(input);
+ if (error) {
+ dev_err(&client->dev, "unable to register input device\n");
+ goto err_free_mem;
+ }
+
+ error = request_irq(client->irq, adp5588_irq,
+ IRQF_TRIGGER_FALLING | IRQF_DISABLED,
+ client->dev.driver->name, kpad);
+ if (error) {
+ dev_err(&client->dev, "irq %d busy?\n", client->irq);
+ goto err_unreg_dev;
+ }
+
+ error = adp5588_setup(client);
+ if (error)
+ goto err_free_irq;
+
+ device_init_wakeup(&client->dev, 1);
+ i2c_set_clientdata(client, kpad);
+
+ dev_info(&client->dev, "Rev.%d keypad, irq %d\n", revid, client->irq);
+ return 0;
+
+ err_free_irq:
+ free_irq(client->irq, kpad);
+ err_unreg_dev:
+ input_unregister_device(input);
+ input = NULL;
+ err_free_mem:
+ input_free_device(input);
+ kfree(kpad);
+
+ return error;
+}
+
+static int __devexit adp5588_remove(struct i2c_client *client)
+{
+ struct adp5588_kpad *kpad = i2c_get_clientdata(client);
+
+ adp5588_write(client, CFG, 0);
+ free_irq(client->irq, kpad);
+ cancel_delayed_work_sync(&kpad->work);
+ input_unregister_device(kpad->input);
+ i2c_set_clientdata(client, NULL);
+ kfree(kpad);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int adp5588_suspend(struct device *dev)
+{
+ struct adp5588_kpad *kpad = dev_get_drvdata(dev);
+ struct i2c_client *client = kpad->client;
+
+ disable_irq(client->irq);
+ cancel_delayed_work_sync(&kpad->work);
+
+ if (device_may_wakeup(&client->dev))
+ enable_irq_wake(client->irq);
+
+ return 0;
+}
+
+static int adp5588_resume(struct device *dev)
+{
+ struct adp5588_kpad *kpad = dev_get_drvdata(dev);
+ struct i2c_client *client = kpad->client;
+
+ if (device_may_wakeup(&client->dev))
+ disable_irq_wake(client->irq);
+
+ enable_irq(client->irq);
+
+ return 0;
+}
+
+static struct dev_pm_ops adp5588_dev_pm_ops = {
+ .suspend = adp5588_suspend,
+ .resume = adp5588_resume,
+};
+#endif
+
+static const struct i2c_device_id adp5588_id[] = {
+ { KBUILD_MODNAME, 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, adp5588_id);
+
+static struct i2c_driver adp5588_driver = {
+ .driver = {
+ .name = KBUILD_MODNAME,
+#ifdef CONFIG_PM
+ .pm = &adp5588_dev_pm_ops,
+#endif
+ },
+ .probe = adp5588_probe,
+ .remove = __devexit_p(adp5588_remove),
+ .id_table = adp5588_id,
+};
+
+static int __init adp5588_init(void)
+{
+ return i2c_add_driver(&adp5588_driver);
+}
+module_init(adp5588_init);
+
+static void __exit adp5588_exit(void)
+{
+ i2c_del_driver(&adp5588_driver);
+}
+module_exit(adp5588_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("ADP5588 Keypad driver");
+MODULE_ALIAS("platform:adp5588-keys");
static int atkbd_activate(struct atkbd *atkbd)
{
struct ps2dev *ps2dev = &atkbd->ps2dev;
- unsigned char param[1];
-
-/*
- * Set the LEDs to a defined state.
- */
-
- param[0] = 0;
- if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS))
- return -1;
-
-/*
- * Set autorepeat to fastest possible.
- */
-
- param[0] = 0;
- if (ps2_command(ps2dev, param, ATKBD_CMD_SETREP))
- return -1;
/*
* Enable the keyboard to receive keystrokes.
return -1;
atkbd_activate(atkbd);
-
-/*
- * Restore repeat rate and LEDs (that were reset by atkbd_activate)
- * to pre-resume state
- */
- if (!atkbd->softrepeat)
- atkbd_set_repeat_rate(atkbd);
- atkbd_set_leds(atkbd);
}
atkbd_enable(atkbd);
--- /dev/null
+/*
+ * max7359_keypad.c - MAX7359 Key Switch Controller Driver
+ *
+ * Copyright (C) 2009 Samsung Electronics
+ * Kim Kyuwon <q1.kim@samsung.com>
+ *
+ * Based on pxa27x_keypad.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Datasheet: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/5456
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/input/matrix_keypad.h>
+
+#define MAX7359_MAX_KEY_ROWS 8
+#define MAX7359_MAX_KEY_COLS 8
+#define MAX7359_MAX_KEY_NUM (MAX7359_MAX_KEY_ROWS * MAX7359_MAX_KEY_COLS)
+#define MAX7359_ROW_SHIFT 3
+
+/*
+ * MAX7359 registers
+ */
+#define MAX7359_REG_KEYFIFO 0x00
+#define MAX7359_REG_CONFIG 0x01
+#define MAX7359_REG_DEBOUNCE 0x02
+#define MAX7359_REG_INTERRUPT 0x03
+#define MAX7359_REG_PORTS 0x04
+#define MAX7359_REG_KEYREP 0x05
+#define MAX7359_REG_SLEEP 0x06
+
+/*
+ * Configuration register bits
+ */
+#define MAX7359_CFG_SLEEP (1 << 7)
+#define MAX7359_CFG_INTERRUPT (1 << 5)
+#define MAX7359_CFG_KEY_RELEASE (1 << 3)
+#define MAX7359_CFG_WAKEUP (1 << 1)
+#define MAX7359_CFG_TIMEOUT (1 << 0)
+
+/*
+ * Autosleep register values (ms)
+ */
+#define MAX7359_AUTOSLEEP_8192 0x01
+#define MAX7359_AUTOSLEEP_4096 0x02
+#define MAX7359_AUTOSLEEP_2048 0x03
+#define MAX7359_AUTOSLEEP_1024 0x04
+#define MAX7359_AUTOSLEEP_512 0x05
+#define MAX7359_AUTOSLEEP_256 0x06
+
+struct max7359_keypad {
+ /* matrix key code map */
+ unsigned short keycodes[MAX7359_MAX_KEY_NUM];
+
+ struct input_dev *input_dev;
+ struct i2c_client *client;
+};
+
+static int max7359_write_reg(struct i2c_client *client, u8 reg, u8 val)
+{
+ int ret = i2c_smbus_write_byte_data(client, reg, val);
+
+ if (ret < 0)
+ dev_err(&client->dev, "%s: reg 0x%x, val 0x%x, err %d\n",
+ __func__, reg, val, ret);
+ return ret;
+}
+
+static int max7359_read_reg(struct i2c_client *client, int reg)
+{
+ int ret = i2c_smbus_read_byte_data(client, reg);
+
+ if (ret < 0)
+ dev_err(&client->dev, "%s: reg 0x%x, err %d\n",
+ __func__, reg, ret);
+ return ret;
+}
+
+static void max7359_build_keycode(struct max7359_keypad *keypad,
+ const struct matrix_keymap_data *keymap_data)
+{
+ struct input_dev *input_dev = keypad->input_dev;
+ int i;
+
+ for (i = 0; i < keymap_data->keymap_size; i++) {
+ unsigned int key = keymap_data->keymap[i];
+ unsigned int row = KEY_ROW(key);
+ unsigned int col = KEY_COL(key);
+ unsigned int scancode = MATRIX_SCAN_CODE(row, col,
+ MAX7359_ROW_SHIFT);
+ unsigned short keycode = KEY_VAL(key);
+
+ keypad->keycodes[scancode] = keycode;
+ __set_bit(keycode, input_dev->keybit);
+ }
+ __clear_bit(KEY_RESERVED, input_dev->keybit);
+}
+
+/* runs in an IRQ thread -- can (and will!) sleep */
+static irqreturn_t max7359_interrupt(int irq, void *dev_id)
+{
+ struct max7359_keypad *keypad = dev_id;
+ struct input_dev *input_dev = keypad->input_dev;
+ int val, row, col, release, code;
+
+ val = max7359_read_reg(keypad->client, MAX7359_REG_KEYFIFO);
+ row = val & 0x7;
+ col = (val >> 3) & 0x7;
+ release = val & 0x40;
+
+ code = MATRIX_SCAN_CODE(row, col, MAX7359_ROW_SHIFT);
+
+ dev_dbg(&keypad->client->dev,
+ "key[%d:%d] %s\n", row, col, release ? "release" : "press");
+
+ input_event(input_dev, EV_MSC, MSC_SCAN, code);
+ input_report_key(input_dev, keypad->keycodes[code], !release);
+ input_sync(input_dev);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Let MAX7359 fall into a deep sleep:
+ * If no keys are pressed, enter sleep mode for 8192 ms. And if any
+ * key is pressed, the MAX7359 returns to normal operating mode.
+ */
+static inline void max7359_fall_deepsleep(struct i2c_client *client)
+{
+ max7359_write_reg(client, MAX7359_REG_SLEEP, MAX7359_AUTOSLEEP_8192);
+}
+
+/*
+ * Let MAX7359 take a catnap:
+ * Autosleep just for 256 ms.
+ */
+static inline void max7359_take_catnap(struct i2c_client *client)
+{
+ max7359_write_reg(client, MAX7359_REG_SLEEP, MAX7359_AUTOSLEEP_256);
+}
+
+static int max7359_open(struct input_dev *dev)
+{
+ struct max7359_keypad *keypad = input_get_drvdata(dev);
+
+ max7359_take_catnap(keypad->client);
+
+ return 0;
+}
+
+static void max7359_close(struct input_dev *dev)
+{
+ struct max7359_keypad *keypad = input_get_drvdata(dev);
+
+ max7359_fall_deepsleep(keypad->client);
+}
+
+static void max7359_initialize(struct i2c_client *client)
+{
+ max7359_write_reg(client, MAX7359_REG_CONFIG,
+ MAX7359_CFG_INTERRUPT | /* Irq clears after host read */
+ MAX7359_CFG_KEY_RELEASE | /* Key release enable */
+ MAX7359_CFG_WAKEUP); /* Key press wakeup enable */
+
+ /* Full key-scan functionality */
+ max7359_write_reg(client, MAX7359_REG_DEBOUNCE, 0x1F);
+
+ /* nINT asserts every debounce cycles */
+ max7359_write_reg(client, MAX7359_REG_INTERRUPT, 0x01);
+
+ max7359_fall_deepsleep(client);
+}
+
+static int __devinit max7359_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ const struct matrix_keymap_data *keymap_data = client->dev.platform_data;
+ struct max7359_keypad *keypad;
+ struct input_dev *input_dev;
+ int ret;
+ int error;
+
+ if (!client->irq) {
+ dev_err(&client->dev, "The irq number should not be zero\n");
+ return -EINVAL;
+ }
+
+ /* Detect MAX7359: The initial Keys FIFO value is '0x3F' */
+ ret = max7359_read_reg(client, MAX7359_REG_KEYFIFO);
+ if (ret < 0) {
+ dev_err(&client->dev, "failed to detect device\n");
+ return -ENODEV;
+ }
+
+ dev_dbg(&client->dev, "keys FIFO is 0x%02x\n", ret);
+
+ keypad = kzalloc(sizeof(struct max7359_keypad), GFP_KERNEL);
+ input_dev = input_allocate_device();
+ if (!keypad || !input_dev) {
+ dev_err(&client->dev, "failed to allocate memory\n");
+ error = -ENOMEM;
+ goto failed_free_mem;
+ }
+
+ keypad->client = client;
+ keypad->input_dev = input_dev;
+
+ input_dev->name = client->name;
+ input_dev->id.bustype = BUS_I2C;
+ input_dev->open = max7359_open;
+ input_dev->close = max7359_close;
+ input_dev->dev.parent = &client->dev;
+
+ input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+ input_dev->keycodesize = sizeof(keypad->keycodes[0]);
+ input_dev->keycodemax = ARRAY_SIZE(keypad->keycodes);
+ input_dev->keycode = keypad->keycodes;
+
+ input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+ input_set_drvdata(input_dev, keypad);
+
+ max7359_build_keycode(keypad, keymap_data);
+
+ error = request_threaded_irq(client->irq, NULL, max7359_interrupt,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+ client->name, keypad);
+ if (error) {
+ dev_err(&client->dev, "failed to register interrupt\n");
+ goto failed_free_mem;
+ }
+
+ /* Register the input device */
+ error = input_register_device(input_dev);
+ if (error) {
+ dev_err(&client->dev, "failed to register input device\n");
+ goto failed_free_irq;
+ }
+
+ /* Initialize MAX7359 */
+ max7359_initialize(client);
+
+ i2c_set_clientdata(client, keypad);
+ device_init_wakeup(&client->dev, 1);
+
+ return 0;
+
+failed_free_irq:
+ free_irq(client->irq, keypad);
+failed_free_mem:
+ input_free_device(input_dev);
+ kfree(keypad);
+ return error;
+}
+
+static int __devexit max7359_remove(struct i2c_client *client)
+{
+ struct max7359_keypad *keypad = i2c_get_clientdata(client);
+
+ free_irq(client->irq, keypad);
+ input_unregister_device(keypad->input_dev);
+ i2c_set_clientdata(client, NULL);
+ kfree(keypad);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int max7359_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+ max7359_fall_deepsleep(client);
+
+ if (device_may_wakeup(&client->dev))
+ enable_irq_wake(client->irq);
+
+ return 0;
+}
+
+static int max7359_resume(struct i2c_client *client)
+{
+ if (device_may_wakeup(&client->dev))
+ disable_irq_wake(client->irq);
+
+ /* Restore the default setting */
+ max7359_take_catnap(client);
+
+ return 0;
+}
+#else
+#define max7359_suspend NULL
+#define max7359_resume NULL
+#endif
+
+static const struct i2c_device_id max7359_ids[] = {
+ { "max7359", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, max7359_ids);
+
+static struct i2c_driver max7359_i2c_driver = {
+ .driver = {
+ .name = "max7359",
+ },
+ .probe = max7359_probe,
+ .remove = __devexit_p(max7359_remove),
+ .suspend = max7359_suspend,
+ .resume = max7359_resume,
+ .id_table = max7359_ids,
+};
+
+static int __init max7359_init(void)
+{
+ return i2c_add_driver(&max7359_i2c_driver);
+}
+module_init(max7359_init);
+
+static void __exit max7359_exit(void)
+{
+ i2c_del_driver(&max7359_i2c_driver);
+}
+module_exit(max7359_exit);
+
+MODULE_AUTHOR("Kim Kyuwon <q1.kim@samsung.com>");
+MODULE_DESCRIPTION("MAX7359 Key Switch Controller Driver");
+MODULE_LICENSE("GPL v2");
--- /dev/null
+/*
+ * OpenCores Keyboard Controller Driver
+ * http://www.opencores.org/project,keyboardcontroller
+ *
+ * Copyright 2007-2009 HV Sistemas S.L.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+struct opencores_kbd {
+ struct input_dev *input;
+ struct resource *addr_res;
+ void __iomem *addr;
+ int irq;
+ unsigned short keycodes[128];
+};
+
+static irqreturn_t opencores_kbd_isr(int irq, void *dev_id)
+{
+ struct opencores_kbd *opencores_kbd = dev_id;
+ struct input_dev *input = opencores_kbd->input;
+ unsigned char c;
+
+ c = readb(opencores_kbd->addr);
+ input_report_key(input, c & 0x7f, c & 0x80 ? 0 : 1);
+ input_sync(input);
+
+ return IRQ_HANDLED;
+}
+
+static int __devinit opencores_kbd_probe(struct platform_device *pdev)
+{
+ struct input_dev *input;
+ struct opencores_kbd *opencores_kbd;
+ struct resource *res;
+ int irq, i, error;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "missing board memory resource\n");
+ return -EINVAL;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "missing board IRQ resource\n");
+ return -EINVAL;
+ }
+
+ opencores_kbd = kzalloc(sizeof(*opencores_kbd), GFP_KERNEL);
+ input = input_allocate_device();
+ if (!opencores_kbd || !input) {
+ dev_err(&pdev->dev, "failed to allocate device structures\n");
+ error = -ENOMEM;
+ goto err_free_mem;
+ }
+
+ opencores_kbd->addr_res = res;
+ res = request_mem_region(res->start, resource_size(res), pdev->name);
+ if (!res) {
+ dev_err(&pdev->dev, "failed to request I/O memory\n");
+ error = -EBUSY;
+ goto err_free_mem;
+ }
+
+ opencores_kbd->addr = ioremap(res->start, resource_size(res));
+ if (!opencores_kbd->addr) {
+ dev_err(&pdev->dev, "failed to remap I/O memory\n");
+ error = -ENXIO;
+ goto err_rel_mem;
+ }
+
+ opencores_kbd->input = input;
+ opencores_kbd->irq = irq;
+
+ input->name = pdev->name;
+ input->phys = "opencores-kbd/input0";
+ input->dev.parent = &pdev->dev;
+
+ input_set_drvdata(input, opencores_kbd);
+
+ input->id.bustype = BUS_HOST;
+ input->id.vendor = 0x0001;
+ input->id.product = 0x0001;
+ input->id.version = 0x0100;
+
+ input->keycode = opencores_kbd->keycodes;
+ input->keycodesize = sizeof(opencores_kbd->keycodes[0]);
+ input->keycodemax = ARRAY_SIZE(opencores_kbd->keycodes);
+
+ __set_bit(EV_KEY, input->evbit);
+
+ for (i = 0; i < ARRAY_SIZE(opencores_kbd->keycodes); i++) {
+ /*
+ * OpenCores controller happens to have scancodes match
+ * our KEY_* definitions.
+ */
+ opencores_kbd->keycodes[i] = i;
+ __set_bit(opencores_kbd->keycodes[i], input->keybit);
+ }
+ __clear_bit(KEY_RESERVED, input->keybit);
+
+ error = request_irq(irq, &opencores_kbd_isr,
+ IRQF_TRIGGER_RISING, pdev->name, opencores_kbd);
+ if (error) {
+ dev_err(&pdev->dev, "unable to claim irq %d\n", irq);
+ goto err_unmap_mem;
+ }
+
+ error = input_register_device(input);
+ if (error) {
+ dev_err(&pdev->dev, "unable to register input device\n");
+ goto err_free_irq;
+ }
+
+ platform_set_drvdata(pdev, opencores_kbd);
+
+ return 0;
+
+ err_free_irq:
+ free_irq(irq, opencores_kbd);
+ err_unmap_mem:
+ iounmap(opencores_kbd->addr);
+ err_rel_mem:
+ release_mem_region(res->start, resource_size(res));
+ err_free_mem:
+ input_free_device(input);
+ kfree(opencores_kbd);
+
+ return error;
+}
+
+static int __devexit opencores_kbd_remove(struct platform_device *pdev)
+{
+ struct opencores_kbd *opencores_kbd = platform_get_drvdata(pdev);
+
+ free_irq(opencores_kbd->irq, opencores_kbd);
+
+ iounmap(opencores_kbd->addr);
+ release_mem_region(opencores_kbd->addr_res->start,
+ resource_size(opencores_kbd->addr_res));
+ input_unregister_device(opencores_kbd->input);
+ kfree(opencores_kbd);
+
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+static struct platform_driver opencores_kbd_device_driver = {
+ .probe = opencores_kbd_probe,
+ .remove = __devexit_p(opencores_kbd_remove),
+ .driver = {
+ .name = "opencores-kbd",
+ },
+};
+
+static int __init opencores_kbd_init(void)
+{
+ return platform_driver_register(&opencores_kbd_device_driver);
+}
+module_init(opencores_kbd_init);
+
+static void __exit opencores_kbd_exit(void)
+{
+ platform_driver_unregister(&opencores_kbd_device_driver);
+}
+module_exit(opencores_kbd_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Javier Herrero <jherrero@hvsistemas.es>");
+MODULE_DESCRIPTION("Keyboard driver for OpenCores Keyboard Controller");
--- /dev/null
+/*
+ * qt2160.c - Atmel AT42QT2160 Touch Sense Controller
+ *
+ * Copyright (C) 2009 Raphael Derosso Pereira <raphaelpereira@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+
+#define QT2160_VALID_CHIPID 0x11
+
+#define QT2160_CMD_CHIPID 0
+#define QT2160_CMD_CODEVER 1
+#define QT2160_CMD_GSTAT 2
+#define QT2160_CMD_KEYS3 3
+#define QT2160_CMD_KEYS4 4
+#define QT2160_CMD_SLIDE 5
+#define QT2160_CMD_GPIOS 6
+#define QT2160_CMD_SUBVER 7
+#define QT2160_CMD_CALIBRATE 10
+
+#define QT2160_CYCLE_INTERVAL (2*HZ)
+
+static unsigned char qt2160_key2code[] = {
+ KEY_0, KEY_1, KEY_2, KEY_3,
+ KEY_4, KEY_5, KEY_6, KEY_7,
+ KEY_8, KEY_9, KEY_A, KEY_B,
+ KEY_C, KEY_D, KEY_E, KEY_F,
+};
+
+struct qt2160_data {
+ struct i2c_client *client;
+ struct input_dev *input;
+ struct delayed_work dwork;
+ spinlock_t lock; /* Protects canceling/rescheduling of dwork */
+ unsigned short keycodes[ARRAY_SIZE(qt2160_key2code)];
+ u16 key_matrix;
+};
+
+static int qt2160_read_block(struct i2c_client *client,
+ u8 inireg, u8 *buffer, unsigned int count)
+{
+ int error, idx = 0;
+
+ /*
+ * Can't use SMBus block data read. Check for I2C functionality to speed
+ * things up whenever possible. Otherwise we will be forced to read
+ * sequentially.
+ */
+ if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+
+ error = i2c_smbus_write_byte(client, inireg + idx);
+ if (error) {
+ dev_err(&client->dev,
+ "couldn't send request. Returned %d\n", error);
+ return error;
+ }
+
+ error = i2c_master_recv(client, buffer, count);
+ if (error != count) {
+ dev_err(&client->dev,
+ "couldn't read registers. Returned %d bytes\n", error);
+ return error;
+ }
+ } else {
+
+ while (count--) {
+ int data;
+
+ error = i2c_smbus_write_byte(client, inireg + idx);
+ if (error) {
+ dev_err(&client->dev,
+ "couldn't send request. Returned %d\n", error);
+ return error;
+ }
+
+ data = i2c_smbus_read_byte(client);
+ if (data < 0) {
+ dev_err(&client->dev,
+ "couldn't read register. Returned %d\n", data);
+ return data;
+ }
+
+ buffer[idx++] = data;
+ }
+ }
+
+ return 0;
+}
+
+static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
+{
+ struct i2c_client *client = qt2160->client;
+ struct input_dev *input = qt2160->input;
+ u8 regs[6];
+ u16 old_matrix, new_matrix;
+ int ret, i, mask;
+
+ dev_dbg(&client->dev, "requesting keys...\n");
+
+ /*
+ * Read all registers from General Status Register
+ * to GPIOs register
+ */
+ ret = qt2160_read_block(client, QT2160_CMD_GSTAT, regs, 6);
+ if (ret) {
+ dev_err(&client->dev,
+ "could not perform chip read.\n");
+ return ret;
+ }
+
+ old_matrix = qt2160->key_matrix;
+ qt2160->key_matrix = new_matrix = (regs[2] << 8) | regs[1];
+
+ mask = 0x01;
+ for (i = 0; i < 16; ++i, mask <<= 1) {
+ int keyval = new_matrix & mask;
+
+ if ((old_matrix & mask) != keyval) {
+ input_report_key(input, qt2160->keycodes[i], keyval);
+ dev_dbg(&client->dev, "key %d %s\n",
+ i, keyval ? "pressed" : "released");
+ }
+ }
+
+ input_sync(input);
+
+ return 0;
+}
+
+static irqreturn_t qt2160_irq(int irq, void *_qt2160)
+{
+ struct qt2160_data *qt2160 = _qt2160;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qt2160->lock, flags);
+
+ __cancel_delayed_work(&qt2160->dwork);
+ schedule_delayed_work(&qt2160->dwork, 0);
+
+ spin_unlock_irqrestore(&qt2160->lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static void qt2160_schedule_read(struct qt2160_data *qt2160)
+{
+ spin_lock_irq(&qt2160->lock);
+ schedule_delayed_work(&qt2160->dwork, QT2160_CYCLE_INTERVAL);
+ spin_unlock_irq(&qt2160->lock);
+}
+
+static void qt2160_worker(struct work_struct *work)
+{
+ struct qt2160_data *qt2160 =
+ container_of(work, struct qt2160_data, dwork.work);
+
+ dev_dbg(&qt2160->client->dev, "worker\n");
+
+ qt2160_get_key_matrix(qt2160);
+
+ /* Avoid device lock up by checking every so often */
+ qt2160_schedule_read(qt2160);
+}
+
+static int __devinit qt2160_read(struct i2c_client *client, u8 reg)
+{
+ int ret;
+
+ ret = i2c_smbus_write_byte(client, reg);
+ if (ret) {
+ dev_err(&client->dev,
+ "couldn't send request. Returned %d\n", ret);
+ return ret;
+ }
+
+ ret = i2c_smbus_read_byte(client);
+ if (ret < 0) {
+ dev_err(&client->dev,
+ "couldn't read register. Returned %d\n", ret);
+ return ret;
+ }
+
+ return ret;
+}
+
+static int __devinit qt2160_write(struct i2c_client *client, u8 reg, u8 data)
+{
+ int error;
+
+ error = i2c_smbus_write_byte(client, reg);
+ if (error) {
+ dev_err(&client->dev,
+ "couldn't send request. Returned %d\n", error);
+ return error;
+ }
+
+ error = i2c_smbus_write_byte(client, data);
+ if (error) {
+ dev_err(&client->dev,
+ "couldn't write data. Returned %d\n", error);
+ return error;
+ }
+
+ return error;
+}
+
+
+static bool __devinit qt2160_identify(struct i2c_client *client)
+{
+ int id, ver, rev;
+
+ /* Read Chid ID to check if chip is valid */
+ id = qt2160_read(client, QT2160_CMD_CHIPID);
+ if (id != QT2160_VALID_CHIPID) {
+ dev_err(&client->dev, "ID %d not supported\n", id);
+ return false;
+ }
+
+ /* Read chip firmware version */
+ ver = qt2160_read(client, QT2160_CMD_CODEVER);
+ if (ver < 0) {
+ dev_err(&client->dev, "could not get firmware version\n");
+ return false;
+ }
+
+ /* Read chip firmware revision */
+ rev = qt2160_read(client, QT2160_CMD_SUBVER);
+ if (rev < 0) {
+ dev_err(&client->dev, "could not get firmware revision\n");
+ return false;
+ }
+
+ dev_info(&client->dev, "AT42QT2160 firmware version %d.%d.%d\n",
+ ver >> 4, ver & 0xf, rev);
+
+ return true;
+}
+
+static int __devinit qt2160_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct qt2160_data *qt2160;
+ struct input_dev *input;
+ int i;
+ int error;
+
+ /* Check functionality */
+ error = i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_BYTE);
+ if (!error) {
+ dev_err(&client->dev, "%s adapter not supported\n",
+ dev_driver_string(&client->adapter->dev));
+ return -ENODEV;
+ }
+
+ if (!qt2160_identify(client))
+ return -ENODEV;
+
+ /* Chip is valid and active. Allocate structure */
+ qt2160 = kzalloc(sizeof(struct qt2160_data), GFP_KERNEL);
+ input = input_allocate_device();
+ if (!qt2160 || !input) {
+ dev_err(&client->dev, "insufficient memory\n");
+ error = -ENOMEM;
+ goto err_free_mem;
+ }
+
+ qt2160->client = client;
+ qt2160->input = input;
+ INIT_DELAYED_WORK(&qt2160->dwork, qt2160_worker);
+ spin_lock_init(&qt2160->lock);
+
+ input->name = "AT42QT2160 Touch Sense Keyboard";
+ input->id.bustype = BUS_I2C;
+
+ input->keycode = qt2160->keycodes;
+ input->keycodesize = sizeof(qt2160->keycodes[0]);
+ input->keycodemax = ARRAY_SIZE(qt2160_key2code);
+
+ __set_bit(EV_KEY, input->evbit);
+ __clear_bit(EV_REP, input->evbit);
+ for (i = 0; i < ARRAY_SIZE(qt2160_key2code); i++) {
+ qt2160->keycodes[i] = qt2160_key2code[i];
+ __set_bit(qt2160_key2code[i], input->keybit);
+ }
+ __clear_bit(KEY_RESERVED, input->keybit);
+
+ /* Calibrate device */
+ error = qt2160_write(client, QT2160_CMD_CALIBRATE, 1);
+ if (error) {
+ dev_err(&client->dev, "failed to calibrate device\n");
+ goto err_free_mem;
+ }
+
+ if (client->irq) {
+ error = request_irq(client->irq, qt2160_irq,
+ IRQF_TRIGGER_FALLING, "qt2160", qt2160);
+ if (error) {
+ dev_err(&client->dev,
+ "failed to allocate irq %d\n", client->irq);
+ goto err_free_mem;
+ }
+ }
+
+ error = input_register_device(qt2160->input);
+ if (error) {
+ dev_err(&client->dev,
+ "Failed to register input device\n");
+ goto err_free_irq;
+ }
+
+ i2c_set_clientdata(client, qt2160);
+ qt2160_schedule_read(qt2160);
+
+ return 0;
+
+err_free_irq:
+ if (client->irq)
+ free_irq(client->irq, qt2160);
+err_free_mem:
+ input_free_device(input);
+ kfree(qt2160);
+ return error;
+}
+
+static int __devexit qt2160_remove(struct i2c_client *client)
+{
+ struct qt2160_data *qt2160 = i2c_get_clientdata(client);
+
+ /* Release IRQ so no queue will be scheduled */
+ if (client->irq)
+ free_irq(client->irq, qt2160);
+
+ cancel_delayed_work_sync(&qt2160->dwork);
+
+ input_unregister_device(qt2160->input);
+ kfree(qt2160);
+
+ i2c_set_clientdata(client, NULL);
+ return 0;
+}
+
+static struct i2c_device_id qt2160_idtable[] = {
+ { "qt2160", 0, },
+ { }
+};
+
+MODULE_DEVICE_TABLE(i2c, qt2160_idtable);
+
+static struct i2c_driver qt2160_driver = {
+ .driver = {
+ .name = "qt2160",
+ .owner = THIS_MODULE,
+ },
+
+ .id_table = qt2160_idtable,
+ .probe = qt2160_probe,
+ .remove = __devexit_p(qt2160_remove),
+};
+
+static int __init qt2160_init(void)
+{
+ return i2c_add_driver(&qt2160_driver);
+}
+module_init(qt2160_init);
+
+static void __exit qt2160_cleanup(void)
+{
+ i2c_del_driver(&qt2160_driver);
+}
+module_exit(qt2160_cleanup);
+
+MODULE_AUTHOR("Raphael Derosso Pereira <raphaelpereira@gmail.com>");
+MODULE_DESCRIPTION("Driver for AT42QT2160 Touch Sensor");
+MODULE_LICENSE("GPL");
config INPUT_WINBOND_CIR
tristate "Winbond IR remote control"
depends on X86 && PNP
+ select NEW_LEDS
select LEDS_CLASS
select BITREVERSE
help
{ 0x3169, KEY_PAUSE, },
};
-/* runs in an IRQ thread -- can (and will!) sleep */
+/*
+ * Because we communicate with the MSP430 using I2C, and all I2C calls
+ * in Linux sleep, we use a threaded IRQ handler. The IRQ itself is
+ * active low, but we go through the GPIO controller so we can trigger
+ * on falling edges and not worry about enabling/disabling the IRQ in
+ * the keypress handling path.
+ */
static irqreturn_t dm355evm_keys_irq(int irq, void *_keys)
{
struct dm355evm_keys *keys = _keys;
return IRQ_HANDLED;
}
-/*
- * Because we communicate with the MSP430 using I2C, and all I2C calls
- * in Linux sleep, we use a threaded IRQ handler. The IRQ itself is
- * active low, but we go through the GPIO controller so we can trigger
- * on falling edges and not worry about enabling/disabling the IRQ in
- * the keypress handling path.
- */
-static irqreturn_t dm355evm_keys_hardirq(int irq, void *_keys)
-{
- return IRQ_WAKE_THREAD;
-}
-
static int dm355evm_setkeycode(struct input_dev *dev, int index, int keycode)
{
u16 old_keycode;
/* REVISIT: flush the event queue? */
- status = request_threaded_irq(keys->irq,
- dm355evm_keys_hardirq, dm355evm_keys_irq,
- IRQF_TRIGGER_FALLING,
- dev_name(&pdev->dev), keys);
+ status = request_threaded_irq(keys->irq, NULL, dm355evm_keys_irq,
+ IRQF_TRIGGER_FALLING, dev_name(&pdev->dev), keys);
if (status < 0)
goto fail1;
*/
ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE);
psmouse_set_state(psmouse, PSMOUSE_CMD_MODE);
- mutex_lock(&ps2dev->cmd_mutex);
+
+ ps2_begin_command(ps2dev);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
goto out;
rc = 0;
out:
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE);
psmouse_set_state(psmouse, PSMOUSE_ACTIVATED);
dev_dbg(&ps2dev->serio->dev, "READ REG: 0x%02x is 0x%02x (rc = %d)\n",
unsigned char v;
int rc = -1;
- mutex_lock(&ps2dev->cmd_mutex);
+ ps2_begin_command(ps2dev);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
goto out;
rc = 0;
out:
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
dev_dbg(&ps2dev->serio->dev, "WRITE REG: 0x%02x to 0x%02x (rc = %d)\n",
reg_addr, reg_val, rc);
return rc;
ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE);
psmouse_set_state(psmouse, PSMOUSE_CMD_MODE);
- mutex_lock(&ps2dev->cmd_mutex);
+
+ ps2_begin_command(ps2dev);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
goto out;
rc = 0;
out:
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE);
psmouse_set_state(psmouse, PSMOUSE_ACTIVATED);
dev_dbg(&ps2dev->serio->dev, "READ PAGE REG: 0x%02x (rc = %d)\n",
unsigned char v;
int rc = -1;
- mutex_lock(&ps2dev->cmd_mutex);
+ ps2_begin_command(ps2dev);
if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
goto out;
rc = 0;
out:
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
dev_dbg(&ps2dev->serio->dev, "WRITE PAGE REG: to 0x%02x (rc = %d)\n",
reg_val, rc);
return rc;
* and the irq configuration should be set to Falling Edge Trigger
*/
/* Control IRQ / Polling option */
-static int polling_req;
+static bool polling_req;
module_param(polling_req, bool, 0444);
MODULE_PARM_DESC(polling_req, "Request Polling. Default = 0 (use irq)");
struct i2c_client *client;
struct input_dev *input;
struct delayed_work dwork;
+ spinlock_t lock;
int no_data_count;
int no_decel_param;
int reduce_report_param;
return xy_delta || gesture;
}
-static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id)
+static void synaptics_i2c_reschedule_work(struct synaptics_i2c *touch,
+ unsigned long delay)
{
- struct synaptics_i2c *touch = dev_id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&touch->lock, flags);
/*
- * We want to have the work run immediately but it might have
- * already been scheduled with a delay, that's why we have to
- * cancel it first.
+ * If work is already scheduled then subsequent schedules will not
+ * change the scheduled time that's why we have to cancel it first.
*/
- cancel_delayed_work(&touch->dwork);
- schedule_delayed_work(&touch->dwork, 0);
+ __cancel_delayed_work(&touch->dwork);
+ schedule_delayed_work(&touch->dwork, delay);
+
+ spin_unlock_irqrestore(&touch->lock, flags);
+}
+
+static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id)
+{
+ struct synaptics_i2c *touch = dev_id;
+
+ synaptics_i2c_reschedule_work(touch, 0);
return IRQ_HANDLED;
}
* We poll the device once in THREAD_IRQ_SLEEP_SECS and
* if error is detected, we try to reset and reconfigure the touchpad.
*/
- schedule_delayed_work(&touch->dwork, delay);
+ synaptics_i2c_reschedule_work(touch, delay);
}
static int synaptics_i2c_open(struct input_dev *input)
return ret;
if (polling_req)
- schedule_delayed_work(&touch->dwork,
- msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
+ synaptics_i2c_reschedule_work(touch,
+ msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
return 0;
}
touch->scan_rate_param = scan_rate;
set_scan_rate(touch, scan_rate);
INIT_DELAYED_WORK(&touch->dwork, synaptics_i2c_work_handler);
+ spin_lock_init(&touch->lock);
return touch;
}
if (!touch)
return -ENOMEM;
- i2c_set_clientdata(client, touch);
-
ret = synaptics_i2c_reset_config(client);
if (ret)
goto err_mem_free;
if (client->irq < 1)
- polling_req = 1;
+ polling_req = true;
touch->input = input_allocate_device();
if (!touch->input) {
dev_warn(&touch->client->dev,
"IRQ request failed: %d, "
"falling back to polling\n", ret);
- polling_req = 1;
+ polling_req = true;
synaptics_i2c_reg_set(touch->client,
INTERRUPT_EN_REG, 0);
}
"Input device register failed: %d\n", ret);
goto err_input_free;
}
+
+ i2c_set_clientdata(client, touch);
+
return 0;
err_input_free:
input_free_device(touch->input);
err_mem_free:
- i2c_set_clientdata(client, NULL);
kfree(touch);
return ret;
struct synaptics_i2c *touch = i2c_get_clientdata(client);
if (!polling_req)
- free_irq(touch->client->irq, touch);
+ free_irq(client->irq, touch);
input_unregister_device(touch->input);
i2c_set_clientdata(client, NULL);
if (ret)
return ret;
- schedule_delayed_work(&touch->dwork,
- msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
+ synaptics_i2c_reschedule_work(touch,
+ msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
return 0;
}
#include "i8042.h"
+/*
+ * i8042_lock protects serialization between i8042_command and
+ * the interrupt handler.
+ */
static DEFINE_SPINLOCK(i8042_lock);
+/*
+ * Writers to AUX and KBD ports as well as users issuing i8042_command
+ * directly should acquire i8042_mutex (by means of calling
+ * i8042_lock_chip() and i8042_unlock_ship() helpers) to ensure that
+ * they do not disturb each other (unfortunately in many i8042
+ * implementations write to one of the ports will immediately abort
+ * command that is being processed by another port).
+ */
+static DEFINE_MUTEX(i8042_mutex);
+
struct i8042_port {
struct serio *serio;
int irq;
static irqreturn_t i8042_interrupt(int irq, void *dev_id);
+void i8042_lock_chip(void)
+{
+ mutex_lock(&i8042_mutex);
+}
+EXPORT_SYMBOL(i8042_lock_chip);
+
+void i8042_unlock_chip(void)
+{
+ mutex_unlock(&i8042_mutex);
+}
+EXPORT_SYMBOL(i8042_unlock_chip);
+
/*
* The i8042_wait_read() and i8042_wait_write functions wait for the i8042 to
* be ready for reading values from it / writing values to it.
}
}
+/*
+ * Checks whether port belongs to i8042 controller.
+ */
+bool i8042_check_port_owner(const struct serio *port)
+{
+ int i;
+
+ for (i = 0; i < I8042_NUM_PORTS; i++)
+ if (i8042_ports[i].serio == port)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(i8042_check_port_owner);
+
static void i8042_free_irqs(void)
{
if (i8042_aux_irq_registered)
#include <linux/interrupt.h>
#include <linux/input.h>
#include <linux/serio.h>
+#include <linux/i8042.h>
#include <linux/init.h>
#include <linux/libps2.h>
}
EXPORT_SYMBOL(ps2_sendbyte);
+void ps2_begin_command(struct ps2dev *ps2dev)
+{
+ mutex_lock(&ps2dev->cmd_mutex);
+
+ if (i8042_check_port_owner(ps2dev->serio))
+ i8042_lock_chip();
+}
+EXPORT_SYMBOL(ps2_begin_command);
+
+void ps2_end_command(struct ps2dev *ps2dev)
+{
+ if (i8042_check_port_owner(ps2dev->serio))
+ i8042_unlock_chip();
+
+ mutex_unlock(&ps2dev->cmd_mutex);
+}
+EXPORT_SYMBOL(ps2_end_command);
+
/*
* ps2_drain() waits for device to transmit requested number of bytes
* and discards them.
maxbytes = sizeof(ps2dev->cmdbuf);
}
- mutex_lock(&ps2dev->cmd_mutex);
+ ps2_begin_command(ps2dev);
serio_pause_rx(ps2dev->serio);
ps2dev->flags = PS2_FLAG_CMD;
wait_event_timeout(ps2dev->wait,
!(ps2dev->flags & PS2_FLAG_CMD),
msecs_to_jiffies(timeout));
- mutex_unlock(&ps2dev->cmd_mutex);
+
+ ps2_end_command(ps2dev);
}
EXPORT_SYMBOL(ps2_drain);
{
int rc;
- mutex_lock(&ps2dev->cmd_mutex);
+ ps2_begin_command(ps2dev);
rc = __ps2_command(ps2dev, param, command);
- mutex_unlock(&ps2dev->cmd_mutex);
+ ps2_end_command(ps2dev);
return rc;
}
select TOUCHSCREEN_AD7879
help
Say Y here if you have a touchscreen interface using the
- AD7879-1 controller, and your board-specific initialization
- code includes that in its table of I2C devices.
+ AD7879-1/AD7889-1 controller, and your board-specific
+ initialization code includes that in its table of I2C devices.
If unsure, say N (but it's safe to say "Y").
select TOUCHSCREEN_AD7879
help
Say Y here if you have a touchscreen interface using the
- AD7879 controller, and your board-specific initialization
+ AD7879/AD7889 controller, and your board-specific initialization
code includes that in its table of SPI devices.
If unsure, say N (but it's safe to say "Y").
To compile this driver as a module, choose M here: the
module will be called wacom_w8001.
+config TOUCHSCREEN_MCS5000
+ tristate "MELFAS MCS-5000 touchscreen"
+ depends on I2C
+ help
+ Say Y here if you have the MELFAS MCS-5000 touchscreen controller
+ chip in your system.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mcs5000_ts.
config TOUCHSCREEN_MTOUCH
tristate "MicroTouch serial touchscreens"
obj-$(CONFIG_TOUCHSCREEN_ELO) += elo.o
obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o
obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o
+obj-$(CONFIG_TOUCHSCREEN_MCS5000) += mcs5000_ts.o
obj-$(CONFIG_TOUCHSCREEN_MIGOR) += migor_ts.o
obj-$(CONFIG_TOUCHSCREEN_MTOUCH) += mtouch.o
obj-$(CONFIG_TOUCHSCREEN_MK712) += mk712.o
/*
- * Copyright (C) 2008 Michael Hennerich, Analog Devices Inc.
+ * Copyright (C) 2008-2009 Michael Hennerich, Analog Devices Inc.
*
- * Description: AD7879 based touchscreen, and GPIO driver (I2C/SPI Interface)
+ * Description: AD7879/AD7889 based touchscreen, and GPIO driver
+ * (I2C/SPI Interface)
*
* Bugs: Enter bugs at http://blackfin.uclinux.org/
*
static const struct i2c_device_id ad7879_id[] = {
{ "ad7879", 0 },
+ { "ad7889", 0 },
{ }
};
MODULE_DEVICE_TABLE(i2c, ad7879_id);
--- /dev/null
+/*
+ * mcs5000_ts.c - Touchscreen driver for MELFAS MCS-5000 controller
+ *
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * Based on wm97xx-core.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/i2c/mcs5000_ts.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/irq.h>
+
+/* Registers */
+#define MCS5000_TS_STATUS 0x00
+#define STATUS_OFFSET 0
+#define STATUS_NO (0 << STATUS_OFFSET)
+#define STATUS_INIT (1 << STATUS_OFFSET)
+#define STATUS_SENSING (2 << STATUS_OFFSET)
+#define STATUS_COORD (3 << STATUS_OFFSET)
+#define STATUS_GESTURE (4 << STATUS_OFFSET)
+#define ERROR_OFFSET 4
+#define ERROR_NO (0 << ERROR_OFFSET)
+#define ERROR_POWER_ON_RESET (1 << ERROR_OFFSET)
+#define ERROR_INT_RESET (2 << ERROR_OFFSET)
+#define ERROR_EXT_RESET (3 << ERROR_OFFSET)
+#define ERROR_INVALID_REG_ADDRESS (8 << ERROR_OFFSET)
+#define ERROR_INVALID_REG_VALUE (9 << ERROR_OFFSET)
+
+#define MCS5000_TS_OP_MODE 0x01
+#define RESET_OFFSET 0
+#define RESET_NO (0 << RESET_OFFSET)
+#define RESET_EXT_SOFT (1 << RESET_OFFSET)
+#define OP_MODE_OFFSET 1
+#define OP_MODE_SLEEP (0 << OP_MODE_OFFSET)
+#define OP_MODE_ACTIVE (1 << OP_MODE_OFFSET)
+#define GESTURE_OFFSET 4
+#define GESTURE_DISABLE (0 << GESTURE_OFFSET)
+#define GESTURE_ENABLE (1 << GESTURE_OFFSET)
+#define PROXIMITY_OFFSET 5
+#define PROXIMITY_DISABLE (0 << PROXIMITY_OFFSET)
+#define PROXIMITY_ENABLE (1 << PROXIMITY_OFFSET)
+#define SCAN_MODE_OFFSET 6
+#define SCAN_MODE_INTERRUPT (0 << SCAN_MODE_OFFSET)
+#define SCAN_MODE_POLLING (1 << SCAN_MODE_OFFSET)
+#define REPORT_RATE_OFFSET 7
+#define REPORT_RATE_40 (0 << REPORT_RATE_OFFSET)
+#define REPORT_RATE_80 (1 << REPORT_RATE_OFFSET)
+
+#define MCS5000_TS_SENS_CTL 0x02
+#define MCS5000_TS_FILTER_CTL 0x03
+#define PRI_FILTER_OFFSET 0
+#define SEC_FILTER_OFFSET 4
+
+#define MCS5000_TS_X_SIZE_UPPER 0x08
+#define MCS5000_TS_X_SIZE_LOWER 0x09
+#define MCS5000_TS_Y_SIZE_UPPER 0x0A
+#define MCS5000_TS_Y_SIZE_LOWER 0x0B
+
+#define MCS5000_TS_INPUT_INFO 0x10
+#define INPUT_TYPE_OFFSET 0
+#define INPUT_TYPE_NONTOUCH (0 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_SINGLE (1 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_DUAL (2 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_PALM (3 << INPUT_TYPE_OFFSET)
+#define INPUT_TYPE_PROXIMITY (7 << INPUT_TYPE_OFFSET)
+#define GESTURE_CODE_OFFSET 3
+#define GESTURE_CODE_NO (0 << GESTURE_CODE_OFFSET)
+
+#define MCS5000_TS_X_POS_UPPER 0x11
+#define MCS5000_TS_X_POS_LOWER 0x12
+#define MCS5000_TS_Y_POS_UPPER 0x13
+#define MCS5000_TS_Y_POS_LOWER 0x14
+#define MCS5000_TS_Z_POS 0x15
+#define MCS5000_TS_WIDTH 0x16
+#define MCS5000_TS_GESTURE_VAL 0x17
+#define MCS5000_TS_MODULE_REV 0x20
+#define MCS5000_TS_FIRMWARE_VER 0x21
+
+/* Touchscreen absolute values */
+#define MCS5000_MAX_XC 0x3ff
+#define MCS5000_MAX_YC 0x3ff
+
+enum mcs5000_ts_read_offset {
+ READ_INPUT_INFO,
+ READ_X_POS_UPPER,
+ READ_X_POS_LOWER,
+ READ_Y_POS_UPPER,
+ READ_Y_POS_LOWER,
+ READ_BLOCK_SIZE,
+};
+
+/* Each client has this additional data */
+struct mcs5000_ts_data {
+ struct i2c_client *client;
+ struct input_dev *input_dev;
+ const struct mcs5000_ts_platform_data *platform_data;
+};
+
+static irqreturn_t mcs5000_ts_interrupt(int irq, void *dev_id)
+{
+ struct mcs5000_ts_data *data = dev_id;
+ struct i2c_client *client = data->client;
+ u8 buffer[READ_BLOCK_SIZE];
+ int err;
+ int x;
+ int y;
+
+ err = i2c_smbus_read_i2c_block_data(client, MCS5000_TS_INPUT_INFO,
+ READ_BLOCK_SIZE, buffer);
+ if (err < 0) {
+ dev_err(&client->dev, "%s, err[%d]\n", __func__, err);
+ goto out;
+ }
+
+ switch (buffer[READ_INPUT_INFO]) {
+ case INPUT_TYPE_NONTOUCH:
+ input_report_key(data->input_dev, BTN_TOUCH, 0);
+ input_sync(data->input_dev);
+ break;
+
+ case INPUT_TYPE_SINGLE:
+ x = (buffer[READ_X_POS_UPPER] << 8) | buffer[READ_X_POS_LOWER];
+ y = (buffer[READ_Y_POS_UPPER] << 8) | buffer[READ_Y_POS_LOWER];
+
+ input_report_key(data->input_dev, BTN_TOUCH, 1);
+ input_report_abs(data->input_dev, ABS_X, x);
+ input_report_abs(data->input_dev, ABS_Y, y);
+ input_sync(data->input_dev);
+ break;
+
+ case INPUT_TYPE_DUAL:
+ /* TODO */
+ break;
+
+ case INPUT_TYPE_PALM:
+ /* TODO */
+ break;
+
+ case INPUT_TYPE_PROXIMITY:
+ /* TODO */
+ break;
+
+ default:
+ dev_err(&client->dev, "Unknown ts input type %d\n",
+ buffer[READ_INPUT_INFO]);
+ break;
+ }
+
+ out:
+ return IRQ_HANDLED;
+}
+
+static void mcs5000_ts_phys_init(struct mcs5000_ts_data *data)
+{
+ const struct mcs5000_ts_platform_data *platform_data =
+ data->platform_data;
+ struct i2c_client *client = data->client;
+
+ /* Touch reset & sleep mode */
+ i2c_smbus_write_byte_data(client, MCS5000_TS_OP_MODE,
+ RESET_EXT_SOFT | OP_MODE_SLEEP);
+
+ /* Touch size */
+ i2c_smbus_write_byte_data(client, MCS5000_TS_X_SIZE_UPPER,
+ platform_data->x_size >> 8);
+ i2c_smbus_write_byte_data(client, MCS5000_TS_X_SIZE_LOWER,
+ platform_data->x_size & 0xff);
+ i2c_smbus_write_byte_data(client, MCS5000_TS_Y_SIZE_UPPER,
+ platform_data->y_size >> 8);
+ i2c_smbus_write_byte_data(client, MCS5000_TS_Y_SIZE_LOWER,
+ platform_data->y_size & 0xff);
+
+ /* Touch active mode & 80 report rate */
+ i2c_smbus_write_byte_data(data->client, MCS5000_TS_OP_MODE,
+ OP_MODE_ACTIVE | REPORT_RATE_80);
+}
+
+static int __devinit mcs5000_ts_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct mcs5000_ts_data *data;
+ struct input_dev *input_dev;
+ int ret;
+
+ if (!client->dev.platform_data)
+ return -EINVAL;
+
+ data = kzalloc(sizeof(struct mcs5000_ts_data), GFP_KERNEL);
+ input_dev = input_allocate_device();
+ if (!data || !input_dev) {
+ dev_err(&client->dev, "Failed to allocate memory\n");
+ ret = -ENOMEM;
+ goto err_free_mem;
+ }
+
+ data->client = client;
+ data->input_dev = input_dev;
+ data->platform_data = client->dev.platform_data;
+
+ input_dev->name = "MELPAS MCS-5000 Touchscreen";
+ input_dev->id.bustype = BUS_I2C;
+ input_dev->dev.parent = &client->dev;
+
+ __set_bit(EV_ABS, input_dev->evbit);
+ __set_bit(EV_KEY, input_dev->evbit);
+ __set_bit(BTN_TOUCH, input_dev->keybit);
+ input_set_abs_params(input_dev, ABS_X, 0, MCS5000_MAX_XC, 0, 0);
+ input_set_abs_params(input_dev, ABS_Y, 0, MCS5000_MAX_YC, 0, 0);
+
+ input_set_drvdata(input_dev, data);
+
+ if (data->platform_data->cfg_pin)
+ data->platform_data->cfg_pin();
+
+ ret = request_threaded_irq(client->irq, NULL, mcs5000_ts_interrupt,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT, "mcs5000_ts", data);
+
+ if (ret < 0) {
+ dev_err(&client->dev, "Failed to register interrupt\n");
+ goto err_free_mem;
+ }
+
+ ret = input_register_device(data->input_dev);
+ if (ret < 0)
+ goto err_free_irq;
+
+ mcs5000_ts_phys_init(data);
+ i2c_set_clientdata(client, data);
+
+ return 0;
+
+err_free_irq:
+ free_irq(client->irq, data);
+err_free_mem:
+ input_free_device(input_dev);
+ kfree(data);
+ return ret;
+}
+
+static int __devexit mcs5000_ts_remove(struct i2c_client *client)
+{
+ struct mcs5000_ts_data *data = i2c_get_clientdata(client);
+
+ free_irq(client->irq, data);
+ input_unregister_device(data->input_dev);
+ kfree(data);
+ i2c_set_clientdata(client, NULL);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int mcs5000_ts_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+ /* Touch sleep mode */
+ i2c_smbus_write_byte_data(client, MCS5000_TS_OP_MODE, OP_MODE_SLEEP);
+
+ return 0;
+}
+
+static int mcs5000_ts_resume(struct i2c_client *client)
+{
+ struct mcs5000_ts_data *data = i2c_get_clientdata(client);
+
+ mcs5000_ts_phys_init(data);
+
+ return 0;
+}
+#else
+#define mcs5000_ts_suspend NULL
+#define mcs5000_ts_resume NULL
+#endif
+
+static const struct i2c_device_id mcs5000_ts_id[] = {
+ { "mcs5000_ts", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, mcs5000_ts_id);
+
+static struct i2c_driver mcs5000_ts_driver = {
+ .probe = mcs5000_ts_probe,
+ .remove = __devexit_p(mcs5000_ts_remove),
+ .suspend = mcs5000_ts_suspend,
+ .resume = mcs5000_ts_resume,
+ .driver = {
+ .name = "mcs5000_ts",
+ },
+ .id_table = mcs5000_ts_id,
+};
+
+static int __init mcs5000_ts_init(void)
+{
+ return i2c_add_driver(&mcs5000_ts_driver);
+}
+
+static void __exit mcs5000_ts_exit(void)
+{
+ i2c_del_driver(&mcs5000_ts_driver);
+}
+
+module_init(mcs5000_ts_init);
+module_exit(mcs5000_ts_exit);
+
+/* Module information */
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_DESCRIPTION("Touchscreen driver for MELFAS MCS-5000 controller");
+MODULE_LICENSE("GPL");
static void clevo_mail_led_set(struct led_classdev *led_cdev,
enum led_brightness value)
{
+ i8042_lock_chip();
+
if (value == LED_OFF)
i8042_command(NULL, CLEVO_MAIL_LED_OFF);
else if (value <= LED_HALF)
else
i8042_command(NULL, CLEVO_MAIL_LED_BLINK_1HZ);
+ i8042_unlock_chip();
+
}
static int clevo_mail_led_blink(struct led_classdev *led_cdev,
{
int status = -EINVAL;
+ i8042_lock_chip();
+
if (*delay_on == 0 /* ms */ && *delay_off == 0 /* ms */) {
/* Special case: the leds subsystem requested us to
* chose one user friendly blinking of the LED, and
*delay_on, *delay_off);
}
+ i8042_unlock_chip();
+
return status;
}
select MD_RAID6_PQ
select ASYNC_MEMCPY
select ASYNC_XOR
+ select ASYNC_PQ
+ select ASYNC_RAID6_RECOV
---help---
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
If unsure, say Y.
+config MULTICORE_RAID456
+ bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
+ depends on MD_RAID456
+ depends on SMP
+ depends on EXPERIMENTAL
+ ---help---
+ Enable the raid456 module to dispatch per-stripe raid operations to a
+ thread pool.
+
+ If unsure, say N.
+
config MD_RAID6_PQ
tristate
+config ASYNC_RAID6_TEST
+ tristate "Self test for hardware accelerated raid6 recovery"
+ depends on MD_RAID6_PQ
+ select ASYNC_RAID6_RECOV
+ ---help---
+ This is a one-shot self test that permutes through the
+ recovery of all the possible two disk failure scenarios for a
+ N-disk array. Recovery is performed with the asynchronous
+ raid6 recovery routines, and will optionally use an offload
+ engine if one is available.
+
+ If unsure, say N.
+
config MD_MULTIPATH
tristate "Multipath I/O support"
depends on BLK_DEV_MD
* allocated while we're using it
*/
static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
+__releases(bitmap->lock)
+__acquires(bitmap->lock)
{
unsigned char *mappage;
return 0;
bad_alignment:
- rcu_read_unlock();
return -EINVAL;
}
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
sector_t offset, int *blocks,
int create)
+__releases(bitmap->lock)
+__acquires(bitmap->lock)
{
/* If 'create', we might release the lock and reclaim it.
* The lock must have been taken with interrupts enabled.
linear_conf_t *conf;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
+
rcu_read_lock();
conf = rcu_dereference(mddev->private);
mddev->pers->quiesce(mddev, 0);
}
+int mddev_congested(mddev_t *mddev, int bits)
+{
+ return mddev->suspended;
+}
+EXPORT_SYMBOL(mddev_congested);
+
static inline mddev_t *mddev_get(mddev_t *mddev)
{
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
- "%s_resync");
+ "resync");
if (!mddev->sync_thread) {
printk(KERN_ERR "%s: could not start resync"
" thread...\n",
static int get_array_info(mddev_t * mddev, void __user * arg)
{
mdu_array_info_t info;
- int nr,working,active,failed,spare;
+ int nr,working,insync,failed,spare;
mdk_rdev_t *rdev;
- nr=working=active=failed=spare=0;
+ nr=working=insync=failed=spare=0;
list_for_each_entry(rdev, &mddev->disks, same_set) {
nr++;
if (test_bit(Faulty, &rdev->flags))
else {
working++;
if (test_bit(In_sync, &rdev->flags))
- active++;
+ insync++;
else
spare++;
}
info.state = (1<<MD_SB_CLEAN);
if (mddev->bitmap && mddev->bitmap_offset)
info.state = (1<<MD_SB_BITMAP_PRESENT);
- info.active_disks = active;
+ info.active_disks = insync;
info.working_disks = working;
info.failed_disks = failed;
info.spare_disks = spare;
if (!list_empty(&mddev->disks)) {
mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
mdk_rdev_t, same_set);
- int err = super_types[mddev->major_version]
+ err = super_types[mddev->major_version]
.load_super(rdev, rdev0, mddev->minor_version);
if (err < 0) {
printk(KERN_WARNING
thread->run = run;
thread->mddev = mddev;
thread->timeout = MAX_SCHEDULE_TIMEOUT;
- thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
+ thread->tsk = kthread_run(md_thread, thread,
+ "%s_%s",
+ mdname(thread->mddev),
+ name ?: mddev->pers->name);
if (IS_ERR(thread->tsk)) {
kfree(thread);
return NULL;
}
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
- "%s_resync");
+ "resync");
if (!mddev->sync_thread) {
printk(KERN_ERR "%s: could not start resync"
" thread...\n",
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
+extern int mddev_congested(mddev_t *mddev, int bits);
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page);
extern void md_super_wait(mddev_t *mddev);
}
mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
- memset(mp_bh, 0, sizeof(*mp_bh));
mp_bh->master_bio = bio;
mp_bh->mddev = mddev;
multipath_conf_t *conf = mddev->private;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
+
rcu_read_lock();
for (i = 0; i < mddev->raid_disks ; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
}
{
- mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath");
+ mddev->thread = md_register_thread(multipathd, mddev, NULL);
if (!mddev->thread) {
printk(KERN_ERR "multipath: couldn't allocate thread"
" for %s\n", mdname(mddev));
mdk_rdev_t **devlist = conf->devlist;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
+
for (i = 0; i < mddev->raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
static int create_strip_zones(mddev_t *mddev)
{
- int i, c, j, err;
+ int i, c, err;
sector_t curr_zone_end, sectors;
mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
struct strip_zone *zone;
/* now do the other zones */
for (i = 1; i < conf->nr_strip_zones; i++)
{
+ int j;
+
zone = conf->strip_zone + i;
dev = conf->devlist + i * mddev->raid_disks;
c = 0;
for (j=0; j<cnt; j++) {
- char b[BDEVNAME_SIZE];
rdev = conf->devlist[j];
printk(KERN_INFO "raid0: checking %s ...",
bdevname(rdev->bdev, b));
conf_t *conf = mddev->private;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
+
rcu_read_lock();
for (i = 0; i < mddev->raid_disks; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid1_end_read_request;
- read_bio->bi_rw = READ | do_sync;
+ read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
read_bio->bi_private = r1_bio;
generic_make_request(read_bio);
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
mbio->bi_end_io = raid1_end_write_request;
- mbio->bi_rw = WRITE | do_barriers | do_sync;
+ mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) |
+ (do_sync << BIO_RW_SYNCIO);
mbio->bi_private = r1_bio;
if (behind_pages) {
conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
bio->bi_end_io = raid1_end_write_request;
- bio->bi_rw = WRITE | do_sync;
+ bio->bi_rw = WRITE |
+ (do_sync << BIO_RW_SYNCIO);
bio->bi_private = r1_bio;
r1_bio->bios[i] = bio;
generic_make_request(bio);
bio->bi_sector = r1_bio->sector + rdev->data_offset;
bio->bi_bdev = rdev->bdev;
bio->bi_end_io = raid1_end_read_request;
- bio->bi_rw = READ | do_sync;
+ bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
bio->bi_private = r1_bio;
unplug = 1;
generic_make_request(bio);
conf->last_used = j;
- mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
+ mddev->thread = md_register_thread(raid1d, mddev, NULL);
if (!mddev->thread) {
printk(KERN_ERR
"raid1: couldn't allocate thread for %s\n",
conf_t *conf = mddev->private;
int i, ret = 0;
+ if (mddev_congested(mddev, bits))
+ return 1;
rcu_read_lock();
for (i = 0; i < mddev->raid_disks && ret == 0; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid10_end_read_request;
- read_bio->bi_rw = READ | do_sync;
+ read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
read_bio->bi_private = r10_bio;
generic_make_request(read_bio);
conf->mirrors[d].rdev->data_offset;
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_rw = WRITE | do_sync;
+ mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
mbio->bi_private = r10_bio;
atomic_inc(&r10_bio->remaining);
bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset;
bio->bi_bdev = rdev->bdev;
- bio->bi_rw = READ | do_sync;
+ bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
bio->bi_private = r10_bio;
bio->bi_end_io = raid10_end_read_request;
unplug = 1;
max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
/* recovery... the complicated one */
- int i, j, k;
+ int j, k;
r10_bio = NULL;
for (i=0 ; i<conf->raid_disks; i++)
}
- mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10");
+ mddev->thread = md_register_thread(raid10d, mddev, NULL);
if (!mddev->thread) {
printk(KERN_ERR
"raid10: couldn't allocate thread for %s\n",
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
+#include <linux/async.h>
#include <linux/seq_file.h>
+#include <linux/cpu.h>
#include "md.h"
#include "raid5.h"
#include "bitmap.h"
struct page *bio_page;
int i;
int page_offset;
+ struct async_submit_ctl submit;
+ enum async_tx_flags flags = 0;
if (bio->bi_sector >= sector)
page_offset = (signed)(bio->bi_sector - sector) * 512;
else
page_offset = (signed)(sector - bio->bi_sector) * -512;
+
+ if (frombio)
+ flags |= ASYNC_TX_FENCE;
+ init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
+
bio_for_each_segment(bvl, bio, i) {
int len = bio_iovec_idx(bio, i)->bv_len;
int clen;
bio_page = bio_iovec_idx(bio, i)->bv_page;
if (frombio)
tx = async_memcpy(page, bio_page, page_offset,
- b_offset, clen,
- ASYNC_TX_DEP_ACK,
- tx, NULL, NULL);
+ b_offset, clen, &submit);
else
tx = async_memcpy(bio_page, page, b_offset,
- page_offset, clen,
- ASYNC_TX_DEP_ACK,
- tx, NULL, NULL);
+ page_offset, clen, &submit);
}
+ /* chain the operations */
+ submit.depend_tx = tx;
+
if (clen < len) /* hit end of page */
break;
page_offset += len;
{
struct dma_async_tx_descriptor *tx = NULL;
raid5_conf_t *conf = sh->raid_conf;
+ struct async_submit_ctl submit;
int i;
pr_debug("%s: stripe %llu\n", __func__,
}
atomic_inc(&sh->count);
- async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
- ops_complete_biofill, sh);
+ init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
+ async_trigger_callback(&submit);
}
-static void ops_complete_compute5(void *stripe_head_ref)
+static void mark_target_uptodate(struct stripe_head *sh, int target)
{
- struct stripe_head *sh = stripe_head_ref;
- int target = sh->ops.target;
- struct r5dev *tgt = &sh->dev[target];
+ struct r5dev *tgt;
- pr_debug("%s: stripe %llu\n", __func__,
- (unsigned long long)sh->sector);
+ if (target < 0)
+ return;
+ tgt = &sh->dev[target];
set_bit(R5_UPTODATE, &tgt->flags);
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
clear_bit(R5_Wantcompute, &tgt->flags);
+}
+
+static void ops_complete_compute(void *stripe_head_ref)
+{
+ struct stripe_head *sh = stripe_head_ref;
+
+ pr_debug("%s: stripe %llu\n", __func__,
+ (unsigned long long)sh->sector);
+
+ /* mark the computed target(s) as uptodate */
+ mark_target_uptodate(sh, sh->ops.target);
+ mark_target_uptodate(sh, sh->ops.target2);
+
clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
if (sh->check_state == check_state_compute_run)
sh->check_state = check_state_compute_result;
release_stripe(sh);
}
-static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
+/* return a pointer to the address conversion region of the scribble buffer */
+static addr_conv_t *to_addr_conv(struct stripe_head *sh,
+ struct raid5_percpu *percpu)
+{
+ return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
{
- /* kernel stack size limits the total number of disks */
int disks = sh->disks;
- struct page *xor_srcs[disks];
+ struct page **xor_srcs = percpu->scribble;
int target = sh->ops.target;
struct r5dev *tgt = &sh->dev[target];
struct page *xor_dest = tgt->page;
int count = 0;
struct dma_async_tx_descriptor *tx;
+ struct async_submit_ctl submit;
int i;
pr_debug("%s: stripe %llu block: %d\n",
atomic_inc(&sh->count);
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
+ ops_complete_compute, sh, to_addr_conv(sh, percpu));
if (unlikely(count == 1))
- tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
- 0, NULL, ops_complete_compute5, sh);
+ tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
else
- tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- ASYNC_TX_XOR_ZERO_DST, NULL,
- ops_complete_compute5, sh);
+ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
return tx;
}
+/* set_syndrome_sources - populate source buffers for gen_syndrome
+ * @srcs - (struct page *) array of size sh->disks
+ * @sh - stripe_head to parse
+ *
+ * Populates srcs in proper layout order for the stripe and returns the
+ * 'count' of sources to be used in a call to async_gen_syndrome. The P
+ * destination buffer is recorded in srcs[count] and the Q destination
+ * is recorded in srcs[count+1]].
+ */
+static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+{
+ int disks = sh->disks;
+ int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
+ int d0_idx = raid6_d0(sh);
+ int count;
+ int i;
+
+ for (i = 0; i < disks; i++)
+ srcs[i] = (void *)raid6_empty_zero_page;
+
+ count = 0;
+ i = d0_idx;
+ do {
+ int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+ srcs[slot] = sh->dev[i].page;
+ i = raid6_next_disk(i, disks);
+ } while (i != d0_idx);
+ BUG_ON(count != syndrome_disks);
+
+ return count;
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
+{
+ int disks = sh->disks;
+ struct page **blocks = percpu->scribble;
+ int target;
+ int qd_idx = sh->qd_idx;
+ struct dma_async_tx_descriptor *tx;
+ struct async_submit_ctl submit;
+ struct r5dev *tgt;
+ struct page *dest;
+ int i;
+ int count;
+
+ if (sh->ops.target < 0)
+ target = sh->ops.target2;
+ else if (sh->ops.target2 < 0)
+ target = sh->ops.target;
+ else
+ /* we should only have one valid target */
+ BUG();
+ BUG_ON(target < 0);
+ pr_debug("%s: stripe %llu block: %d\n",
+ __func__, (unsigned long long)sh->sector, target);
+
+ tgt = &sh->dev[target];
+ BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+ dest = tgt->page;
+
+ atomic_inc(&sh->count);
+
+ if (target == qd_idx) {
+ count = set_syndrome_sources(blocks, sh);
+ blocks[count] = NULL; /* regenerating p is not necessary */
+ BUG_ON(blocks[count+1] != dest); /* q should already be set */
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+ } else {
+ /* Compute any data- or p-drive using XOR */
+ count = 0;
+ for (i = disks; i-- ; ) {
+ if (i == target || i == qd_idx)
+ continue;
+ blocks[count++] = sh->dev[i].page;
+ }
+
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+ NULL, ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
+ }
+
+ return tx;
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
+{
+ int i, count, disks = sh->disks;
+ int syndrome_disks = sh->ddf_layout ? disks : disks-2;
+ int d0_idx = raid6_d0(sh);
+ int faila = -1, failb = -1;
+ int target = sh->ops.target;
+ int target2 = sh->ops.target2;
+ struct r5dev *tgt = &sh->dev[target];
+ struct r5dev *tgt2 = &sh->dev[target2];
+ struct dma_async_tx_descriptor *tx;
+ struct page **blocks = percpu->scribble;
+ struct async_submit_ctl submit;
+
+ pr_debug("%s: stripe %llu block1: %d block2: %d\n",
+ __func__, (unsigned long long)sh->sector, target, target2);
+ BUG_ON(target < 0 || target2 < 0);
+ BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+ BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
+
+ /* we need to open-code set_syndrome_sources to handle the
+ * slot number conversion for 'faila' and 'failb'
+ */
+ for (i = 0; i < disks ; i++)
+ blocks[i] = (void *)raid6_empty_zero_page;
+ count = 0;
+ i = d0_idx;
+ do {
+ int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+ blocks[slot] = sh->dev[i].page;
+
+ if (i == target)
+ faila = slot;
+ if (i == target2)
+ failb = slot;
+ i = raid6_next_disk(i, disks);
+ } while (i != d0_idx);
+ BUG_ON(count != syndrome_disks);
+
+ BUG_ON(faila == failb);
+ if (failb < faila)
+ swap(faila, failb);
+ pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
+ __func__, (unsigned long long)sh->sector, faila, failb);
+
+ atomic_inc(&sh->count);
+
+ if (failb == syndrome_disks+1) {
+ /* Q disk is one of the missing disks */
+ if (faila == syndrome_disks) {
+ /* Missing P+Q, just recompute */
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ return async_gen_syndrome(blocks, 0, count+2,
+ STRIPE_SIZE, &submit);
+ } else {
+ struct page *dest;
+ int data_target;
+ int qd_idx = sh->qd_idx;
+
+ /* Missing D+Q: recompute D from P, then recompute Q */
+ if (target == qd_idx)
+ data_target = target2;
+ else
+ data_target = target;
+
+ count = 0;
+ for (i = disks; i-- ; ) {
+ if (i == data_target || i == qd_idx)
+ continue;
+ blocks[count++] = sh->dev[i].page;
+ }
+ dest = sh->dev[data_target].page;
+ init_async_submit(&submit,
+ ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+ NULL, NULL, NULL,
+ to_addr_conv(sh, percpu));
+ tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
+ &submit);
+
+ count = set_syndrome_sources(blocks, sh);
+ init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ return async_gen_syndrome(blocks, 0, count+2,
+ STRIPE_SIZE, &submit);
+ }
+ } else {
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
+ if (failb == syndrome_disks) {
+ /* We're missing D+P. */
+ return async_raid6_datap_recov(syndrome_disks+2,
+ STRIPE_SIZE, faila,
+ blocks, &submit);
+ } else {
+ /* We're missing D+D. */
+ return async_raid6_2data_recov(syndrome_disks+2,
+ STRIPE_SIZE, faila, failb,
+ blocks, &submit);
+ }
+ }
+}
+
+
static void ops_complete_prexor(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
}
static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
+ struct dma_async_tx_descriptor *tx)
{
- /* kernel stack size limits the total number of disks */
int disks = sh->disks;
- struct page *xor_srcs[disks];
+ struct page **xor_srcs = percpu->scribble;
int count = 0, pd_idx = sh->pd_idx, i;
+ struct async_submit_ctl submit;
/* existing parity data subtracted */
struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
xor_srcs[count++] = dev->page;
}
- tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
- ops_complete_prexor, sh);
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ ops_complete_prexor, sh, to_addr_conv(sh, percpu));
+ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
return tx;
}
return tx;
}
-static void ops_complete_postxor(void *stripe_head_ref)
+static void ops_complete_reconstruct(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
- int disks = sh->disks, i, pd_idx = sh->pd_idx;
+ int disks = sh->disks;
+ int pd_idx = sh->pd_idx;
+ int qd_idx = sh->qd_idx;
+ int i;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- if (dev->written || i == pd_idx)
+
+ if (dev->written || i == pd_idx || i == qd_idx)
set_bit(R5_UPTODATE, &dev->flags);
}
}
static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
+ struct dma_async_tx_descriptor *tx)
{
- /* kernel stack size limits the total number of disks */
int disks = sh->disks;
- struct page *xor_srcs[disks];
-
+ struct page **xor_srcs = percpu->scribble;
+ struct async_submit_ctl submit;
int count = 0, pd_idx = sh->pd_idx, i;
struct page *xor_dest;
int prexor = 0;
* set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
* for the synchronous xor case
*/
- flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
+ flags = ASYNC_TX_ACK |
(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
atomic_inc(&sh->count);
- if (unlikely(count == 1)) {
- flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
- tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
- flags, tx, ops_complete_postxor, sh);
- } else
- tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- flags, tx, ops_complete_postxor, sh);
+ init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
+ to_addr_conv(sh, percpu));
+ if (unlikely(count == 1))
+ tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+ else
+ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+}
+
+static void
+ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
+ struct dma_async_tx_descriptor *tx)
+{
+ struct async_submit_ctl submit;
+ struct page **blocks = percpu->scribble;
+ int count;
+
+ pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
+
+ count = set_syndrome_sources(blocks, sh);
+
+ atomic_inc(&sh->count);
+
+ init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
+ sh, to_addr_conv(sh, percpu));
+ async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
}
static void ops_complete_check(void *stripe_head_ref)
release_stripe(sh);
}
-static void ops_run_check(struct stripe_head *sh)
+static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
{
- /* kernel stack size limits the total number of disks */
int disks = sh->disks;
- struct page *xor_srcs[disks];
+ int pd_idx = sh->pd_idx;
+ int qd_idx = sh->qd_idx;
+ struct page *xor_dest;
+ struct page **xor_srcs = percpu->scribble;
struct dma_async_tx_descriptor *tx;
-
- int count = 0, pd_idx = sh->pd_idx, i;
- struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
+ struct async_submit_ctl submit;
+ int count;
+ int i;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
+ count = 0;
+ xor_dest = sh->dev[pd_idx].page;
+ xor_srcs[count++] = xor_dest;
for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- if (i != pd_idx)
- xor_srcs[count++] = dev->page;
+ if (i == pd_idx || i == qd_idx)
+ continue;
+ xor_srcs[count++] = sh->dev[i].page;
}
- tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
+ init_async_submit(&submit, 0, NULL, NULL, NULL,
+ to_addr_conv(sh, percpu));
+ tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
+ &sh->ops.zero_sum_result, &submit);
+
+ atomic_inc(&sh->count);
+ init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
+ tx = async_trigger_callback(&submit);
+}
+
+static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
+{
+ struct page **srcs = percpu->scribble;
+ struct async_submit_ctl submit;
+ int count;
+
+ pr_debug("%s: stripe %llu checkp: %d\n", __func__,
+ (unsigned long long)sh->sector, checkp);
+
+ count = set_syndrome_sources(srcs, sh);
+ if (!checkp)
+ srcs[count] = NULL;
atomic_inc(&sh->count);
- tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
- ops_complete_check, sh);
+ init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
+ sh, to_addr_conv(sh, percpu));
+ async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
+ &sh->ops.zero_sum_result, percpu->spare_page, &submit);
}
-static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
+static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
{
int overlap_clear = 0, i, disks = sh->disks;
struct dma_async_tx_descriptor *tx = NULL;
+ raid5_conf_t *conf = sh->raid_conf;
+ int level = conf->level;
+ struct raid5_percpu *percpu;
+ unsigned long cpu;
+ cpu = get_cpu();
+ percpu = per_cpu_ptr(conf->percpu, cpu);
if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
ops_run_biofill(sh);
overlap_clear++;
}
if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
- tx = ops_run_compute5(sh);
- /* terminate the chain if postxor is not set to be run */
- if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
+ if (level < 6)
+ tx = ops_run_compute5(sh, percpu);
+ else {
+ if (sh->ops.target2 < 0 || sh->ops.target < 0)
+ tx = ops_run_compute6_1(sh, percpu);
+ else
+ tx = ops_run_compute6_2(sh, percpu);
+ }
+ /* terminate the chain if reconstruct is not set to be run */
+ if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
async_tx_ack(tx);
}
if (test_bit(STRIPE_OP_PREXOR, &ops_request))
- tx = ops_run_prexor(sh, tx);
+ tx = ops_run_prexor(sh, percpu, tx);
if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
tx = ops_run_biodrain(sh, tx);
overlap_clear++;
}
- if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
- ops_run_postxor(sh, tx);
+ if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
+ if (level < 6)
+ ops_run_reconstruct5(sh, percpu, tx);
+ else
+ ops_run_reconstruct6(sh, percpu, tx);
+ }
- if (test_bit(STRIPE_OP_CHECK, &ops_request))
- ops_run_check(sh);
+ if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
+ if (sh->check_state == check_state_run)
+ ops_run_check_p(sh, percpu);
+ else if (sh->check_state == check_state_run_q)
+ ops_run_check_pq(sh, percpu, 0);
+ else if (sh->check_state == check_state_run_pq)
+ ops_run_check_pq(sh, percpu, 1);
+ else
+ BUG();
+ }
if (overlap_clear)
for (i = disks; i--; ) {
if (test_and_clear_bit(R5_Overlap, &dev->flags))
wake_up(&sh->raid_conf->wait_for_overlap);
}
+ put_cpu();
}
static int grow_one_stripe(raid5_conf_t *conf)
return 0;
}
+/**
+ * scribble_len - return the required size of the scribble region
+ * @num - total number of disks in the array
+ *
+ * The size must be enough to contain:
+ * 1/ a struct page pointer for each device in the array +2
+ * 2/ room to convert each entry in (1) to its corresponding dma
+ * (dma_map_page()) or page (page_address()) address.
+ *
+ * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
+ * calculate over all devices (not just the data blocks), using zeros in place
+ * of the P and Q blocks.
+ */
+static size_t scribble_len(int num)
+{
+ size_t len;
+
+ len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
+
+ return len;
+}
+
static int resize_stripes(raid5_conf_t *conf, int newsize)
{
/* Make all the stripes able to hold 'newsize' devices.
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
struct disk_info *ndisks;
+ unsigned long cpu;
int err;
struct kmem_cache *sc;
int i;
/* Step 3.
* At this point, we are holding all the stripes so the array
* is completely stalled, so now is a good time to resize
- * conf->disks.
+ * conf->disks and the scribble region
*/
ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
if (ndisks) {
} else
err = -ENOMEM;
+ get_online_cpus();
+ conf->scribble_len = scribble_len(newsize);
+ for_each_present_cpu(cpu) {
+ struct raid5_percpu *percpu;
+ void *scribble;
+
+ percpu = per_cpu_ptr(conf->percpu, cpu);
+ scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+
+ if (scribble) {
+ kfree(percpu->scribble);
+ percpu->scribble = scribble;
+ } else {
+ err = -ENOMEM;
+ break;
+ }
+ }
+ put_online_cpus();
+
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del_init(&nsh->lru);
+
for (i=conf->raid_disks; i < newsize; i++)
if (nsh->dev[i].page == NULL) {
struct page *p = alloc_page(GFP_NOIO);
}
-
-/*
- * Copy data between a page in the stripe cache, and one or more bion
- * The page could align with the middle of the bio, or there could be
- * several bion, each with several bio_vecs, which cover part of the page
- * Multiple bion are linked together on bi_next. There may be extras
- * at the end of this list. We ignore them.
- */
-static void copy_data(int frombio, struct bio *bio,
- struct page *page,
- sector_t sector)
-{
- char *pa = page_address(page);
- struct bio_vec *bvl;
- int i;
- int page_offset;
-
- if (bio->bi_sector >= sector)
- page_offset = (signed)(bio->bi_sector - sector) * 512;
- else
- page_offset = (signed)(sector - bio->bi_sector) * -512;
- bio_for_each_segment(bvl, bio, i) {
- int len = bio_iovec_idx(bio,i)->bv_len;
- int clen;
- int b_offset = 0;
-
- if (page_offset < 0) {
- b_offset = -page_offset;
- page_offset += b_offset;
- len -= b_offset;
- }
-
- if (len > 0 && page_offset + len > STRIPE_SIZE)
- clen = STRIPE_SIZE - page_offset;
- else clen = len;
-
- if (clen > 0) {
- char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
- if (frombio)
- memcpy(pa+page_offset, ba+b_offset, clen);
- else
- memcpy(ba+b_offset, pa+page_offset, clen);
- __bio_kunmap_atomic(ba, KM_USER0);
- }
- if (clen < len) /* hit end of page */
- break;
- page_offset += len;
- }
-}
-
-#define check_xor() do { \
- if (count == MAX_XOR_BLOCKS) { \
- xor_blocks(count, STRIPE_SIZE, dest, ptr);\
- count = 0; \
- } \
- } while(0)
-
-static void compute_parity6(struct stripe_head *sh, int method)
-{
- raid5_conf_t *conf = sh->raid_conf;
- int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
- int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
- struct bio *chosen;
- /**** FIX THIS: This could be very bad if disks is close to 256 ****/
- void *ptrs[syndrome_disks+2];
-
- pd_idx = sh->pd_idx;
- qd_idx = sh->qd_idx;
- d0_idx = raid6_d0(sh);
-
- pr_debug("compute_parity, stripe %llu, method %d\n",
- (unsigned long long)sh->sector, method);
-
- switch(method) {
- case READ_MODIFY_WRITE:
- BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
- case RECONSTRUCT_WRITE:
- for (i= disks; i-- ;)
- if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
- chosen = sh->dev[i].towrite;
- sh->dev[i].towrite = NULL;
-
- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- wake_up(&conf->wait_for_overlap);
-
- BUG_ON(sh->dev[i].written);
- sh->dev[i].written = chosen;
- }
- break;
- case CHECK_PARITY:
- BUG(); /* Not implemented yet */
- }
-
- for (i = disks; i--;)
- if (sh->dev[i].written) {
- sector_t sector = sh->dev[i].sector;
- struct bio *wbi = sh->dev[i].written;
- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
- copy_data(1, wbi, sh->dev[i].page, sector);
- wbi = r5_next_bio(wbi, sector);
- }
-
- set_bit(R5_LOCKED, &sh->dev[i].flags);
- set_bit(R5_UPTODATE, &sh->dev[i].flags);
- }
-
- /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
-
- for (i = 0; i < disks; i++)
- ptrs[i] = (void *)raid6_empty_zero_page;
-
- count = 0;
- i = d0_idx;
- do {
- int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
- ptrs[slot] = page_address(sh->dev[i].page);
- if (slot < syndrome_disks &&
- !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
- printk(KERN_ERR "block %d/%d not uptodate "
- "on parity calc\n", i, count);
- BUG();
- }
-
- i = raid6_next_disk(i, disks);
- } while (i != d0_idx);
- BUG_ON(count != syndrome_disks);
-
- raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
-
- switch(method) {
- case RECONSTRUCT_WRITE:
- set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
- set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
- set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
- set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
- break;
- case UPDATE_PARITY:
- set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
- set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
- break;
- }
-}
-
-
-/* Compute one missing block */
-static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
-{
- int i, count, disks = sh->disks;
- void *ptr[MAX_XOR_BLOCKS], *dest, *p;
- int qd_idx = sh->qd_idx;
-
- pr_debug("compute_block_1, stripe %llu, idx %d\n",
- (unsigned long long)sh->sector, dd_idx);
-
- if ( dd_idx == qd_idx ) {
- /* We're actually computing the Q drive */
- compute_parity6(sh, UPDATE_PARITY);
- } else {
- dest = page_address(sh->dev[dd_idx].page);
- if (!nozero) memset(dest, 0, STRIPE_SIZE);
- count = 0;
- for (i = disks ; i--; ) {
- if (i == dd_idx || i == qd_idx)
- continue;
- p = page_address(sh->dev[i].page);
- if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
- ptr[count++] = p;
- else
- printk("compute_block() %d, stripe %llu, %d"
- " not present\n", dd_idx,
- (unsigned long long)sh->sector, i);
-
- check_xor();
- }
- if (count)
- xor_blocks(count, STRIPE_SIZE, dest, ptr);
- if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
- else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
- }
-}
-
-/* Compute two missing blocks */
-static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
-{
- int i, count, disks = sh->disks;
- int syndrome_disks = sh->ddf_layout ? disks : disks-2;
- int d0_idx = raid6_d0(sh);
- int faila = -1, failb = -1;
- /**** FIX THIS: This could be very bad if disks is close to 256 ****/
- void *ptrs[syndrome_disks+2];
-
- for (i = 0; i < disks ; i++)
- ptrs[i] = (void *)raid6_empty_zero_page;
- count = 0;
- i = d0_idx;
- do {
- int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
- ptrs[slot] = page_address(sh->dev[i].page);
-
- if (i == dd_idx1)
- faila = slot;
- if (i == dd_idx2)
- failb = slot;
- i = raid6_next_disk(i, disks);
- } while (i != d0_idx);
- BUG_ON(count != syndrome_disks);
-
- BUG_ON(faila == failb);
- if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
-
- pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
- (unsigned long long)sh->sector, dd_idx1, dd_idx2,
- faila, failb);
-
- if (failb == syndrome_disks+1) {
- /* Q disk is one of the missing disks */
- if (faila == syndrome_disks) {
- /* Missing P+Q, just recompute */
- compute_parity6(sh, UPDATE_PARITY);
- return;
- } else {
- /* We're missing D+Q; recompute D from P */
- compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
- dd_idx2 : dd_idx1),
- 0);
- compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
- return;
- }
- }
-
- /* We're missing D+P or D+D; */
- if (failb == syndrome_disks) {
- /* We're missing D+P. */
- raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
- } else {
- /* We're missing D+D. */
- raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
- ptrs);
- }
-
- /* Both the above update both missing blocks */
- set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
- set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
-}
-
static void
-schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
+schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
int rcw, int expand)
{
int i, pd_idx = sh->pd_idx, disks = sh->disks;
+ raid5_conf_t *conf = sh->raid_conf;
+ int level = conf->level;
if (rcw) {
/* if we are not expanding this is a proper write request, and
} else
sh->reconstruct_state = reconstruct_state_run;
- set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+ set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
s->locked++;
}
}
- if (s->locked + 1 == disks)
+ if (s->locked + conf->max_degraded == disks)
if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
- atomic_inc(&sh->raid_conf->pending_full_writes);
+ atomic_inc(&conf->pending_full_writes);
} else {
+ BUG_ON(level == 6);
BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
sh->reconstruct_state = reconstruct_state_prexor_drain_run;
set_bit(STRIPE_OP_PREXOR, &s->ops_request);
set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
- set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+ set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
}
}
- /* keep the parity disk locked while asynchronous operations
+ /* keep the parity disk(s) locked while asynchronous operations
* are in flight
*/
set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
s->locked++;
+ if (level == 6) {
+ int qd_idx = sh->qd_idx;
+ struct r5dev *dev = &sh->dev[qd_idx];
+
+ set_bit(R5_LOCKED, &dev->flags);
+ clear_bit(R5_UPTODATE, &dev->flags);
+ s->locked++;
+ }
+
pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
__func__, (unsigned long long)sh->sector,
s->locked, s->ops_request);
static void end_reshape(raid5_conf_t *conf);
-static int page_is_zero(struct page *p)
-{
- char *a = page_address(p);
- return ((*(u32*)a) == 0 &&
- memcmp(a, a+4, STRIPE_SIZE-4)==0);
-}
-
static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
struct stripe_head *sh)
{
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
set_bit(R5_Wantcompute, &dev->flags);
sh->ops.target = disk_idx;
+ sh->ops.target2 = -1;
s->req_compute = 1;
/* Careful: from this point on 'uptodate' is in the eye
- * of raid5_run_ops which services 'compute' operations
+ * of raid_run_ops which services 'compute' operations
* before writes. R5_Wantcompute flags a block that will
* be R5_UPTODATE by the time it is needed for a
* subsequent operation.
set_bit(STRIPE_HANDLE, &sh->state);
}
-static void handle_stripe_fill6(struct stripe_head *sh,
- struct stripe_head_state *s, struct r6_state *r6s,
- int disks)
+/* fetch_block6 - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill6 to continue
+ */
+static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
+ struct r6_state *r6s, int disk_idx, int disks)
{
- int i;
- for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- if (!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) &&
- (dev->toread || (dev->towrite &&
- !test_bit(R5_OVERWRITE, &dev->flags)) ||
- s->syncing || s->expanding ||
- (s->failed >= 1 &&
- (sh->dev[r6s->failed_num[0]].toread ||
- s->to_write)) ||
- (s->failed >= 2 &&
- (sh->dev[r6s->failed_num[1]].toread ||
- s->to_write)))) {
- /* we would like to get this block, possibly
- * by computing it, but we might not be able to
+ struct r5dev *dev = &sh->dev[disk_idx];
+ struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
+ &sh->dev[r6s->failed_num[1]] };
+
+ if (!test_bit(R5_LOCKED, &dev->flags) &&
+ !test_bit(R5_UPTODATE, &dev->flags) &&
+ (dev->toread ||
+ (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+ s->syncing || s->expanding ||
+ (s->failed >= 1 &&
+ (fdev[0]->toread || s->to_write)) ||
+ (s->failed >= 2 &&
+ (fdev[1]->toread || s->to_write)))) {
+ /* we would like to get this block, possibly by computing it,
+ * otherwise read it if the backing disk is insync
+ */
+ BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
+ BUG_ON(test_bit(R5_Wantread, &dev->flags));
+ if ((s->uptodate == disks - 1) &&
+ (s->failed && (disk_idx == r6s->failed_num[0] ||
+ disk_idx == r6s->failed_num[1]))) {
+ /* have disk failed, and we're requested to fetch it;
+ * do compute it
*/
- if ((s->uptodate == disks - 1) &&
- (s->failed && (i == r6s->failed_num[0] ||
- i == r6s->failed_num[1]))) {
- pr_debug("Computing stripe %llu block %d\n",
- (unsigned long long)sh->sector, i);
- compute_block_1(sh, i, 0);
- s->uptodate++;
- } else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
- /* Computing 2-failure is *very* expensive; only
- * do it if failed >= 2
- */
- int other;
- for (other = disks; other--; ) {
- if (other == i)
- continue;
- if (!test_bit(R5_UPTODATE,
- &sh->dev[other].flags))
- break;
- }
- BUG_ON(other < 0);
- pr_debug("Computing stripe %llu blocks %d,%d\n",
- (unsigned long long)sh->sector,
- i, other);
- compute_block_2(sh, i, other);
- s->uptodate += 2;
- } else if (test_bit(R5_Insync, &dev->flags)) {
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantread, &dev->flags);
- s->locked++;
- pr_debug("Reading block %d (sync=%d)\n",
- i, s->syncing);
+ pr_debug("Computing stripe %llu block %d\n",
+ (unsigned long long)sh->sector, disk_idx);
+ set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ set_bit(R5_Wantcompute, &dev->flags);
+ sh->ops.target = disk_idx;
+ sh->ops.target2 = -1; /* no 2nd target */
+ s->req_compute = 1;
+ s->uptodate++;
+ return 1;
+ } else if (s->uptodate == disks-2 && s->failed >= 2) {
+ /* Computing 2-failure is *very* expensive; only
+ * do it if failed >= 2
+ */
+ int other;
+ for (other = disks; other--; ) {
+ if (other == disk_idx)
+ continue;
+ if (!test_bit(R5_UPTODATE,
+ &sh->dev[other].flags))
+ break;
}
+ BUG_ON(other < 0);
+ pr_debug("Computing stripe %llu blocks %d,%d\n",
+ (unsigned long long)sh->sector,
+ disk_idx, other);
+ set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
+ set_bit(R5_Wantcompute, &sh->dev[other].flags);
+ sh->ops.target = disk_idx;
+ sh->ops.target2 = other;
+ s->uptodate += 2;
+ s->req_compute = 1;
+ return 1;
+ } else if (test_bit(R5_Insync, &dev->flags)) {
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantread, &dev->flags);
+ s->locked++;
+ pr_debug("Reading block %d (sync=%d)\n",
+ disk_idx, s->syncing);
}
}
+
+ return 0;
+}
+
+/**
+ * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ */
+static void handle_stripe_fill6(struct stripe_head *sh,
+ struct stripe_head_state *s, struct r6_state *r6s,
+ int disks)
+{
+ int i;
+
+ /* look for blocks to read/compute, skip this if a compute
+ * is already in flight, or if the stripe contents are in the
+ * midst of changing due to a write
+ */
+ if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
+ !sh->reconstruct_state)
+ for (i = disks; i--; )
+ if (fetch_block6(sh, s, r6s, i, disks))
+ break;
set_bit(STRIPE_HANDLE, &sh->state);
}
*/
/* since handle_stripe can be called at any time we need to handle the
* case where a compute block operation has been submitted and then a
- * subsequent call wants to start a write request. raid5_run_ops only
- * handles the case where compute block and postxor are requested
+ * subsequent call wants to start a write request. raid_run_ops only
+ * handles the case where compute block and reconstruct are requested
* simultaneously. If this is not the case then new writes need to be
* held off until the compute completes.
*/
if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
(s->locked == 0 && (rcw == 0 || rmw == 0) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)))
- schedule_reconstruction5(sh, s, rcw == 0, 0);
+ schedule_reconstruction(sh, s, rcw == 0, 0);
}
static void handle_stripe_dirtying6(raid5_conf_t *conf,
struct stripe_head *sh, struct stripe_head_state *s,
- struct r6_state *r6s, int disks)
-{
- int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
- int qd_idx = sh->qd_idx;
- for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- /* Would I have to read this buffer for reconstruct_write */
- if (!test_bit(R5_OVERWRITE, &dev->flags)
- && i != pd_idx && i != qd_idx
- && (!test_bit(R5_LOCKED, &dev->flags)
- ) &&
- !test_bit(R5_UPTODATE, &dev->flags)) {
- if (test_bit(R5_Insync, &dev->flags)) rcw++;
- else {
- pr_debug("raid6: must_compute: "
- "disk %d flags=%#lx\n", i, dev->flags);
- must_compute++;
- }
- }
- }
- pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
- (unsigned long long)sh->sector, rcw, must_compute);
- set_bit(STRIPE_HANDLE, &sh->state);
-
- if (rcw > 0)
- /* want reconstruct write, but need to get some data */
- for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- if (!test_bit(R5_OVERWRITE, &dev->flags)
- && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
- && !test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) &&
- test_bit(R5_Insync, &dev->flags)) {
- if (
- test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- pr_debug("Read_old stripe %llu "
- "block %d for Reconstruct\n",
- (unsigned long long)sh->sector, i);
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantread, &dev->flags);
- s->locked++;
- } else {
- pr_debug("Request delayed stripe %llu "
- "block %d for Reconstruct\n",
- (unsigned long long)sh->sector, i);
- set_bit(STRIPE_DELAYED, &sh->state);
- set_bit(STRIPE_HANDLE, &sh->state);
- }
+ struct r6_state *r6s, int disks)
+{
+ int rcw = 0, pd_idx = sh->pd_idx, i;
+ int qd_idx = sh->qd_idx;
+
+ set_bit(STRIPE_HANDLE, &sh->state);
+ for (i = disks; i--; ) {
+ struct r5dev *dev = &sh->dev[i];
+ /* check if we haven't enough data */
+ if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+ i != pd_idx && i != qd_idx &&
+ !test_bit(R5_LOCKED, &dev->flags) &&
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Wantcompute, &dev->flags))) {
+ rcw++;
+ if (!test_bit(R5_Insync, &dev->flags))
+ continue; /* it's a failed drive */
+
+ if (
+ test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ pr_debug("Read_old stripe %llu "
+ "block %d for Reconstruct\n",
+ (unsigned long long)sh->sector, i);
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantread, &dev->flags);
+ s->locked++;
+ } else {
+ pr_debug("Request delayed stripe %llu "
+ "block %d for Reconstruct\n",
+ (unsigned long long)sh->sector, i);
+ set_bit(STRIPE_DELAYED, &sh->state);
+ set_bit(STRIPE_HANDLE, &sh->state);
}
}
+ }
/* now if nothing is locked, and if we have enough data, we can start a
* write request
*/
- if (s->locked == 0 && rcw == 0 &&
+ if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
+ s->locked == 0 && rcw == 0 &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)) {
- if (must_compute > 0) {
- /* We have failed blocks and need to compute them */
- switch (s->failed) {
- case 0:
- BUG();
- case 1:
- compute_block_1(sh, r6s->failed_num[0], 0);
- break;
- case 2:
- compute_block_2(sh, r6s->failed_num[0],
- r6s->failed_num[1]);
- break;
- default: /* This request should have been failed? */
- BUG();
- }
- }
-
- pr_debug("Computing parity for stripe %llu\n",
- (unsigned long long)sh->sector);
- compute_parity6(sh, RECONSTRUCT_WRITE);
- /* now every locked buffer is ready to be written */
- for (i = disks; i--; )
- if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
- pr_debug("Writing stripe %llu block %d\n",
- (unsigned long long)sh->sector, i);
- s->locked++;
- set_bit(R5_Wantwrite, &sh->dev[i].flags);
- }
- if (s->locked == disks)
- if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
- atomic_inc(&conf->pending_full_writes);
- /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
- set_bit(STRIPE_INSYNC, &sh->state);
-
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) <
- IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
+ schedule_reconstruction(sh, s, 1, 0);
}
}
* we are done. Otherwise update the mismatch count and repair
* parity if !MD_RECOVERY_CHECK
*/
- if (sh->ops.zero_sum_result == 0)
+ if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
/* parity is correct (on disc,
* not in buffer any more)
*/
set_bit(R5_Wantcompute,
&sh->dev[sh->pd_idx].flags);
sh->ops.target = sh->pd_idx;
+ sh->ops.target2 = -1;
s->uptodate++;
}
}
static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
- struct stripe_head_state *s,
- struct r6_state *r6s, struct page *tmp_page,
- int disks)
+ struct stripe_head_state *s,
+ struct r6_state *r6s, int disks)
{
- int update_p = 0, update_q = 0;
- struct r5dev *dev;
int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx;
+ struct r5dev *dev;
set_bit(STRIPE_HANDLE, &sh->state);
BUG_ON(s->failed > 2);
- BUG_ON(s->uptodate < disks);
+
/* Want to check and possibly repair P and Q.
* However there could be one 'failed' device, in which
* case we can only check one of them, possibly using the
* other to generate missing data
*/
- /* If !tmp_page, we cannot do the calculations,
- * but as we have set STRIPE_HANDLE, we will soon be called
- * by stripe_handle with a tmp_page - just wait until then.
- */
- if (tmp_page) {
+ switch (sh->check_state) {
+ case check_state_idle:
+ /* start a new check operation if there are < 2 failures */
if (s->failed == r6s->q_failed) {
- /* The only possible failed device holds 'Q', so it
+ /* The only possible failed device holds Q, so it
* makes sense to check P (If anything else were failed,
* we would have used P to recreate it).
*/
- compute_block_1(sh, pd_idx, 1);
- if (!page_is_zero(sh->dev[pd_idx].page)) {
- compute_block_1(sh, pd_idx, 0);
- update_p = 1;
- }
+ sh->check_state = check_state_run;
}
if (!r6s->q_failed && s->failed < 2) {
- /* q is not failed, and we didn't use it to generate
+ /* Q is not failed, and we didn't use it to generate
* anything, so it makes sense to check it
*/
- memcpy(page_address(tmp_page),
- page_address(sh->dev[qd_idx].page),
- STRIPE_SIZE);
- compute_parity6(sh, UPDATE_PARITY);
- if (memcmp(page_address(tmp_page),
- page_address(sh->dev[qd_idx].page),
- STRIPE_SIZE) != 0) {
- clear_bit(STRIPE_INSYNC, &sh->state);
- update_q = 1;
- }
+ if (sh->check_state == check_state_run)
+ sh->check_state = check_state_run_pq;
+ else
+ sh->check_state = check_state_run_q;
}
- if (update_p || update_q) {
- conf->mddev->resync_mismatches += STRIPE_SECTORS;
- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
- /* don't try to repair!! */
- update_p = update_q = 0;
+
+ /* discard potentially stale zero_sum_result */
+ sh->ops.zero_sum_result = 0;
+
+ if (sh->check_state == check_state_run) {
+ /* async_xor_zero_sum destroys the contents of P */
+ clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+ s->uptodate--;
+ }
+ if (sh->check_state >= check_state_run &&
+ sh->check_state <= check_state_run_pq) {
+ /* async_syndrome_zero_sum preserves P and Q, so
+ * no need to mark them !uptodate here
+ */
+ set_bit(STRIPE_OP_CHECK, &s->ops_request);
+ break;
}
+ /* we have 2-disk failure */
+ BUG_ON(s->failed != 2);
+ /* fall through */
+ case check_state_compute_result:
+ sh->check_state = check_state_idle;
+
+ /* check that a write has not made the stripe insync */
+ if (test_bit(STRIPE_INSYNC, &sh->state))
+ break;
+
/* now write out any block on a failed drive,
- * or P or Q if they need it
+ * or P or Q if they were recomputed
*/
-
+ BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
if (s->failed == 2) {
dev = &sh->dev[r6s->failed_num[1]];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
-
- if (update_p) {
+ if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
dev = &sh->dev[pd_idx];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
- if (update_q) {
+ if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
dev = &sh->dev[qd_idx];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
clear_bit(STRIPE_DEGRADED, &sh->state);
set_bit(STRIPE_INSYNC, &sh->state);
+ break;
+ case check_state_run:
+ case check_state_run_q:
+ case check_state_run_pq:
+ break; /* we will be called again upon completion */
+ case check_state_check_result:
+ sh->check_state = check_state_idle;
+
+ /* handle a successful check operation, if parity is correct
+ * we are done. Otherwise update the mismatch count and repair
+ * parity if !MD_RECOVERY_CHECK
+ */
+ if (sh->ops.zero_sum_result == 0) {
+ /* both parities are correct */
+ if (!s->failed)
+ set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+ /* in contrast to the raid5 case we can validate
+ * parity, but still have a failure to write
+ * back
+ */
+ sh->check_state = check_state_compute_result;
+ /* Returning at this point means that we may go
+ * off and bring p and/or q uptodate again so
+ * we make sure to check zero_sum_result again
+ * to verify if p or q need writeback
+ */
+ }
+ } else {
+ conf->mddev->resync_mismatches += STRIPE_SECTORS;
+ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ /* don't try to repair!! */
+ set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+ int *target = &sh->ops.target;
+
+ sh->ops.target = -1;
+ sh->ops.target2 = -1;
+ sh->check_state = check_state_compute_run;
+ set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+ set_bit(R5_Wantcompute,
+ &sh->dev[pd_idx].flags);
+ *target = pd_idx;
+ target = &sh->ops.target2;
+ s->uptodate++;
+ }
+ if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+ set_bit(R5_Wantcompute,
+ &sh->dev[qd_idx].flags);
+ *target = qd_idx;
+ s->uptodate++;
+ }
+ }
+ }
+ break;
+ case check_state_compute_run:
+ break;
+ default:
+ printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+ __func__, sh->check_state,
+ (unsigned long long) sh->sector);
+ BUG();
}
}
if (i != sh->pd_idx && i != sh->qd_idx) {
int dd_idx, j;
struct stripe_head *sh2;
+ struct async_submit_ctl submit;
sector_t bn = compute_blocknr(sh, i, 1);
sector_t s = raid5_compute_sector(conf, bn, 0,
}
/* place all the copies on one channel */
+ init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
tx = async_memcpy(sh2->dev[dd_idx].page,
- sh->dev[i].page, 0, 0, STRIPE_SIZE,
- ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+ sh->dev[i].page, 0, 0, STRIPE_SIZE,
+ &submit);
set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
rcu_read_lock();
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
- struct r5dev *dev = &sh->dev[i];
+
+ dev = &sh->dev[i];
clear_bit(R5_Insync, &dev->flags);
pr_debug("check %d: state 0x%lx toread %p read %p write %p "
/* Need to write out all blocks after computing parity */
sh->disks = conf->raid_disks;
stripe_set_idx(sh->sector, conf, 0, sh);
- schedule_reconstruction5(sh, &s, 1, 1);
+ schedule_reconstruction(sh, &s, 1, 1);
} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
if (s.ops_request)
- raid5_run_ops(sh, s.ops_request);
+ raid_run_ops(sh, s.ops_request);
ops_run_io(sh, &s);
return blocked_rdev == NULL;
}
-static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh)
{
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks;
mdk_rdev_t *blocked_rdev = NULL;
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
- "pd_idx=%d, qd_idx=%d\n",
+ "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
(unsigned long long)sh->sector, sh->state,
- atomic_read(&sh->count), pd_idx, qd_idx);
+ atomic_read(&sh->count), pd_idx, qd_idx,
+ sh->check_state, sh->reconstruct_state);
memset(&s, 0, sizeof(s));
spin_lock(&sh->lock);
pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags, dev->toread, dev->towrite, dev->written);
- /* maybe we can reply to a read */
- if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
- struct bio *rbi, *rbi2;
- pr_debug("Return read for disc %d\n", i);
- spin_lock_irq(&conf->device_lock);
- rbi = dev->toread;
- dev->toread = NULL;
- if (test_and_clear_bit(R5_Overlap, &dev->flags))
- wake_up(&conf->wait_for_overlap);
- spin_unlock_irq(&conf->device_lock);
- while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
- copy_data(0, rbi, dev->page, dev->sector);
- rbi2 = r5_next_bio(rbi, dev->sector);
- spin_lock_irq(&conf->device_lock);
- if (!raid5_dec_bi_phys_segments(rbi)) {
- rbi->bi_next = return_bi;
- return_bi = rbi;
- }
- spin_unlock_irq(&conf->device_lock);
- rbi = rbi2;
- }
- }
+ /* maybe we can reply to a read
+ *
+ * new wantfill requests are only permitted while
+ * ops_complete_biofill is guaranteed to be inactive
+ */
+ if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
+ !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
+ set_bit(R5_Wantfill, &dev->flags);
/* now count some things */
if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+ if (test_bit(R5_Wantcompute, &dev->flags)) {
+ s.compute++;
+ BUG_ON(s.compute > 2);
+ }
-
- if (dev->toread)
+ if (test_bit(R5_Wantfill, &dev->flags)) {
+ s.to_fill++;
+ } else if (dev->toread)
s.to_read++;
if (dev->towrite) {
s.to_write++;
blocked_rdev = NULL;
}
+ if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+ set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+ set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+ }
+
pr_debug("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
* or to load a block that is being partially written.
*/
if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
- (s.syncing && (s.uptodate < disks)) || s.expanding)
+ (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
handle_stripe_fill6(sh, &s, &r6s, disks);
- /* now to consider writing and what else, if anything should be read */
- if (s.to_write)
+ /* Now we check to see if any write operations have recently
+ * completed
+ */
+ if (sh->reconstruct_state == reconstruct_state_drain_result) {
+ int qd_idx = sh->qd_idx;
+
+ sh->reconstruct_state = reconstruct_state_idle;
+ /* All the 'written' buffers and the parity blocks are ready to
+ * be written back to disk
+ */
+ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+ for (i = disks; i--; ) {
+ dev = &sh->dev[i];
+ if (test_bit(R5_LOCKED, &dev->flags) &&
+ (i == sh->pd_idx || i == qd_idx ||
+ dev->written)) {
+ pr_debug("Writing block %d\n", i);
+ BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+ set_bit(R5_Wantwrite, &dev->flags);
+ if (!test_bit(R5_Insync, &dev->flags) ||
+ ((i == sh->pd_idx || i == qd_idx) &&
+ s.failed == 0))
+ set_bit(STRIPE_INSYNC, &sh->state);
+ }
+ }
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) <
+ IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
+ }
+
+ /* Now to consider new write requests and what else, if anything
+ * should be read. We do not handle new writes when:
+ * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
+ * 2/ A 'check' operation is in flight, as it may clobber the parity
+ * block.
+ */
+ if (s.to_write && !sh->reconstruct_state && !sh->check_state)
handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
- * data is available
+ * data is available. The parity check is held off while parity
+ * dependent operations are in flight.
*/
- if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
- handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+ if (sh->check_state ||
+ (s.syncing && s.locked == 0 &&
+ !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
+ !test_bit(STRIPE_INSYNC, &sh->state)))
+ handle_parity_checks6(conf, sh, &s, &r6s, disks);
if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
md_done_sync(conf->mddev, STRIPE_SECTORS,1);
set_bit(R5_Wantwrite, &dev->flags);
set_bit(R5_ReWrite, &dev->flags);
set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
} else {
/* let's read it back */
set_bit(R5_Wantread, &dev->flags);
set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
}
}
}
- if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+ /* Finish reconstruct operations initiated by the expansion process */
+ if (sh->reconstruct_state == reconstruct_state_result) {
+ sh->reconstruct_state = reconstruct_state_idle;
+ clear_bit(STRIPE_EXPANDING, &sh->state);
+ for (i = conf->raid_disks; i--; ) {
+ set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
+ s.locked++;
+ }
+ }
+
+ if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+ !sh->reconstruct_state) {
struct stripe_head *sh2
= get_active_stripe(conf, sh->sector, 1, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
/* Need to write out all blocks after computing P&Q */
sh->disks = conf->raid_disks;
stripe_set_idx(sh->sector, conf, 0, sh);
- compute_parity6(sh, RECONSTRUCT_WRITE);
- for (i = conf->raid_disks ; i-- ; ) {
- set_bit(R5_LOCKED, &sh->dev[i].flags);
- s.locked++;
- set_bit(R5_Wantwrite, &sh->dev[i].flags);
- }
- clear_bit(STRIPE_EXPANDING, &sh->state);
- } else if (s.expanded) {
+ schedule_reconstruction(sh, &s, 1, 1);
+ } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
wake_up(&conf->wait_for_overlap);
if (unlikely(blocked_rdev))
md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+ if (s.ops_request)
+ raid_run_ops(sh, s.ops_request);
+
ops_run_io(sh, &s);
return_io(return_bi);
}
/* returns true if the stripe was handled */
-static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe(struct stripe_head *sh)
{
if (sh->raid_conf->level == 6)
- return handle_stripe6(sh, tmp_page);
+ return handle_stripe6(sh);
else
return handle_stripe5(sh);
}
-
-
static void raid5_activate_delayed(raid5_conf_t *conf)
{
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
/* No difference between reads and writes. Just check
* how busy the stripe_cache is
*/
+
+ if (mddev_congested(mddev, bits))
+ return 1;
if (conf->inactive_blocked)
return 1;
if (conf->quiesce)
INIT_LIST_HEAD(&stripes);
for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
int j;
- int skipped = 0;
+ int skipped_disk = 0;
sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
set_bit(STRIPE_EXPANDING, &sh->state);
atomic_inc(&conf->reshape_stripes);
continue;
s = compute_blocknr(sh, j, 0);
if (s < raid5_size(mddev, 0, 0)) {
- skipped = 1;
+ skipped_disk = 1;
continue;
}
memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
set_bit(R5_Expanded, &sh->dev[j].flags);
set_bit(R5_UPTODATE, &sh->dev[j].flags);
}
- if (!skipped) {
+ if (!skipped_disk) {
set_bit(STRIPE_EXPAND_READY, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
}
spin_unlock(&sh->lock);
/* wait for any blocked device to be handled */
- while(unlikely(!handle_stripe(sh, NULL)))
+ while (unlikely(!handle_stripe(sh)))
;
release_stripe(sh);
return handled;
}
- handle_stripe(sh, NULL);
+ handle_stripe(sh);
release_stripe(sh);
handled++;
}
return handled;
}
+#ifdef CONFIG_MULTICORE_RAID456
+static void __process_stripe(void *param, async_cookie_t cookie)
+{
+ struct stripe_head *sh = param;
+
+ handle_stripe(sh);
+ release_stripe(sh);
+}
+
+static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+{
+ async_schedule_domain(__process_stripe, sh, domain);
+}
+
+static void synchronize_stripe_processing(struct list_head *domain)
+{
+ async_synchronize_full_domain(domain);
+}
+#else
+static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+{
+ handle_stripe(sh);
+ release_stripe(sh);
+ cond_resched();
+}
+
+static void synchronize_stripe_processing(struct list_head *domain)
+{
+}
+#endif
/*
struct stripe_head *sh;
raid5_conf_t *conf = mddev->private;
int handled;
+ LIST_HEAD(raid_domain);
pr_debug("+++ raid5d active\n");
spin_unlock_irq(&conf->device_lock);
handled++;
- handle_stripe(sh, conf->spare_page);
- release_stripe(sh);
+ process_stripe(sh, &raid_domain);
spin_lock_irq(&conf->device_lock);
}
spin_unlock_irq(&conf->device_lock);
+ synchronize_stripe_processing(&raid_domain);
async_tx_issue_pending_all();
unplug_slaves(mddev);
return sectors * (raid_disks - conf->max_degraded);
}
+static void raid5_free_percpu(raid5_conf_t *conf)
+{
+ struct raid5_percpu *percpu;
+ unsigned long cpu;
+
+ if (!conf->percpu)
+ return;
+
+ get_online_cpus();
+ for_each_possible_cpu(cpu) {
+ percpu = per_cpu_ptr(conf->percpu, cpu);
+ safe_put_page(percpu->spare_page);
+ kfree(percpu->scribble);
+ }
+#ifdef CONFIG_HOTPLUG_CPU
+ unregister_cpu_notifier(&conf->cpu_notify);
+#endif
+ put_online_cpus();
+
+ free_percpu(conf->percpu);
+}
+
static void free_conf(raid5_conf_t *conf)
{
shrink_stripes(conf);
- safe_put_page(conf->spare_page);
+ raid5_free_percpu(conf);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
}
+#ifdef CONFIG_HOTPLUG_CPU
+static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+{
+ raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
+ long cpu = (long)hcpu;
+ struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ if (conf->level == 6 && !percpu->spare_page)
+ percpu->spare_page = alloc_page(GFP_KERNEL);
+ if (!percpu->scribble)
+ percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+ if (!percpu->scribble ||
+ (conf->level == 6 && !percpu->spare_page)) {
+ safe_put_page(percpu->spare_page);
+ kfree(percpu->scribble);
+ pr_err("%s: failed memory allocation for cpu%ld\n",
+ __func__, cpu);
+ return NOTIFY_BAD;
+ }
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ safe_put_page(percpu->spare_page);
+ kfree(percpu->scribble);
+ percpu->spare_page = NULL;
+ percpu->scribble = NULL;
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+#endif
+
+static int raid5_alloc_percpu(raid5_conf_t *conf)
+{
+ unsigned long cpu;
+ struct page *spare_page;
+ struct raid5_percpu *allcpus;
+ void *scribble;
+ int err;
+
+ allcpus = alloc_percpu(struct raid5_percpu);
+ if (!allcpus)
+ return -ENOMEM;
+ conf->percpu = allcpus;
+
+ get_online_cpus();
+ err = 0;
+ for_each_present_cpu(cpu) {
+ if (conf->level == 6) {
+ spare_page = alloc_page(GFP_KERNEL);
+ if (!spare_page) {
+ err = -ENOMEM;
+ break;
+ }
+ per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+ }
+ scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
+ if (!scribble) {
+ err = -ENOMEM;
+ break;
+ }
+ per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
+ }
+#ifdef CONFIG_HOTPLUG_CPU
+ conf->cpu_notify.notifier_call = raid456_cpu_notify;
+ conf->cpu_notify.priority = 0;
+ if (err == 0)
+ err = register_cpu_notifier(&conf->cpu_notify);
+#endif
+ put_online_cpus();
+
+ return err;
+}
+
static raid5_conf_t *setup_conf(mddev_t *mddev)
{
raid5_conf_t *conf;
goto abort;
conf->raid_disks = mddev->raid_disks;
+ conf->scribble_len = scribble_len(conf->raid_disks);
if (mddev->reshape_position == MaxSector)
conf->previous_raid_disks = mddev->raid_disks;
else
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
goto abort;
- if (mddev->new_level == 6) {
- conf->spare_page = alloc_page(GFP_KERNEL);
- if (!conf->spare_page)
- goto abort;
- }
+ conf->level = mddev->new_level;
+ if (raid5_alloc_percpu(conf) != 0)
+ goto abort;
+
spin_lock_init(&conf->device_lock);
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
printk(KERN_INFO "raid5: allocated %dkB for %s\n",
memory, mdname(mddev));
- conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+ conf->thread = md_register_thread(raid5d, mddev, NULL);
if (!conf->thread) {
printk(KERN_ERR
"raid5: couldn't allocate thread for %s\n",
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
- "%s_reshape");
+ "reshape");
}
/* read-ahead size must cover two whole stripes, which is
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
- "%s_reshape");
+ "reshape");
if (!mddev->sync_thread) {
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
#define _RAID5_H
#include <linux/raid/xor.h>
+#include <linux/dmaengine.h>
/*
*
*/
enum check_states {
check_state_idle = 0,
- check_state_run, /* parity check */
+ check_state_run, /* xor parity check */
+ check_state_run_q, /* q-parity check */
+ check_state_run_pq, /* pq dual parity check */
check_state_check_result,
check_state_compute_run, /* parity repair */
check_state_compute_result,
* @target - STRIPE_OP_COMPUTE_BLK target
*/
struct stripe_operations {
- int target;
- u32 zero_sum_result;
+ int target, target2;
+ enum sum_check_flags zero_sum_result;
} ops;
struct r5dev {
struct bio req;
#define STRIPE_OP_COMPUTE_BLK 1
#define STRIPE_OP_PREXOR 2
#define STRIPE_OP_BIODRAIN 3
-#define STRIPE_OP_POSTXOR 4
+#define STRIPE_OP_RECONSTRUCT 4
#define STRIPE_OP_CHECK 5
/*
* (fresh device added).
* Cleared when a sync completes.
*/
-
- struct page *spare_page; /* Used when checking P/Q in raid6 */
+ /* per cpu variables */
+ struct raid5_percpu {
+ struct page *spare_page; /* Used when checking P/Q in raid6 */
+ void *scribble; /* space for constructing buffer
+ * lists and performing address
+ * conversions
+ */
+ } *percpu;
+ size_t scribble_len; /* size of scribble region must be
+ * associated with conf to handle
+ * cpu hotplug while reshaping
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+ struct notifier_block cpu_notify;
+#endif
/*
* Free stripes pool
#define DVB_MAJOR 212
#if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0
-#define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
+ #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
#else
-#warning invalid CONFIG_DVB_MAX_ADAPTERS value
-#define DVB_MAX_ADAPTERS 8
+ #define DVB_MAX_ADAPTERS 8
#endif
#define DVB_UNSET (-1)
select DVB_DIB3000MC if !DVB_FE_CUSTOMISE
select DVB_S5H1411 if !DVB_FE_CUSTOMISE
select DVB_LGDT3305 if !DVB_FE_CUSTOMISE
- select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE
+ select DVB_TUNER_DIB0070
select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE
select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE
select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE
goto out;
}
- if (debug & DBGLVL_API)
+ if (saa_debug & DBGLVL_API)
saa7164_dumphex16(dev, buf, (buflen/16)*16);
saa7164_api_dump_subdevs(dev, buf, buflen);
dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len);
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, 2 * 16);
ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR,
if (ret != SAA_OK)
printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret);
else {
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, sizeof(buf));
memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen);
}
*((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen;
memcpy((buf + 2 * sizeof(u32)), data, datalen);
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, sizeof(buf));
ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR,
unsigned long stamp;
int r;
- if (debug >= 4)
+ if (saa_debug >= 4)
saa7164_bus_dump(dev);
dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno);
32 bus
*/
-unsigned int debug;
-module_param(debug, int, 0644);
+unsigned int saa_debug;
+module_param_named(debug, saa_debug, int, 0644);
MODULE_PARM_DESC(debug, "enable debug messages");
unsigned int waitsecs = 10;
printk(KERN_ERR "%s() Unsupported board detected, "
"registering without firmware\n", __func__);
- dprintk(1, "%s() parameter debug = %d\n", __func__, debug);
+ dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug);
dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs);
fail_fw:
/* ----------------------------------------------------------- */
-extern unsigned int debug;
+extern unsigned int saa_debug;
#define dprintk(level, fmt, arg...)\
- do { if (debug & level)\
+ do { if (saa_debug & level)\
printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\
} while (0)
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/slab.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <asm/uaccess.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
card->id.type = id_reg.type;
card->id.category = id_reg.category;
card->id.class = id_reg.class;
+ dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
}
complete(&card->mrq_complete);
- dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
return -EAGAIN;
}
}
/* Need 1K cacheline aligned that does not cross page boundary */
p = kmalloc(4096, 0);
+ if (p == NULL)
+ return -ENOMEM;
mq = ALIGNUP(p, 1024);
memset(mes, 0xee, sizeof(mes));
dw = mq;
static int create_proc_file(struct proc_entry *p)
{
- p->entry = create_proc_entry(p->name, p->mode, proc_gru);
+ p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
if (!p->entry)
return -1;
- p->entry->proc_fops = p->fops;
return 0;
}
struct scatterlist *sg;
unsigned int i;
enum dma_data_direction direction;
+ unsigned int sglen;
/*
* We don't do DMA on "complex" transfers, i.e. with
else
direction = DMA_TO_DEVICE;
+ sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction);
+ if (sglen != data->sg_len)
+ goto unmap_exit;
desc = chan->device->device_prep_slave_sg(chan,
data->sg, data->sg_len, direction,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc)
- return -ENOMEM;
+ goto unmap_exit;
host->dma.data_desc = desc;
desc->callback = atmci_dma_complete;
chan->device->device_issue_pending(chan);
return 0;
+unmap_exit:
+ dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction);
+ return -ENOMEM;
}
#else /* CONFIG_MMC_ATMELMCI_DMA */
config MTD_OF_PARTS
tristate "Flash partition map based on OF description"
- depends on PPC_OF && MTD_PARTITIONS
+ depends on (MICROBLAZE || PPC_OF) && MTD_PARTITIONS
help
This provides a partition parsing function which derives
the partition map from the children of the flash node,
config MTD_PHYSMAP_OF
tristate "Flash device in physical memory map based on OF description"
- depends on PPC_OF && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
+ depends on (MICROBLAZE || PPC_OF) && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
help
This provides a 'mapping' driver which allows the NOR Flash and
ROM driver code to communicate with chips which are mapped
if (netif_running(dev)) {
netif_device_detach(dev);
vortex_down(dev, 1);
+ disable_irq(dev->irq);
}
pci_save_state(pdev);
pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
- free_irq(dev->irq, dev);
pci_disable_device(pdev);
pci_set_power_state(pdev, pci_choose_state(pdev, state));
}
return err;
}
pci_set_master(pdev);
- if (request_irq(dev->irq, vp->full_bus_master_rx ?
- &boomerang_interrupt : &vortex_interrupt, IRQF_SHARED, dev->name, dev)) {
- pr_warning("%s: Could not reserve IRQ %d\n", dev->name, dev->irq);
- pci_disable_device(pdev);
- return -EBUSY;
- }
if (netif_running(dev)) {
err = vortex_up(dev);
if (err)
return err;
- else
- netif_device_attach(dev);
+ enable_irq(dev->irq);
+ netif_device_attach(dev);
}
}
return 0;
/* These identify the driver base version and may not be removed. */
static char version[] =
-KERN_INFO DRV_NAME ": 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n";
+DRV_NAME ": 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n";
MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
MODULE_DESCRIPTION("RealTek RTL-8139C+ series 10/100 PCI Ethernet driver");
config FEC
bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)"
- depends on M523x || M527x || M5272 || M528x || M520x || M532x || MACH_MX27 || ARCH_MX35
+ depends on M523x || M527x || M5272 || M528x || M520x || M532x || MACH_MX27 || ARCH_MX35 || ARCH_MX25
help
Say Y here if you want to use the built-in 10/100 Fast ethernet
controller on some Motorola ColdFire and Freescale i.MX processors.
u32 ctrl;
u32 mac_ctrl_data;
u32 master_ctrl_data;
- u32 wol_ctrl_data;
+ u32 wol_ctrl_data = 0;
u16 mii_bmsr_data;
u16 save_autoneg_advertised;
u16 mii_intr_status_data;
CPC-PCIe and CPC-104P cards from EMS Dr. Thomas Wuensche
(http://www.ems-wuensche.de).
+config CAN_EMS_USB
+ tristate "EMS CPC-USB/ARM7 CAN/USB interface"
+ depends on USB && CAN_DEV
+ ---help---
+ This driver is for the one channel CPC-USB/ARM7 CAN/USB interface
+ from from EMS Dr. Thomas Wuensche (http://www.ems-wuensche.de).
+
config CAN_KVASER_PCI
tristate "Kvaser PCIcanx and Kvaser PCIcan PCI Cards"
depends on PCI && CAN_SJA1000
This driver is for the the PCIcanx and PCIcan cards (1, 2 or
4 channel) from Kvaser (http://www.kvaser.com).
+config CAN_AT91
+ tristate "Atmel AT91 onchip CAN controller"
+ depends on CAN && CAN_DEV && ARCH_AT91SAM9263
+ ---help---
+ This is a driver for the SoC CAN controller in Atmel's AT91SAM9263.
+
config CAN_DEBUG_DEVICES
bool "CAN devices debugging messages"
depends on CAN
obj-$(CONFIG_CAN_DEV) += can-dev.o
can-dev-y := dev.o
+obj-y += usb/
+
obj-$(CONFIG_CAN_SJA1000) += sja1000/
+obj-$(CONFIG_CAN_AT91) += at91_can.o
ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
#define EMS_PCI_CDR (CDR_CBP | CDR_CLKOUT_MASK)
#define EMS_PCI_V1_BASE_BAR 1
-#define EMS_PCI_V1_MEM_SIZE 4096
+#define EMS_PCI_V1_CONF_SIZE 4096 /* size of PITA control area */
#define EMS_PCI_V2_BASE_BAR 2
-#define EMS_PCI_V2_MEM_SIZE 128
+#define EMS_PCI_V2_CONF_SIZE 128 /* size of PLX control area */
#define EMS_PCI_CAN_BASE_OFFSET 0x400 /* offset where the controllers starts */
#define EMS_PCI_CAN_CTRL_SIZE 0x200 /* memory size for each controller */
+#define EMS_PCI_BASE_SIZE 4096 /* size of controller area */
+
static struct pci_device_id ems_pci_tbl[] = {
/* CPC-PCI v1 */
{PCI_VENDOR_ID_SIEMENS, 0x2104, PCI_ANY_ID, PCI_ANY_ID,},
struct sja1000_priv *priv;
struct net_device *dev;
struct ems_pci_card *card;
- int max_chan, mem_size, base_bar;
+ int max_chan, conf_size, base_bar;
int err, i;
/* Enabling PCI device */
card->version = 2; /* CPC-PCI v2 */
max_chan = EMS_PCI_V2_MAX_CHAN;
base_bar = EMS_PCI_V2_BASE_BAR;
- mem_size = EMS_PCI_V2_MEM_SIZE;
+ conf_size = EMS_PCI_V2_CONF_SIZE;
} else {
card->version = 1; /* CPC-PCI v1 */
max_chan = EMS_PCI_V1_MAX_CHAN;
base_bar = EMS_PCI_V1_BASE_BAR;
- mem_size = EMS_PCI_V1_MEM_SIZE;
+ conf_size = EMS_PCI_V1_CONF_SIZE;
}
/* Remap configuration space and controller memory area */
- card->conf_addr = pci_iomap(pdev, 0, mem_size);
+ card->conf_addr = pci_iomap(pdev, 0, conf_size);
if (card->conf_addr == NULL) {
err = -ENOMEM;
goto failure_cleanup;
}
- card->base_addr = pci_iomap(pdev, base_bar, mem_size);
+ card->base_addr = pci_iomap(pdev, base_bar, EMS_PCI_BASE_SIZE);
if (card->base_addr == NULL) {
err = -ENOMEM;
goto failure_cleanup;
--- /dev/null
+#
+# Makefile for the Linux Controller Area Network USB drivers.
+#
+
+obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o
--- /dev/null
+/*
+ * CAN driver for EMS Dr. Thomas Wuensche CPC-USB/ARM7
+ *
+ * Copyright (C) 2004-2009 EMS Dr. Thomas Wuensche
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+
+MODULE_AUTHOR("Sebastian Haas <haas@ems-wuensche.com>");
+MODULE_DESCRIPTION("CAN driver for EMS Dr. Thomas Wuensche CAN/USB interfaces");
+MODULE_LICENSE("GPL v2");
+
+/* Control-Values for CPC_Control() Command Subject Selection */
+#define CONTR_CAN_MESSAGE 0x04
+#define CONTR_CAN_STATE 0x0C
+#define CONTR_BUS_ERROR 0x1C
+
+/* Control Command Actions */
+#define CONTR_CONT_OFF 0
+#define CONTR_CONT_ON 1
+#define CONTR_ONCE 2
+
+/* Messages from CPC to PC */
+#define CPC_MSG_TYPE_CAN_FRAME 1 /* CAN data frame */
+#define CPC_MSG_TYPE_RTR_FRAME 8 /* CAN remote frame */
+#define CPC_MSG_TYPE_CAN_PARAMS 12 /* Actual CAN parameters */
+#define CPC_MSG_TYPE_CAN_STATE 14 /* CAN state message */
+#define CPC_MSG_TYPE_EXT_CAN_FRAME 16 /* Extended CAN data frame */
+#define CPC_MSG_TYPE_EXT_RTR_FRAME 17 /* Extended remote frame */
+#define CPC_MSG_TYPE_CONTROL 19 /* change interface behavior */
+#define CPC_MSG_TYPE_CONFIRM 20 /* command processed confirmation */
+#define CPC_MSG_TYPE_OVERRUN 21 /* overrun events */
+#define CPC_MSG_TYPE_CAN_FRAME_ERROR 23 /* detected bus errors */
+#define CPC_MSG_TYPE_ERR_COUNTER 25 /* RX/TX error counter */
+
+/* Messages from the PC to the CPC interface */
+#define CPC_CMD_TYPE_CAN_FRAME 1 /* CAN data frame */
+#define CPC_CMD_TYPE_CONTROL 3 /* control of interface behavior */
+#define CPC_CMD_TYPE_CAN_PARAMS 6 /* set CAN parameters */
+#define CPC_CMD_TYPE_RTR_FRAME 13 /* CAN remote frame */
+#define CPC_CMD_TYPE_CAN_STATE 14 /* CAN state message */
+#define CPC_CMD_TYPE_EXT_CAN_FRAME 15 /* Extended CAN data frame */
+#define CPC_CMD_TYPE_EXT_RTR_FRAME 16 /* Extended CAN remote frame */
+#define CPC_CMD_TYPE_CAN_EXIT 200 /* exit the CAN */
+
+#define CPC_CMD_TYPE_INQ_ERR_COUNTER 25 /* request the CAN error counters */
+#define CPC_CMD_TYPE_CLEAR_MSG_QUEUE 8 /* clear CPC_MSG queue */
+#define CPC_CMD_TYPE_CLEAR_CMD_QUEUE 28 /* clear CPC_CMD queue */
+
+#define CPC_CC_TYPE_SJA1000 2 /* Philips basic CAN controller */
+
+#define CPC_CAN_ECODE_ERRFRAME 0x01 /* Ecode type */
+
+/* Overrun types */
+#define CPC_OVR_EVENT_CAN 0x01
+#define CPC_OVR_EVENT_CANSTATE 0x02
+#define CPC_OVR_EVENT_BUSERROR 0x04
+
+/*
+ * If the CAN controller lost a message we indicate it with the highest bit
+ * set in the count field.
+ */
+#define CPC_OVR_HW 0x80
+
+/* Size of the "struct ems_cpc_msg" without the union */
+#define CPC_MSG_HEADER_LEN 11
+#define CPC_CAN_MSG_MIN_SIZE 5
+
+/* Define these values to match your devices */
+#define USB_CPCUSB_VENDOR_ID 0x12D6
+
+#define USB_CPCUSB_ARM7_PRODUCT_ID 0x0444
+
+/* Mode register NXP LPC2119/SJA1000 CAN Controller */
+#define SJA1000_MOD_NORMAL 0x00
+#define SJA1000_MOD_RM 0x01
+
+/* ECC register NXP LPC2119/SJA1000 CAN Controller */
+#define SJA1000_ECC_SEG 0x1F
+#define SJA1000_ECC_DIR 0x20
+#define SJA1000_ECC_ERR 0x06
+#define SJA1000_ECC_BIT 0x00
+#define SJA1000_ECC_FORM 0x40
+#define SJA1000_ECC_STUFF 0x80
+#define SJA1000_ECC_MASK 0xc0
+
+/* Status register content */
+#define SJA1000_SR_BS 0x80
+#define SJA1000_SR_ES 0x40
+
+#define SJA1000_DEFAULT_OUTPUT_CONTROL 0xDA
+
+/*
+ * The device actually uses a 16MHz clock to generate the CAN clock
+ * but it expects SJA1000 bit settings based on 8MHz (is internally
+ * converted).
+ */
+#define EMS_USB_ARM7_CLOCK 8000000
+
+/*
+ * CAN-Message representation in a CPC_MSG. Message object type is
+ * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or
+ * CPC_MSG_TYPE_EXT_CAN_FRAME or CPC_MSG_TYPE_EXT_RTR_FRAME.
+ */
+struct cpc_can_msg {
+ u32 id;
+ u8 length;
+ u8 msg[8];
+};
+
+/* Representation of the CAN parameters for the SJA1000 controller */
+struct cpc_sja1000_params {
+ u8 mode;
+ u8 acc_code0;
+ u8 acc_code1;
+ u8 acc_code2;
+ u8 acc_code3;
+ u8 acc_mask0;
+ u8 acc_mask1;
+ u8 acc_mask2;
+ u8 acc_mask3;
+ u8 btr0;
+ u8 btr1;
+ u8 outp_contr;
+};
+
+/* CAN params message representation */
+struct cpc_can_params {
+ u8 cc_type;
+
+ /* Will support M16C CAN controller in the future */
+ union {
+ struct cpc_sja1000_params sja1000;
+ } cc_params;
+};
+
+/* Structure for confirmed message handling */
+struct cpc_confirm {
+ u8 error; /* error code */
+};
+
+/* Structure for overrun conditions */
+struct cpc_overrun {
+ u8 event;
+ u8 count;
+};
+
+/* SJA1000 CAN errors (compatible to NXP LPC2119) */
+struct cpc_sja1000_can_error {
+ u8 ecc;
+ u8 rxerr;
+ u8 txerr;
+};
+
+/* structure for CAN error conditions */
+struct cpc_can_error {
+ u8 ecode;
+
+ struct {
+ u8 cc_type;
+
+ /* Other controllers may also provide error code capture regs */
+ union {
+ struct cpc_sja1000_can_error sja1000;
+ } regs;
+ } cc;
+};
+
+/*
+ * Structure containing RX/TX error counter. This structure is used to request
+ * the values of the CAN controllers TX and RX error counter.
+ */
+struct cpc_can_err_counter {
+ u8 rx;
+ u8 tx;
+};
+
+/* Main message type used between library and application */
+struct __attribute__ ((packed)) ems_cpc_msg {
+ u8 type; /* type of message */
+ u8 length; /* length of data within union 'msg' */
+ u8 msgid; /* confirmation handle */
+ u32 ts_sec; /* timestamp in seconds */
+ u32 ts_nsec; /* timestamp in nano seconds */
+
+ union {
+ u8 generic[64];
+ struct cpc_can_msg can_msg;
+ struct cpc_can_params can_params;
+ struct cpc_confirm confirmation;
+ struct cpc_overrun overrun;
+ struct cpc_can_error error;
+ struct cpc_can_err_counter err_counter;
+ u8 can_state;
+ } msg;
+};
+
+/*
+ * Table of devices that work with this driver
+ * NOTE: This driver supports only CPC-USB/ARM7 (LPC2119) yet.
+ */
+static struct usb_device_id ems_usb_table[] = {
+ {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_ARM7_PRODUCT_ID)},
+ {} /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, ems_usb_table);
+
+#define RX_BUFFER_SIZE 64
+#define CPC_HEADER_SIZE 4
+#define INTR_IN_BUFFER_SIZE 4
+
+#define MAX_RX_URBS 10
+#define MAX_TX_URBS CAN_ECHO_SKB_MAX
+
+struct ems_usb;
+
+struct ems_tx_urb_context {
+ struct ems_usb *dev;
+
+ u32 echo_index;
+ u8 dlc;
+};
+
+struct ems_usb {
+ struct can_priv can; /* must be the first member */
+ int open_time;
+
+ struct sk_buff *echo_skb[MAX_TX_URBS];
+
+ struct usb_device *udev;
+ struct net_device *netdev;
+
+ atomic_t active_tx_urbs;
+ struct usb_anchor tx_submitted;
+ struct ems_tx_urb_context tx_contexts[MAX_TX_URBS];
+
+ struct usb_anchor rx_submitted;
+
+ struct urb *intr_urb;
+
+ u8 *tx_msg_buffer;
+
+ u8 *intr_in_buffer;
+ unsigned int free_slots; /* remember number of available slots */
+
+ struct ems_cpc_msg active_params; /* active controller parameters */
+};
+
+static void ems_usb_read_interrupt_callback(struct urb *urb)
+{
+ struct ems_usb *dev = urb->context;
+ struct net_device *netdev = dev->netdev;
+ int err;
+
+ if (!netif_device_present(netdev))
+ return;
+
+ switch (urb->status) {
+ case 0:
+ dev->free_slots = dev->intr_in_buffer[1];
+ break;
+
+ case -ECONNRESET: /* unlink */
+ case -ENOENT:
+ case -ESHUTDOWN:
+ return;
+
+ default:
+ dev_info(netdev->dev.parent, "Rx interrupt aborted %d\n",
+ urb->status);
+ break;
+ }
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+
+ if (err == -ENODEV)
+ netif_device_detach(netdev);
+ else if (err)
+ dev_err(netdev->dev.parent,
+ "failed resubmitting intr urb: %d\n", err);
+
+ return;
+}
+
+static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+ struct can_frame *cf;
+ struct sk_buff *skb;
+ int i;
+ struct net_device_stats *stats = &dev->netdev->stats;
+
+ skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+ if (skb == NULL)
+ return;
+
+ skb->protocol = htons(ETH_P_CAN);
+
+ cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+
+ cf->can_id = msg->msg.can_msg.id;
+ cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8);
+
+ if (msg->type == CPC_MSG_TYPE_EXT_CAN_FRAME
+ || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME)
+ cf->can_id |= CAN_EFF_FLAG;
+
+ if (msg->type == CPC_MSG_TYPE_RTR_FRAME
+ || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME) {
+ cf->can_id |= CAN_RTR_FLAG;
+ } else {
+ for (i = 0; i < cf->can_dlc; i++)
+ cf->data[i] = msg->msg.can_msg.msg[i];
+ }
+
+ netif_rx(skb);
+
+ stats->rx_packets++;
+ stats->rx_bytes += cf->can_dlc;
+}
+
+static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+ struct can_frame *cf;
+ struct sk_buff *skb;
+ struct net_device_stats *stats = &dev->netdev->stats;
+
+ skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+ if (skb == NULL)
+ return;
+
+ skb->protocol = htons(ETH_P_CAN);
+
+ cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+ memset(cf, 0, sizeof(struct can_frame));
+
+ cf->can_id = CAN_ERR_FLAG;
+ cf->can_dlc = CAN_ERR_DLC;
+
+ if (msg->type == CPC_MSG_TYPE_CAN_STATE) {
+ u8 state = msg->msg.can_state;
+
+ if (state & SJA1000_SR_BS) {
+ dev->can.state = CAN_STATE_BUS_OFF;
+ cf->can_id |= CAN_ERR_BUSOFF;
+
+ can_bus_off(dev->netdev);
+ } else if (state & SJA1000_SR_ES) {
+ dev->can.state = CAN_STATE_ERROR_WARNING;
+ dev->can.can_stats.error_warning++;
+ } else {
+ dev->can.state = CAN_STATE_ERROR_ACTIVE;
+ dev->can.can_stats.error_passive++;
+ }
+ } else if (msg->type == CPC_MSG_TYPE_CAN_FRAME_ERROR) {
+ u8 ecc = msg->msg.error.cc.regs.sja1000.ecc;
+ u8 txerr = msg->msg.error.cc.regs.sja1000.txerr;
+ u8 rxerr = msg->msg.error.cc.regs.sja1000.rxerr;
+
+ /* bus error interrupt */
+ dev->can.can_stats.bus_error++;
+ stats->rx_errors++;
+
+ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
+
+ switch (ecc & SJA1000_ECC_MASK) {
+ case SJA1000_ECC_BIT:
+ cf->data[2] |= CAN_ERR_PROT_BIT;
+ break;
+ case SJA1000_ECC_FORM:
+ cf->data[2] |= CAN_ERR_PROT_FORM;
+ break;
+ case SJA1000_ECC_STUFF:
+ cf->data[2] |= CAN_ERR_PROT_STUFF;
+ break;
+ default:
+ cf->data[2] |= CAN_ERR_PROT_UNSPEC;
+ cf->data[3] = ecc & SJA1000_ECC_SEG;
+ break;
+ }
+
+ /* Error occured during transmission? */
+ if ((ecc & SJA1000_ECC_DIR) == 0)
+ cf->data[2] |= CAN_ERR_PROT_TX;
+
+ if (dev->can.state == CAN_STATE_ERROR_WARNING ||
+ dev->can.state == CAN_STATE_ERROR_PASSIVE) {
+ cf->data[1] = (txerr > rxerr) ?
+ CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE;
+ }
+ } else if (msg->type == CPC_MSG_TYPE_OVERRUN) {
+ cf->can_id |= CAN_ERR_CRTL;
+ cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+
+ stats->rx_over_errors++;
+ stats->rx_errors++;
+ }
+
+ netif_rx(skb);
+
+ stats->rx_packets++;
+ stats->rx_bytes += cf->can_dlc;
+}
+
+/*
+ * callback for bulk IN urb
+ */
+static void ems_usb_read_bulk_callback(struct urb *urb)
+{
+ struct ems_usb *dev = urb->context;
+ struct net_device *netdev;
+ int retval;
+
+ netdev = dev->netdev;
+
+ if (!netif_device_present(netdev))
+ return;
+
+ switch (urb->status) {
+ case 0: /* success */
+ break;
+
+ case -ENOENT:
+ return;
+
+ default:
+ dev_info(netdev->dev.parent, "Rx URB aborted (%d)\n",
+ urb->status);
+ goto resubmit_urb;
+ }
+
+ if (urb->actual_length > CPC_HEADER_SIZE) {
+ struct ems_cpc_msg *msg;
+ u8 *ibuf = urb->transfer_buffer;
+ u8 msg_count, again, start;
+
+ msg_count = ibuf[0] & ~0x80;
+ again = ibuf[0] & 0x80;
+
+ start = CPC_HEADER_SIZE;
+
+ while (msg_count) {
+ msg = (struct ems_cpc_msg *)&ibuf[start];
+
+ switch (msg->type) {
+ case CPC_MSG_TYPE_CAN_STATE:
+ /* Process CAN state changes */
+ ems_usb_rx_err(dev, msg);
+ break;
+
+ case CPC_MSG_TYPE_CAN_FRAME:
+ case CPC_MSG_TYPE_EXT_CAN_FRAME:
+ case CPC_MSG_TYPE_RTR_FRAME:
+ case CPC_MSG_TYPE_EXT_RTR_FRAME:
+ ems_usb_rx_can_msg(dev, msg);
+ break;
+
+ case CPC_MSG_TYPE_CAN_FRAME_ERROR:
+ /* Process errorframe */
+ ems_usb_rx_err(dev, msg);
+ break;
+
+ case CPC_MSG_TYPE_OVERRUN:
+ /* Message lost while receiving */
+ ems_usb_rx_err(dev, msg);
+ break;
+ }
+
+ start += CPC_MSG_HEADER_LEN + msg->length;
+ msg_count--;
+
+ if (start > urb->transfer_buffer_length) {
+ dev_err(netdev->dev.parent, "format error\n");
+ break;
+ }
+ }
+ }
+
+resubmit_urb:
+ usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
+ urb->transfer_buffer, RX_BUFFER_SIZE,
+ ems_usb_read_bulk_callback, dev);
+
+ retval = usb_submit_urb(urb, GFP_ATOMIC);
+
+ if (retval == -ENODEV)
+ netif_device_detach(netdev);
+ else if (retval)
+ dev_err(netdev->dev.parent,
+ "failed resubmitting read bulk urb: %d\n", retval);
+
+ return;
+}
+
+/*
+ * callback for bulk IN urb
+ */
+static void ems_usb_write_bulk_callback(struct urb *urb)
+{
+ struct ems_tx_urb_context *context = urb->context;
+ struct ems_usb *dev;
+ struct net_device *netdev;
+
+ BUG_ON(!context);
+
+ dev = context->dev;
+ netdev = dev->netdev;
+
+ /* free up our allocated buffer */
+ usb_buffer_free(urb->dev, urb->transfer_buffer_length,
+ urb->transfer_buffer, urb->transfer_dma);
+
+ atomic_dec(&dev->active_tx_urbs);
+
+ if (!netif_device_present(netdev))
+ return;
+
+ if (urb->status)
+ dev_info(netdev->dev.parent, "Tx URB aborted (%d)\n",
+ urb->status);
+
+ netdev->trans_start = jiffies;
+
+ /* transmission complete interrupt */
+ netdev->stats.tx_packets++;
+ netdev->stats.tx_bytes += context->dlc;
+
+ can_get_echo_skb(netdev, context->echo_index);
+
+ /* Release context */
+ context->echo_index = MAX_TX_URBS;
+
+ if (netif_queue_stopped(netdev))
+ netif_wake_queue(netdev);
+}
+
+/*
+ * Send the given CPC command synchronously
+ */
+static int ems_usb_command_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
+{
+ int actual_length;
+
+ /* Copy payload */
+ memcpy(&dev->tx_msg_buffer[CPC_HEADER_SIZE], msg,
+ msg->length + CPC_MSG_HEADER_LEN);
+
+ /* Clear header */
+ memset(&dev->tx_msg_buffer[0], 0, CPC_HEADER_SIZE);
+
+ return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2),
+ &dev->tx_msg_buffer[0],
+ msg->length + CPC_MSG_HEADER_LEN + CPC_HEADER_SIZE,
+ &actual_length, 1000);
+}
+
+/*
+ * Change CAN controllers' mode register
+ */
+static int ems_usb_write_mode(struct ems_usb *dev, u8 mode)
+{
+ dev->active_params.msg.can_params.cc_params.sja1000.mode = mode;
+
+ return ems_usb_command_msg(dev, &dev->active_params);
+}
+
+/*
+ * Send a CPC_Control command to change behaviour when interface receives a CAN
+ * message, bus error or CAN state changed notifications.
+ */
+static int ems_usb_control_cmd(struct ems_usb *dev, u8 val)
+{
+ struct ems_cpc_msg cmd;
+
+ cmd.type = CPC_CMD_TYPE_CONTROL;
+ cmd.length = CPC_MSG_HEADER_LEN + 1;
+
+ cmd.msgid = 0;
+
+ cmd.msg.generic[0] = val;
+
+ return ems_usb_command_msg(dev, &cmd);
+}
+
+/*
+ * Start interface
+ */
+static int ems_usb_start(struct ems_usb *dev)
+{
+ struct net_device *netdev = dev->netdev;
+ int err, i;
+
+ dev->intr_in_buffer[0] = 0;
+ dev->free_slots = 15; /* initial size */
+
+ for (i = 0; i < MAX_RX_URBS; i++) {
+ struct urb *urb = NULL;
+ u8 *buf = NULL;
+
+ /* create a URB, and a buffer for it */
+ urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!urb) {
+ dev_err(netdev->dev.parent,
+ "No memory left for URBs\n");
+ return -ENOMEM;
+ }
+
+ buf = usb_buffer_alloc(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
+ &urb->transfer_dma);
+ if (!buf) {
+ dev_err(netdev->dev.parent,
+ "No memory left for USB buffer\n");
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
+ buf, RX_BUFFER_SIZE,
+ ems_usb_read_bulk_callback, dev);
+ urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+ usb_anchor_urb(urb, &dev->rx_submitted);
+
+ err = usb_submit_urb(urb, GFP_KERNEL);
+ if (err) {
+ if (err == -ENODEV)
+ netif_device_detach(dev->netdev);
+
+ usb_unanchor_urb(urb);
+ usb_buffer_free(dev->udev, RX_BUFFER_SIZE, buf,
+ urb->transfer_dma);
+ break;
+ }
+
+ /* Drop reference, USB core will take care of freeing it */
+ usb_free_urb(urb);
+ }
+
+ /* Did we submit any URBs */
+ if (i == 0) {
+ dev_warn(netdev->dev.parent, "couldn't setup read URBs\n");
+ return err;
+ }
+
+ /* Warn if we've couldn't transmit all the URBs */
+ if (i < MAX_RX_URBS)
+ dev_warn(netdev->dev.parent, "rx performance may be slow\n");
+
+ /* Setup and start interrupt URB */
+ usb_fill_int_urb(dev->intr_urb, dev->udev,
+ usb_rcvintpipe(dev->udev, 1),
+ dev->intr_in_buffer,
+ INTR_IN_BUFFER_SIZE,
+ ems_usb_read_interrupt_callback, dev, 1);
+
+ err = usb_submit_urb(dev->intr_urb, GFP_KERNEL);
+ if (err) {
+ if (err == -ENODEV)
+ netif_device_detach(dev->netdev);
+
+ dev_warn(netdev->dev.parent, "intr URB submit failed: %d\n",
+ err);
+
+ return err;
+ }
+
+ /* CPC-USB will transfer received message to host */
+ err = ems_usb_control_cmd(dev, CONTR_CAN_MESSAGE | CONTR_CONT_ON);
+ if (err)
+ goto failed;
+
+ /* CPC-USB will transfer CAN state changes to host */
+ err = ems_usb_control_cmd(dev, CONTR_CAN_STATE | CONTR_CONT_ON);
+ if (err)
+ goto failed;
+
+ /* CPC-USB will transfer bus errors to host */
+ err = ems_usb_control_cmd(dev, CONTR_BUS_ERROR | CONTR_CONT_ON);
+ if (err)
+ goto failed;
+
+ err = ems_usb_write_mode(dev, SJA1000_MOD_NORMAL);
+ if (err)
+ goto failed;
+
+ dev->can.state = CAN_STATE_ERROR_ACTIVE;
+
+ return 0;
+
+failed:
+ if (err == -ENODEV)
+ netif_device_detach(dev->netdev);
+
+ dev_warn(netdev->dev.parent, "couldn't submit control: %d\n", err);
+
+ return err;
+}
+
+static void unlink_all_urbs(struct ems_usb *dev)
+{
+ int i;
+
+ usb_unlink_urb(dev->intr_urb);
+
+ usb_kill_anchored_urbs(&dev->rx_submitted);
+
+ usb_kill_anchored_urbs(&dev->tx_submitted);
+ atomic_set(&dev->active_tx_urbs, 0);
+
+ for (i = 0; i < MAX_TX_URBS; i++)
+ dev->tx_contexts[i].echo_index = MAX_TX_URBS;
+}
+
+static int ems_usb_open(struct net_device *netdev)
+{
+ struct ems_usb *dev = netdev_priv(netdev);
+ int err;
+
+ err = ems_usb_write_mode(dev, SJA1000_MOD_RM);
+ if (err)
+ return err;
+
+ /* common open */
+ err = open_candev(netdev);
+ if (err)
+ return err;
+
+ /* finally start device */
+ err = ems_usb_start(dev);
+ if (err) {
+ if (err == -ENODEV)
+ netif_device_detach(dev->netdev);
+
+ dev_warn(netdev->dev.parent, "couldn't start device: %d\n",
+ err);
+
+ close_candev(netdev);
+
+ return err;
+ }
+
+ dev->open_time = jiffies;
+
+ netif_start_queue(netdev);
+
+ return 0;
+}
+
+static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct ems_usb *dev = netdev_priv(netdev);
+ struct ems_tx_urb_context *context = NULL;
+ struct net_device_stats *stats = &netdev->stats;
+ struct can_frame *cf = (struct can_frame *)skb->data;
+ struct ems_cpc_msg *msg;
+ struct urb *urb;
+ u8 *buf;
+ int i, err;
+ size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN
+ + sizeof(struct cpc_can_msg);
+
+ /* create a URB, and a buffer for it, and copy the data to the URB */
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb) {
+ dev_err(netdev->dev.parent, "No memory left for URBs\n");
+ goto nomem;
+ }
+
+ buf = usb_buffer_alloc(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma);
+ if (!buf) {
+ dev_err(netdev->dev.parent, "No memory left for USB buffer\n");
+ usb_free_urb(urb);
+ goto nomem;
+ }
+
+ msg = (struct ems_cpc_msg *)&buf[CPC_HEADER_SIZE];
+
+ msg->msg.can_msg.id = cf->can_id & CAN_ERR_MASK;
+ msg->msg.can_msg.length = cf->can_dlc;
+
+ if (cf->can_id & CAN_RTR_FLAG) {
+ msg->type = cf->can_id & CAN_EFF_FLAG ?
+ CPC_CMD_TYPE_EXT_RTR_FRAME : CPC_CMD_TYPE_RTR_FRAME;
+
+ msg->length = CPC_CAN_MSG_MIN_SIZE;
+ } else {
+ msg->type = cf->can_id & CAN_EFF_FLAG ?
+ CPC_CMD_TYPE_EXT_CAN_FRAME : CPC_CMD_TYPE_CAN_FRAME;
+
+ for (i = 0; i < cf->can_dlc; i++)
+ msg->msg.can_msg.msg[i] = cf->data[i];
+
+ msg->length = CPC_CAN_MSG_MIN_SIZE + cf->can_dlc;
+ }
+
+ for (i = 0; i < MAX_TX_URBS; i++) {
+ if (dev->tx_contexts[i].echo_index == MAX_TX_URBS) {
+ context = &dev->tx_contexts[i];
+ break;
+ }
+ }
+
+ /*
+ * May never happen! When this happens we'd more URBs in flight as
+ * allowed (MAX_TX_URBS).
+ */
+ if (!context) {
+ usb_unanchor_urb(urb);
+ usb_buffer_free(dev->udev, size, buf, urb->transfer_dma);
+
+ dev_warn(netdev->dev.parent, "couldn't find free context\n");
+
+ return NETDEV_TX_BUSY;
+ }
+
+ context->dev = dev;
+ context->echo_index = i;
+ context->dlc = cf->can_dlc;
+
+ usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf,
+ size, ems_usb_write_bulk_callback, context);
+ urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+ usb_anchor_urb(urb, &dev->tx_submitted);
+
+ can_put_echo_skb(skb, netdev, context->echo_index);
+
+ atomic_inc(&dev->active_tx_urbs);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (unlikely(err)) {
+ can_free_echo_skb(netdev, context->echo_index);
+
+ usb_unanchor_urb(urb);
+ usb_buffer_free(dev->udev, size, buf, urb->transfer_dma);
+ dev_kfree_skb(skb);
+
+ atomic_dec(&dev->active_tx_urbs);
+
+ if (err == -ENODEV) {
+ netif_device_detach(netdev);
+ } else {
+ dev_warn(netdev->dev.parent, "failed tx_urb %d\n", err);
+
+ stats->tx_dropped++;
+ }
+ } else {
+ netdev->trans_start = jiffies;
+
+ /* Slow down tx path */
+ if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS ||
+ dev->free_slots < 5) {
+ netif_stop_queue(netdev);
+ }
+ }
+
+ /*
+ * Release our reference to this URB, the USB core will eventually free
+ * it entirely.
+ */
+ usb_free_urb(urb);
+
+ return NETDEV_TX_OK;
+
+nomem:
+ if (skb)
+ dev_kfree_skb(skb);
+
+ stats->tx_dropped++;
+
+ return NETDEV_TX_OK;
+}
+
+static int ems_usb_close(struct net_device *netdev)
+{
+ struct ems_usb *dev = netdev_priv(netdev);
+
+ /* Stop polling */
+ unlink_all_urbs(dev);
+
+ netif_stop_queue(netdev);
+
+ /* Set CAN controller to reset mode */
+ if (ems_usb_write_mode(dev, SJA1000_MOD_RM))
+ dev_warn(netdev->dev.parent, "couldn't stop device");
+
+ close_candev(netdev);
+
+ dev->open_time = 0;
+
+ return 0;
+}
+
+static const struct net_device_ops ems_usb_netdev_ops = {
+ .ndo_open = ems_usb_open,
+ .ndo_stop = ems_usb_close,
+ .ndo_start_xmit = ems_usb_start_xmit,
+};
+
+static struct can_bittiming_const ems_usb_bittiming_const = {
+ .name = "ems_usb",
+ .tseg1_min = 1,
+ .tseg1_max = 16,
+ .tseg2_min = 1,
+ .tseg2_max = 8,
+ .sjw_max = 4,
+ .brp_min = 1,
+ .brp_max = 64,
+ .brp_inc = 1,
+};
+
+static int ems_usb_set_mode(struct net_device *netdev, enum can_mode mode)
+{
+ struct ems_usb *dev = netdev_priv(netdev);
+
+ if (!dev->open_time)
+ return -EINVAL;
+
+ switch (mode) {
+ case CAN_MODE_START:
+ if (ems_usb_write_mode(dev, SJA1000_MOD_NORMAL))
+ dev_warn(netdev->dev.parent, "couldn't start device");
+
+ if (netif_queue_stopped(netdev))
+ netif_wake_queue(netdev);
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int ems_usb_set_bittiming(struct net_device *netdev)
+{
+ struct ems_usb *dev = netdev_priv(netdev);
+ struct can_bittiming *bt = &dev->can.bittiming;
+ u8 btr0, btr1;
+
+ btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6);
+ btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) |
+ (((bt->phase_seg2 - 1) & 0x7) << 4);
+ if (dev->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+ btr1 |= 0x80;
+
+ dev_info(netdev->dev.parent, "setting BTR0=0x%02x BTR1=0x%02x\n",
+ btr0, btr1);
+
+ dev->active_params.msg.can_params.cc_params.sja1000.btr0 = btr0;
+ dev->active_params.msg.can_params.cc_params.sja1000.btr1 = btr1;
+
+ return ems_usb_command_msg(dev, &dev->active_params);
+}
+
+static void init_params_sja1000(struct ems_cpc_msg *msg)
+{
+ struct cpc_sja1000_params *sja1000 =
+ &msg->msg.can_params.cc_params.sja1000;
+
+ msg->type = CPC_CMD_TYPE_CAN_PARAMS;
+ msg->length = sizeof(struct cpc_can_params);
+ msg->msgid = 0;
+
+ msg->msg.can_params.cc_type = CPC_CC_TYPE_SJA1000;
+
+ /* Acceptance filter open */
+ sja1000->acc_code0 = 0x00;
+ sja1000->acc_code1 = 0x00;
+ sja1000->acc_code2 = 0x00;
+ sja1000->acc_code3 = 0x00;
+
+ /* Acceptance filter open */
+ sja1000->acc_mask0 = 0xFF;
+ sja1000->acc_mask1 = 0xFF;
+ sja1000->acc_mask2 = 0xFF;
+ sja1000->acc_mask3 = 0xFF;
+
+ sja1000->btr0 = 0;
+ sja1000->btr1 = 0;
+
+ sja1000->outp_contr = SJA1000_DEFAULT_OUTPUT_CONTROL;
+ sja1000->mode = SJA1000_MOD_RM;
+}
+
+/*
+ * probe function for new CPC-USB devices
+ */
+static int ems_usb_probe(struct usb_interface *intf,
+ const struct usb_device_id *id)
+{
+ struct net_device *netdev;
+ struct ems_usb *dev;
+ int i, err = -ENOMEM;
+
+ netdev = alloc_candev(sizeof(struct ems_usb));
+ if (!netdev) {
+ dev_err(netdev->dev.parent, "Couldn't alloc candev\n");
+ return -ENOMEM;
+ }
+
+ dev = netdev_priv(netdev);
+
+ dev->udev = interface_to_usbdev(intf);
+ dev->netdev = netdev;
+
+ dev->can.state = CAN_STATE_STOPPED;
+ dev->can.clock.freq = EMS_USB_ARM7_CLOCK;
+ dev->can.bittiming_const = &ems_usb_bittiming_const;
+ dev->can.do_set_bittiming = ems_usb_set_bittiming;
+ dev->can.do_set_mode = ems_usb_set_mode;
+
+ netdev->flags |= IFF_ECHO; /* we support local echo */
+
+ netdev->netdev_ops = &ems_usb_netdev_ops;
+
+ netdev->flags |= IFF_ECHO; /* we support local echo */
+
+ init_usb_anchor(&dev->rx_submitted);
+
+ init_usb_anchor(&dev->tx_submitted);
+ atomic_set(&dev->active_tx_urbs, 0);
+
+ for (i = 0; i < MAX_TX_URBS; i++)
+ dev->tx_contexts[i].echo_index = MAX_TX_URBS;
+
+ dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!dev->intr_urb) {
+ dev_err(netdev->dev.parent, "Couldn't alloc intr URB\n");
+ goto cleanup_candev;
+ }
+
+ dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL);
+ if (!dev->intr_in_buffer) {
+ dev_err(netdev->dev.parent, "Couldn't alloc Intr buffer\n");
+ goto cleanup_intr_urb;
+ }
+
+ dev->tx_msg_buffer = kzalloc(CPC_HEADER_SIZE +
+ sizeof(struct ems_cpc_msg), GFP_KERNEL);
+ if (!dev->tx_msg_buffer) {
+ dev_err(netdev->dev.parent, "Couldn't alloc Tx buffer\n");
+ goto cleanup_intr_in_buffer;
+ }
+
+ usb_set_intfdata(intf, dev);
+
+ SET_NETDEV_DEV(netdev, &intf->dev);
+
+ init_params_sja1000(&dev->active_params);
+
+ err = ems_usb_command_msg(dev, &dev->active_params);
+ if (err) {
+ dev_err(netdev->dev.parent,
+ "couldn't initialize controller: %d\n", err);
+ goto cleanup_tx_msg_buffer;
+ }
+
+ err = register_candev(netdev);
+ if (err) {
+ dev_err(netdev->dev.parent,
+ "couldn't register CAN device: %d\n", err);
+ goto cleanup_tx_msg_buffer;
+ }
+
+ return 0;
+
+cleanup_tx_msg_buffer:
+ kfree(dev->tx_msg_buffer);
+
+cleanup_intr_in_buffer:
+ kfree(dev->intr_in_buffer);
+
+cleanup_intr_urb:
+ usb_free_urb(dev->intr_urb);
+
+cleanup_candev:
+ free_candev(netdev);
+
+ return err;
+}
+
+/*
+ * called by the usb core when the device is removed from the system
+ */
+static void ems_usb_disconnect(struct usb_interface *intf)
+{
+ struct ems_usb *dev = usb_get_intfdata(intf);
+
+ usb_set_intfdata(intf, NULL);
+
+ if (dev) {
+ unregister_netdev(dev->netdev);
+ free_candev(dev->netdev);
+
+ unlink_all_urbs(dev);
+
+ usb_free_urb(dev->intr_urb);
+
+ kfree(dev->intr_in_buffer);
+ }
+}
+
+/* usb specific object needed to register this driver with the usb subsystem */
+static struct usb_driver ems_usb_driver = {
+ .name = "ems_usb",
+ .probe = ems_usb_probe,
+ .disconnect = ems_usb_disconnect,
+ .id_table = ems_usb_table,
+};
+
+static int __init ems_usb_init(void)
+{
+ int err;
+
+ printk(KERN_INFO "CPC-USB kernel driver loaded\n");
+
+ /* register this driver with the USB subsystem */
+ err = usb_register(&ems_usb_driver);
+
+ if (err) {
+ err("usb_register failed. Error number %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void __exit ems_usb_exit(void)
+{
+ /* deregister this driver with the USB subsystem */
+ usb_deregister(&ems_usb_driver);
+}
+
+module_init(ems_usb_init);
+module_exit(ems_usb_exit);
cp->uio_dev = iminor(inode);
- cnic_shutdown_bnx2_rx_ring(dev);
-
cnic_init_bnx2_tx_ring(dev);
cnic_init_bnx2_rx_ring(dev);
struct cnic_dev *dev = uinfo->priv;
struct cnic_local *cp = dev->cnic_priv;
+ cnic_shutdown_bnx2_rx_ring(dev);
+
cp->uio_dev = -1;
return 0;
}
static int __devinit cpmac_probe(struct platform_device *pdev)
{
int rc, phy_id;
- char mdio_bus_id[BUS_ID_SIZE];
+ char mdio_bus_id[MII_BUS_ID_SIZE];
struct resource *mem;
struct cpmac_priv *priv;
struct net_device *dev;
pdata = pdev->dev.platform_data;
if (external_switch || dumb_switch) {
- strncpy(mdio_bus_id, "0", BUS_ID_SIZE); /* fixed phys bus */
+ strncpy(mdio_bus_id, "0", MII_BUS_ID_SIZE); /* fixed phys bus */
phy_id = pdev->id;
} else {
for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) {
continue;
if (!cpmac_mii->phy_map[phy_id])
continue;
- strncpy(mdio_bus_id, cpmac_mii->id, BUS_ID_SIZE);
+ strncpy(mdio_bus_id, cpmac_mii->id, MII_BUS_ID_SIZE);
break;
}
}
priv->msg_enable = netif_msg_init(debug_level, 0xff);
memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr));
- snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id);
+ snprintf(priv->phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id);
priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link, 0,
PHY_INTERFACE_MODE_MII);
.ndo_poll_controller = ehea_netpoll,
#endif
.ndo_get_stats = ehea_get_stats,
- .ndo_change_mtu = eth_change_mtu,
.ndo_set_mac_address = ehea_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_multicast_list = ehea_set_multicast_list,
wrfl();
}
-/**
- * igb_update_mc_addr_list - Update Multicast addresses
- * @hw: pointer to the HW structure
- * @mc_addr_list: array of multicast addresses to program
- * @mc_addr_count: number of multicast addresses to program
- *
- * Updates entire Multicast Table Array.
- * The caller must have a packed mc_addr_list of multicast addresses.
- **/
-void igb_update_mc_addr_list(struct e1000_hw *hw,
- u8 *mc_addr_list, u32 mc_addr_count)
-{
- u32 hash_value, hash_bit, hash_reg;
- int i;
-
- /* clear mta_shadow */
- memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
-
- /* update mta_shadow from mc_addr_list */
- for (i = 0; (u32) i < mc_addr_count; i++) {
- hash_value = igb_hash_mc_addr(hw, mc_addr_list);
-
- hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
- hash_bit = hash_value & 0x1F;
-
- hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
- mc_addr_list += (ETH_ALEN);
- }
-
- /* replace the entire MTA table */
- for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
- array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
- wrfl();
-}
-
/**
* igb_hash_mc_addr - Generate a multicast hash value
* @hw: pointer to the HW structure
* the multicast filter table array address and new table value. See
* igb_mta_set()
**/
-u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
{
u32 hash_value, hash_mask;
u8 bit_shift = 0;
return hash_value;
}
+/**
+ * igb_update_mc_addr_list - Update Multicast addresses
+ * @hw: pointer to the HW structure
+ * @mc_addr_list: array of multicast addresses to program
+ * @mc_addr_count: number of multicast addresses to program
+ *
+ * Updates entire Multicast Table Array.
+ * The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void igb_update_mc_addr_list(struct e1000_hw *hw,
+ u8 *mc_addr_list, u32 mc_addr_count)
+{
+ u32 hash_value, hash_bit, hash_reg;
+ int i;
+
+ /* clear mta_shadow */
+ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+ /* update mta_shadow from mc_addr_list */
+ for (i = 0; (u32) i < mc_addr_count; i++) {
+ hash_value = igb_hash_mc_addr(hw, mc_addr_list);
+
+ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+ hash_bit = hash_value & 0x1F;
+
+ hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
+ mc_addr_list += (ETH_ALEN);
+ }
+
+ /* replace the entire MTA table */
+ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+ array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
+ wrfl();
+}
+
/**
* igb_clear_hw_cntrs_base - Clear base hardware counters
* @hw: pointer to the HW structure
#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2
extern void e1000_init_function_pointers_82575(struct e1000_hw *hw);
-extern u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr);
#endif
enum ixgbe_fc_mode last_lfc_mode;
/* Interrupt Throttle Rate */
- u32 itr_setting;
+ u32 rx_itr_setting;
+ u32 tx_itr_setting;
u16 eitr_low;
u16 eitr_high;
struct ixgbe_hw_stats stats;
/* Interrupt Throttle Rate */
- u32 eitr_param;
+ u32 rx_eitr_param;
+ u32 tx_eitr_param;
unsigned long state;
u64 tx_busy;
ec->tx_max_coalesced_frames_irq = adapter->tx_ring[0].work_limit;
/* only valid if in constant ITR mode */
- switch (adapter->itr_setting) {
+ switch (adapter->rx_itr_setting) {
case 0:
/* throttling disabled */
ec->rx_coalesce_usecs = 0;
break;
default:
/* fixed interrupt rate mode */
- ec->rx_coalesce_usecs = 1000000/adapter->eitr_param;
+ ec->rx_coalesce_usecs = 1000000/adapter->rx_eitr_param;
break;
}
+
+ /* only valid if in constant ITR mode */
+ switch (adapter->tx_itr_setting) {
+ case 0:
+ /* throttling disabled */
+ ec->tx_coalesce_usecs = 0;
+ break;
+ case 1:
+ /* dynamic ITR mode */
+ ec->tx_coalesce_usecs = 1;
+ break;
+ default:
+ ec->tx_coalesce_usecs = 1000000/adapter->tx_eitr_param;
+ break;
+ }
+
return 0;
}
struct ixgbe_q_vector *q_vector;
int i;
+ /*
+ * don't accept tx specific changes if we've got mixed RxTx vectors
+ * test and jump out here if needed before changing the rx numbers
+ */
+ if ((1000000/ec->tx_coalesce_usecs) != adapter->tx_eitr_param &&
+ adapter->q_vector[0]->txr_count && adapter->q_vector[0]->rxr_count)
+ return -EINVAL;
+
if (ec->tx_max_coalesced_frames_irq)
adapter->tx_ring[0].work_limit = ec->tx_max_coalesced_frames_irq;
return -EINVAL;
/* store the value in ints/second */
- adapter->eitr_param = 1000000/ec->rx_coalesce_usecs;
+ adapter->rx_eitr_param = 1000000/ec->rx_coalesce_usecs;
/* static value of interrupt rate */
- adapter->itr_setting = adapter->eitr_param;
+ adapter->rx_itr_setting = adapter->rx_eitr_param;
/* clear the lower bit as its used for dynamic state */
- adapter->itr_setting &= ~1;
+ adapter->rx_itr_setting &= ~1;
} else if (ec->rx_coalesce_usecs == 1) {
/* 1 means dynamic mode */
- adapter->eitr_param = 20000;
- adapter->itr_setting = 1;
+ adapter->rx_eitr_param = 20000;
+ adapter->rx_itr_setting = 1;
} else {
/*
* any other value means disable eitr, which is best
* served by setting the interrupt rate very high
*/
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
- adapter->eitr_param = IXGBE_MAX_RSC_INT_RATE;
+ adapter->rx_eitr_param = IXGBE_MAX_RSC_INT_RATE;
else
- adapter->eitr_param = IXGBE_MAX_INT_RATE;
- adapter->itr_setting = 0;
+ adapter->rx_eitr_param = IXGBE_MAX_INT_RATE;
+ adapter->rx_itr_setting = 0;
+ }
+
+ if (ec->tx_coalesce_usecs > 1) {
+ /* check the limits */
+ if ((1000000/ec->tx_coalesce_usecs > IXGBE_MAX_INT_RATE) ||
+ (1000000/ec->tx_coalesce_usecs < IXGBE_MIN_INT_RATE))
+ return -EINVAL;
+
+ /* store the value in ints/second */
+ adapter->tx_eitr_param = 1000000/ec->tx_coalesce_usecs;
+
+ /* static value of interrupt rate */
+ adapter->tx_itr_setting = adapter->tx_eitr_param;
+
+ /* clear the lower bit as its used for dynamic state */
+ adapter->tx_itr_setting &= ~1;
+ } else if (ec->tx_coalesce_usecs == 1) {
+ /* 1 means dynamic mode */
+ adapter->tx_eitr_param = 10000;
+ adapter->tx_itr_setting = 1;
+ } else {
+ adapter->tx_eitr_param = IXGBE_MAX_INT_RATE;
+ adapter->tx_itr_setting = 0;
}
/* MSI/MSIx Interrupt Mode */
for (i = 0; i < num_vectors; i++) {
q_vector = adapter->q_vector[i];
if (q_vector->txr_count && !q_vector->rxr_count)
- /* tx vector gets half the rate */
- q_vector->eitr = (adapter->eitr_param >> 1);
+ /* tx only */
+ q_vector->eitr = adapter->tx_eitr_param;
else
/* rx only or mixed */
- q_vector->eitr = adapter->eitr_param;
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
/* Legacy Interrupt Mode */
} else {
q_vector = adapter->q_vector[0];
- q_vector->eitr = adapter->eitr_param;
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
r_idx + 1);
}
- /* if this is a tx only vector halve the interrupt rate */
if (q_vector->txr_count && !q_vector->rxr_count)
- q_vector->eitr = (adapter->eitr_param >> 1);
+ /* tx only */
+ q_vector->eitr = adapter->tx_eitr_param;
else if (q_vector->rxr_count)
- /* rx only */
- q_vector->eitr = adapter->eitr_param;
+ /* rx or mixed */
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
/* If all Rx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter,
/* If all Rx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter,
if (!ixgbe_clean_tx_irq(q_vector, tx_ring))
work_done = budget;
- /* If all Rx work done, exit the polling mode */
+ /* If all Tx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->tx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));
struct ixgbe_hw *hw = &adapter->hw;
IXGBE_WRITE_REG(hw, IXGBE_EITR(0),
- EITR_INTS_PER_SEC_TO_REG(adapter->eitr_param));
+ EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr_param));
ixgbe_set_ivar(adapter, 0, 0, 0);
ixgbe_set_ivar(adapter, 1, 0, 0);
return mrqc;
}
+/**
+ * ixgbe_configure_rscctl - enable RSC for the indicated ring
+ * @adapter: address of board private structure
+ * @index: index of ring to set
+ * @rx_buf_len: rx buffer length
+ **/
+static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, int index,
+ int rx_buf_len)
+{
+ struct ixgbe_ring *rx_ring;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int j;
+ u32 rscctrl;
+
+ rx_ring = &adapter->rx_ring[index];
+ j = rx_ring->reg_idx;
+ rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j));
+ rscctrl |= IXGBE_RSCCTL_RSCEN;
+ /*
+ * we must limit the number of descriptors so that the
+ * total size of max desc * buf_len is not greater
+ * than 65535
+ */
+ if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) {
+#if (MAX_SKB_FRAGS > 16)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+#elif (MAX_SKB_FRAGS > 8)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
+#elif (MAX_SKB_FRAGS > 4)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
+#else
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
+#endif
+ } else {
+ if (rx_buf_len < IXGBE_RXBUFFER_4096)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+ else if (rx_buf_len < IXGBE_RXBUFFER_8192)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
+ else
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl);
+}
+
/**
* ixgbe_configure_rx - Configure 8259x Receive Unit after Reset
* @adapter: board private structure
u32 fctrl, hlreg0;
u32 reta = 0, mrqc = 0;
u32 rdrxctl;
- u32 rscctrl;
int rx_buf_len;
/* Decide whether to use packet split mode or not */
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
/* Enable 82599 HW-RSC */
- for (i = 0; i < adapter->num_rx_queues; i++) {
- rx_ring = &adapter->rx_ring[i];
- j = rx_ring->reg_idx;
- rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j));
- rscctrl |= IXGBE_RSCCTL_RSCEN;
- /*
- * we must limit the number of descriptors so that the
- * total size of max desc * buf_len is not greater
- * than 65535
- */
- if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) {
-#if (MAX_SKB_FRAGS > 16)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
-#elif (MAX_SKB_FRAGS > 8)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
-#elif (MAX_SKB_FRAGS > 4)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
-#else
- rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
-#endif
- } else {
- if (rx_buf_len < IXGBE_RXBUFFER_4096)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
- else if (rx_buf_len < IXGBE_RXBUFFER_8192)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
- else
- rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
- }
- IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl);
- }
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ ixgbe_configure_rscctl(adapter, i, rx_buf_len);
+
/* Disable RSC for ACK packets */
IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
(IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
ixgbe_napi_disable_all(adapter);
+ clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
+ del_timer_sync(&adapter->sfp_timer);
del_timer_sync(&adapter->watchdog_timer);
cancel_work_sync(&adapter->watchdog_task);
/* If budget not fully consumed, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr(adapter);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter, IXGBE_EIMS_RTX_QUEUE);
if (!q_vector)
goto err_out;
q_vector->adapter = adapter;
- q_vector->eitr = adapter->eitr_param;
+ if (q_vector->txr_count && !q_vector->rxr_count)
+ q_vector->eitr = adapter->tx_eitr_param;
+ else
+ q_vector->eitr = adapter->rx_eitr_param;
q_vector->v_idx = q_idx;
netif_napi_add(adapter->netdev, &q_vector->napi, (*poll), 64);
adapter->q_vector[q_idx] = q_vector;
hw->fc.disable_fc_autoneg = false;
/* enable itr by default in dynamic mode */
- adapter->itr_setting = 1;
- adapter->eitr_param = 20000;
+ adapter->rx_itr_setting = 1;
+ adapter->rx_eitr_param = 20000;
+ adapter->tx_itr_setting = 1;
+ adapter->tx_eitr_param = 10000;
/* set defaults for eitr in MegaBytes */
adapter->eitr_low = 10;
}
netxen_schedule_work(adapter, netxen_fw_poll_work, FW_POLL_DELAY);
+ return 0;
err_out_detach:
netxen_nic_detach(adapter);
netif_wake_queue(adapter->netdev);
- goto done;
+ clear_bit(__NX_RESETTING, &adapter->state);
} else {
+ clear_bit(__NX_RESETTING, &adapter->state);
if (!netxen_nic_reset_context(adapter)) {
adapter->netdev->trans_start = jiffies;
- goto done;
+ return;
}
/* context reset failed, fall through for fw reset */
request_reset:
adapter->need_fw_reset = 1;
-done:
- clear_bit(__NX_RESETTING, &adapter->state);
}
struct net_device_stats *netxen_nic_get_stats(struct net_device *netdev)
base = &virt[hw_info[i].offset & (req.Size-1)];
if ((readb(base+0) == hw_info[i].a0) &&
(readb(base+2) == hw_info[i].a1) &&
- (readb(base+4) == hw_info[i].a2))
- break;
- }
- if (i < NR_INFO) {
- for (j = 0; j < 6; j++)
- dev->dev_addr[j] = readb(base + (j<<1));
+ (readb(base+4) == hw_info[i].a2)) {
+ for (j = 0; j < 6; j++)
+ dev->dev_addr[j] = readb(base + (j<<1));
+ break;
+ }
}
iounmap(virt);
int count;
int cpu;
- if (unlikely(!alloc_cpumask_var(&core_mask, GFP_KERNEL))) {
+ if (unlikely(!zalloc_cpumask_var(&core_mask, GFP_KERNEL))) {
printk(KERN_WARNING
"sfc: RSS disabled due to allocation failure\n");
return 1;
}
- cpumask_clear(core_mask);
count = 0;
for_each_online_cpu(cpu) {
if (!cpumask_test_cpu(cpu, core_mask)) {
if (ramsize > 0) {
u32 rxspace;
- hw->flags |= SKY2_HW_RAM_BUFFER;
pr_debug(PFX "%s: ram buffer %dK\n", dev->name, ramsize);
if (ramsize < 16)
rxspace = ramsize / 2;
++hw->ports;
}
+ if (sky2_read8(hw, B2_E_0))
+ hw->flags |= SKY2_HW_RAM_BUFFER;
+
return 0;
}
.ndo_open = vnet_open,
.ndo_stop = vnet_close,
.ndo_set_multicast_list = vnet_set_rx_mode,
- .ndo_change_mtu = eth_change_mtu,
.ndo_set_mac_address = vnet_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = vnet_tx_timeout,
char *name;
unsigned long flags = 0;
- err = -EINVAL;
-
if (!capable(CAP_NET_ADMIN))
return -EPERM;
err = security_tun_dev_create();
flags |= TUN_TAP_DEV;
name = "tap%d";
} else
- goto failed;
+ return -EINVAL;
if (*ifr->ifr_name)
name = ifr->ifr_name;
int timeout)
{
struct usb_ctrlrequest *dr;
+ int retval;
dbg("kaweth_control()");
return -ENOMEM;
}
- dr->bRequestType= requesttype;
+ dr->bRequestType = requesttype;
dr->bRequest = request;
dr->wValue = cpu_to_le16(value);
dr->wIndex = cpu_to_le16(index);
dr->wLength = cpu_to_le16(size);
- return kaweth_internal_control_msg(kaweth->dev,
- pipe,
- dr,
- data,
- size,
- timeout);
+ retval = kaweth_internal_control_msg(kaweth->dev,
+ pipe,
+ dr,
+ data,
+ size,
+ timeout);
+
+ kfree(dr);
+ return retval;
}
/****************************************************************
.rx_fixup = smsc95xx_rx_fixup,
.tx_fixup = smsc95xx_tx_fixup,
.status = smsc95xx_status,
- .flags = FLAG_ETHER,
+ .flags = FLAG_ETHER | FLAG_SEND_ZLP,
};
static const struct usb_device_id products[] = {
USB_DEVICE(0x0424, 0x9500),
.driver_info = (unsigned long) &smsc95xx_info,
},
+ {
+ /* SMSC9505 USB Ethernet Device */
+ USB_DEVICE(0x0424, 0x9505),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500A USB Ethernet Device */
+ USB_DEVICE(0x0424, 0x9E00),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9505A USB Ethernet Device */
+ USB_DEVICE(0x0424, 0x9E01),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
{
/* SMSC9512/9514 USB Hub & Ethernet Device */
USB_DEVICE(0x0424, 0xec00),
.driver_info = (unsigned long) &smsc95xx_info,
},
+ {
+ /* SMSC9500 USB Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9900),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9505 USB Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9901),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500A USB Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9902),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9505A USB Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9903),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9512/9514 USB Hub & Ethernet Device (SAL10) */
+ USB_DEVICE(0x0424, 0x9904),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500A USB Ethernet Device (HAL) */
+ USB_DEVICE(0x0424, 0x9905),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9505A USB Ethernet Device (HAL) */
+ USB_DEVICE(0x0424, 0x9906),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500 USB Ethernet Device (Alternate ID) */
+ USB_DEVICE(0x0424, 0x9907),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9500A USB Ethernet Device (Alternate ID) */
+ USB_DEVICE(0x0424, 0x9908),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
+ {
+ /* SMSC9512/9514 USB Hub & Ethernet Device (Alternate ID) */
+ USB_DEVICE(0x0424, 0x9909),
+ .driver_info = (unsigned long) &smsc95xx_info,
+ },
{ }, /* END */
};
MODULE_DEVICE_TABLE(usb, products);
* NOTE: strictly conforming cdc-ether devices should expect
* the ZLP here, but ignore the one-byte packet.
*/
- if ((length % dev->maxpacket) == 0) {
+ if (!(info->flags & FLAG_SEND_ZLP) && (length % dev->maxpacket) == 0) {
urb->transfer_buffer_length++;
if (skb_tailroom(skb)) {
skb->data[skb->len] = 0;
static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0";
-static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
*lenp = pos;
if (!write)
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
else
{
*lenp = 0;
}
-static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info161719(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
final:
*lenp = pos;
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
return retv;
}
-static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_infotxRing(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
SARLBNpln(u_char, txBuffer, 0x800);
final:
*lenp = pos;
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
return retv;
}
-static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_inforxRing(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
SARLBNpln(u_char, rxBuffer, 0x800);
final:
*lenp = pos;
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
return retv;
}
-static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info18(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int i;
final:
*lenp = pos;
- retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ retv = proc_dostring(ctl, write, buffer, lenp, ppos);
return retv;
}
static char conf_reset_result[200];
-static int arlan_configure(ctl_table * ctl, int write, struct file *filp,
+static int arlan_configure(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int pos = 0;
return -1;
*lenp = pos;
- return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ return proc_dostring(ctl, write, buffer, lenp, ppos);
}
-static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_reset(ctl_table * ctl, int write,
void __user *buffer, size_t * lenp, loff_t *ppos)
{
int pos = 0;
} else
return -1;
*lenp = pos + 3;
- return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+ return proc_dostring(ctl, write, buffer, lenp, ppos);
}
{ USB_DEVICE(0x0cf3, 0x9170) },
/* Atheros TG121N */
{ USB_DEVICE(0x0cf3, 0x1001) },
+ /* TP-Link TL-WN821N v2 */
+ { USB_DEVICE(0x0cf3, 0x1002) },
/* Cace Airpcap NX */
{ USB_DEVICE(0xcace, 0x0300) },
/* D-Link DWA 160A */
AR_PHY_CH1_EXT_CCA,
AR_PHY_CH2_EXT_CCA
};
- u8 chainmask;
+ u8 chainmask, rx_chain_status;
+ rx_chain_status = REG_READ(ah, AR_PHY_RX_CHAINMASK);
if (AR_SREV_9285(ah))
chainmask = 0x9;
- else if (AR_SREV_9280(ah) || AR_SREV_9287(ah))
- chainmask = 0x1B;
- else
- chainmask = 0x3F;
+ else if (AR_SREV_9280(ah) || AR_SREV_9287(ah)) {
+ if ((rx_chain_status & 0x2) || (rx_chain_status & 0x4))
+ chainmask = 0x1B;
+ else
+ chainmask = 0x09;
+ } else {
+ if (rx_chain_status & 0x4)
+ chainmask = 0x3F;
+ else if (rx_chain_status & 0x2)
+ chainmask = 0x1B;
+ else
+ chainmask = 0x09;
+ }
h = ah->nfCalHist;
noise_floor = AR_PHY_CCA_MAX_AR9280_GOOD_VALUE;
else if (AR_SREV_9285(ah))
noise_floor = AR_PHY_CCA_MAX_AR9285_GOOD_VALUE;
+ else if (AR_SREV_9287(ah))
+ noise_floor = AR_PHY_CCA_MAX_AR9287_GOOD_VALUE;
else
noise_floor = AR_PHY_CCA_MAX_AR5416_GOOD_VALUE;
regVal |= (1 << (19 + i));
REG_WRITE(ah, 0x7834, regVal);
udelay(1);
+ regVal = REG_READ(ah, 0x7834);
regVal &= (~(0x1 << (19 + i)));
reg_field = MS(REG_READ(ah, 0x7840), AR9285_AN_RXTXBB1_SPARE9);
regVal |= (reg_field << (19 + i));
#define AR_PHY_CCA_MAX_AR5416_GOOD_VALUE -85
#define AR_PHY_CCA_MAX_AR9280_GOOD_VALUE -112
#define AR_PHY_CCA_MAX_AR9285_GOOD_VALUE -118
+#define AR_PHY_CCA_MAX_AR9287_GOOD_VALUE -118
#define AR_PHY_CCA_MAX_HIGH_VALUE -62
#define AR_PHY_CCA_MIN_BAD_VALUE -140
#define AR_PHY_CCA_FILTERWINDOW_LENGTH_INIT 3
REG_RMW_FIELD(ah, AR_AN_TOP1, AR_AN_TOP1_DACIPMODE,
eep->baseEepHeader.dacLpMode);
+ udelay(100);
+
REG_RMW_FIELD(ah, AR_PHY_FRAME_CTL, AR_PHY_FRAME_CTL_TX_CLIP,
pModal->miscBits >> 2);
u16 powerLimit)
{
#define REDUCE_SCALED_POWER_BY_TWO_CHAIN 6 /* 10*log10(2)*2 */
-#define REDUCE_SCALED_POWER_BY_THREE_CHAIN 10 /* 10*log10(3)*2 */
+#define REDUCE_SCALED_POWER_BY_THREE_CHAIN 9 /* 10*log10(3)*2 */
struct ath_regulatory *regulatory = ath9k_hw_regulatory(ah);
struct ar5416_eeprom_def *pEepData = &ah->eeprom.def;
static void ath9k_hw_init_mode_gain_regs(struct ath_hw *ah)
{
- if (AR_SREV_9287_11(ah))
+ if (AR_SREV_9287_11_OR_LATER(ah))
INIT_INI_ARRAY(&ah->iniModesRxGain,
ar9287Modes_rx_gain_9287_1_1,
ARRAY_SIZE(ar9287Modes_rx_gain_9287_1_1), 6);
else if (AR_SREV_9280_20(ah))
ath9k_hw_init_rxgain_ini(ah);
- if (AR_SREV_9287_11(ah)) {
+ if (AR_SREV_9287_11_OR_LATER(ah)) {
INIT_INI_ARRAY(&ah->iniModesTxGain,
ar9287Modes_tx_gain_9287_1_1,
ARRAY_SIZE(ar9287Modes_tx_gain_9287_1_1), 6);
ath9k_hw_init_mode_regs(ah);
if (ah->is_pciexpress)
- ath9k_hw_configpcipowersave(ah, 0);
+ ath9k_hw_configpcipowersave(ah, 0, 0);
else
ath9k_hw_disablepcie(ah);
*/
REG_SET_BIT(ah, AR_DIAG_SW, (AR_DIAG_RX_DIS | AR_DIAG_RX_ABORT));
+ if (AR_SREV_9280_10_OR_LATER(ah)) {
+ val = REG_READ(ah, AR_PCU_MISC_MODE2) &
+ (~AR_PCU_MISC_MODE2_HWWAR1);
+
+ if (AR_SREV_9287_10_OR_LATER(ah))
+ val = val & (~AR_PCU_MISC_MODE2_HWWAR2);
+
+ REG_WRITE(ah, AR_PCU_MISC_MODE2, val);
+ }
if (!AR_SREV_5416_20_OR_LATER(ah) ||
AR_SREV_9280_10_OR_LATER(ah))
static bool ath9k_hw_chip_reset(struct ath_hw *ah,
struct ath9k_channel *chan)
{
- if (OLC_FOR_AR9280_20_LATER) {
+ if (AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL)) {
if (!ath9k_hw_set_reset_reg(ah, ATH9K_RESET_POWER_ON))
return false;
} else if (!ath9k_hw_set_reset_reg(ah, ATH9K_RESET_WARM))
struct ath9k_channel *curchan = ah->curchan;
u32 saveDefAntenna;
u32 macStaId1;
+ u64 tsf = 0;
int i, rx_chainmask, r;
ah->extprotspacing = sc->ht_extprotspacing;
if (!ath9k_hw_setpower(ah, ATH9K_PM_AWAKE))
return -EIO;
- if (curchan)
+ if (curchan && !ah->chip_fullsleep)
ath9k_hw_getnf(ah, curchan);
if (bChannelChange &&
(chan->channel != ah->curchan->channel) &&
((chan->channelFlags & CHANNEL_ALL) ==
(ah->curchan->channelFlags & CHANNEL_ALL)) &&
- (!AR_SREV_9280(ah) || (!IS_CHAN_A_5MHZ_SPACED(chan) &&
- !IS_CHAN_A_5MHZ_SPACED(ah->curchan)))) {
+ !(AR_SREV_9280(ah) || IS_CHAN_A_5MHZ_SPACED(chan) ||
+ IS_CHAN_A_5MHZ_SPACED(ah->curchan))) {
if (ath9k_hw_channel_change(ah, chan, sc->tx_chan_width)) {
ath9k_hw_loadnf(ah, ah->curchan);
macStaId1 = REG_READ(ah, AR_STA_ID1) & AR_STA_ID1_BASE_RATE_11B;
+ /* For chips on which RTC reset is done, save TSF before it gets cleared */
+ if (AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL))
+ tsf = ath9k_hw_gettsf64(ah);
+
saveLedState = REG_READ(ah, AR_CFG_LED) &
(AR_CFG_LED_ASSOC_CTL | AR_CFG_LED_MODE_SEL |
AR_CFG_LED_BLINK_THRESH_SEL | AR_CFG_LED_BLINK_SLOW);
udelay(50);
}
+ /* Restore TSF */
+ if (tsf && AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL))
+ ath9k_hw_settsf64(ah, tsf);
+
if (AR_SREV_9280_10_OR_LATER(ah))
REG_SET_BIT(ah, AR_GPIO_INPUT_EN_VAL, AR_GPIO_JTAG_DISABLE);
* Programming the SerDes must go through the same 288 bit serial shift
* register as the other analog registers. Hence the 9 writes.
*/
-void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore)
+void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off)
{
u8 i;
+ u32 val;
if (ah->is_pciexpress != true)
return;
return;
/* Nothing to do on restore for 11N */
- if (restore)
- return;
+ if (!restore) {
+ if (AR_SREV_9280_20_OR_LATER(ah)) {
+ /*
+ * AR9280 2.0 or later chips use SerDes values from the
+ * initvals.h initialized depending on chipset during
+ * ath9k_hw_init()
+ */
+ for (i = 0; i < ah->iniPcieSerdes.ia_rows; i++) {
+ REG_WRITE(ah, INI_RA(&ah->iniPcieSerdes, i, 0),
+ INI_RA(&ah->iniPcieSerdes, i, 1));
+ }
+ } else if (AR_SREV_9280(ah) &&
+ (ah->hw_version.macRev == AR_SREV_REVISION_9280_10)) {
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fd00);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+
+ /* RX shut off when elecidle is asserted */
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xa8000019);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x13160820);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980560);
+
+ /* Shut off CLKREQ active in L1 */
+ if (ah->config.pcie_clock_req)
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffc);
+ else
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffd);
- if (AR_SREV_9280_20_OR_LATER(ah)) {
- /*
- * AR9280 2.0 or later chips use SerDes values from the
- * initvals.h initialized depending on chipset during
- * ath9k_hw_init()
- */
- for (i = 0; i < ah->iniPcieSerdes.ia_rows; i++) {
- REG_WRITE(ah, INI_RA(&ah->iniPcieSerdes, i, 0),
- INI_RA(&ah->iniPcieSerdes, i, 1));
- }
- } else if (AR_SREV_9280(ah) &&
- (ah->hw_version.macRev == AR_SREV_REVISION_9280_10)) {
- REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fd00);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x00043007);
- /* RX shut off when elecidle is asserted */
- REG_WRITE(ah, AR_PCIE_SERDES, 0xa8000019);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x13160820);
- REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980560);
+ /* Load the new settings */
+ REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
- /* Shut off CLKREQ active in L1 */
- if (ah->config.pcie_clock_req)
- REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffc);
- else
- REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffd);
-
- REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
- REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x00043007);
+ } else {
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fc00);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
- /* Load the new settings */
- REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
+ /* RX shut off when elecidle is asserted */
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x28000039);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x53160824);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980579);
- } else {
- REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fc00);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924);
+ /*
+ * Ignore ah->ah_config.pcie_clock_req setting for
+ * pre-AR9280 11n
+ */
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x001defff);
- /* RX shut off when elecidle is asserted */
- REG_WRITE(ah, AR_PCIE_SERDES, 0x28000039);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x53160824);
- REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980579);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
+ REG_WRITE(ah, AR_PCIE_SERDES, 0x000e3007);
- /*
- * Ignore ah->ah_config.pcie_clock_req setting for
- * pre-AR9280 11n
- */
- REG_WRITE(ah, AR_PCIE_SERDES, 0x001defff);
+ /* Load the new settings */
+ REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
+ }
- REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40);
- REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554);
- REG_WRITE(ah, AR_PCIE_SERDES, 0x000e3007);
+ udelay(1000);
- /* Load the new settings */
- REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
- }
+ /* set bit 19 to allow forcing of pcie core into L1 state */
+ REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA);
- udelay(1000);
+ /* Several PCIe massages to ensure proper behaviour */
+ if (ah->config.pcie_waen) {
+ val = ah->config.pcie_waen;
+ if (!power_off)
+ val &= (~AR_WA_D3_L1_DISABLE);
+ } else {
+ if (AR_SREV_9285(ah) || AR_SREV_9271(ah) ||
+ AR_SREV_9287(ah)) {
+ val = AR9285_WA_DEFAULT;
+ if (!power_off)
+ val &= (~AR_WA_D3_L1_DISABLE);
+ } else if (AR_SREV_9280(ah)) {
+ /*
+ * On AR9280 chips bit 22 of 0x4004 needs to be
+ * set otherwise card may disappear.
+ */
+ val = AR9280_WA_DEFAULT;
+ if (!power_off)
+ val &= (~AR_WA_D3_L1_DISABLE);
+ } else
+ val = AR_WA_DEFAULT;
+ }
- /* set bit 19 to allow forcing of pcie core into L1 state */
- REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA);
+ REG_WRITE(ah, AR_WA, val);
+ }
- /* Several PCIe massages to ensure proper behaviour */
- if (ah->config.pcie_waen) {
- REG_WRITE(ah, AR_WA, ah->config.pcie_waen);
- } else {
- if (AR_SREV_9285(ah) || AR_SREV_9271(ah) || AR_SREV_9287(ah))
- REG_WRITE(ah, AR_WA, AR9285_WA_DEFAULT);
+ if (power_off) {
/*
- * On AR9280 chips bit 22 of 0x4004 needs to be set to
- * otherwise card may disappear.
+ * Set PCIe workaround bits
+ * bit 14 in WA register (disable L1) should only
+ * be set when device enters D3 and be cleared
+ * when device comes back to D0.
*/
- else if (AR_SREV_9280(ah))
- REG_WRITE(ah, AR_WA, AR9280_WA_DEFAULT);
- else
- REG_WRITE(ah, AR_WA, AR_WA_DEFAULT);
+ if (ah->config.pcie_waen) {
+ if (ah->config.pcie_waen & AR_WA_D3_L1_DISABLE)
+ REG_SET_BIT(ah, AR_WA, AR_WA_D3_L1_DISABLE);
+ } else {
+ if (((AR_SREV_9285(ah) || AR_SREV_9271(ah) ||
+ AR_SREV_9287(ah)) &&
+ (AR9285_WA_DEFAULT & AR_WA_D3_L1_DISABLE)) ||
+ (AR_SREV_9280(ah) &&
+ (AR9280_WA_DEFAULT & AR_WA_D3_L1_DISABLE))) {
+ REG_SET_BIT(ah, AR_WA, AR_WA_D3_L1_DISABLE);
+ }
+ }
}
}
}
#endif
- if ((ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCI) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCIE) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_9160) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_9100) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_9280) ||
- (ah->hw_version.macVersion == AR_SREV_VERSION_9285))
- pCap->hw_caps &= ~ATH9K_HW_CAP_AUTOSLEEP;
- else
- pCap->hw_caps |= ATH9K_HW_CAP_AUTOSLEEP;
+ pCap->hw_caps &= ~ATH9K_HW_CAP_AUTOSLEEP;
if (AR_SREV_9280(ah) || AR_SREV_9285(ah))
pCap->hw_caps &= ~ATH9K_HW_CAP_4KB_SPLITTRANS;
#define AH_TSF_WRITE_TIMEOUT 100 /* (us) */
#define AH_TIME_QUANTUM 10
#define AR_KEYTABLE_SIZE 128
-#define POWER_UP_TIME 200000
+#define POWER_UP_TIME 10000
#define SPUR_RSSI_THRESH 40
#define CAB_TIMEOUT_VAL 10
const struct ath9k_beacon_state *bs);
bool ath9k_hw_setpower(struct ath_hw *ah,
enum ath9k_power_mode mode);
-void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore);
+void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off);
/* Interrupt Handling */
bool ath9k_hw_intrpend(struct ath_hw *ah);
int r;
ath9k_ps_wakeup(sc);
- ath9k_hw_configpcipowersave(ah, 0);
+ ath9k_hw_configpcipowersave(ah, 0, 0);
if (!ah->curchan)
ah->curchan = ath_get_curchannel(sc, sc->hw);
spin_unlock_bh(&sc->sc_resetlock);
ath9k_hw_phy_disable(ah);
- ath9k_hw_configpcipowersave(ah, 1);
+ ath9k_hw_configpcipowersave(ah, 1, 1);
ath9k_ps_restore(sc);
ath9k_hw_setpower(ah, ATH9K_PM_FULL_SLEEP);
}
bool blocked = !!ath_is_rfkill_set(sc);
wiphy_rfkill_set_hw_state(hw->wiphy, blocked);
-
- if (blocked)
- ath_radio_disable(sc);
- else
- ath_radio_enable(sc);
}
static void ath_start_rfkill_poll(struct ath_softc *sc)
DPRINTF(sc, ATH_DBG_CONFIG, "Detach ATH hw\n");
ath_deinit_leds(sc);
+ wiphy_rfkill_stop_polling(sc->hw->wiphy);
for (i = 0; i < sc->num_sec_wiphy; i++) {
struct ath_wiphy *aphy = sc->sec_wiphy[i];
init_channel = ath_get_curchannel(sc, hw);
/* Reset SERDES registers */
- ath9k_hw_configpcipowersave(sc->sc_ah, 0);
+ ath9k_hw_configpcipowersave(sc->sc_ah, 0, 0);
/*
* The basic interface to setting the hardware in a good
} else
sc->rx.rxlink = NULL;
- wiphy_rfkill_stop_polling(sc->hw->wiphy);
-
/* disable HAL and put h/w to sleep */
ath9k_hw_disable(sc->sc_ah);
- ath9k_hw_configpcipowersave(sc->sc_ah, 1);
+ ath9k_hw_configpcipowersave(sc->sc_ah, 1, 1);
ath9k_hw_setpower(sc->sc_ah, ATH9K_PM_FULL_SLEEP);
sc->sc_flags |= SC_OP_INVALID;
#define AR_RC_HOSTIF 0x00000100
#define AR_WA 0x4004
+#define AR_WA_D3_L1_DISABLE (1 << 14)
#define AR9285_WA_DEFAULT 0x004a05cb
-#define AR9280_WA_DEFAULT 0x0040073f
+#define AR9280_WA_DEFAULT 0x0040073b
#define AR_WA_DEFAULT 0x0000073f
If unsure, say N.
+config B43_SDIO
+ bool "Broadcom 43xx SDIO device support (EXPERIMENTAL)"
+ depends on B43 && SSB_SDIOHOST_POSSIBLE && EXPERIMENTAL
+ select SSB_SDIOHOST
+ ---help---
+ Broadcom 43xx device support for Soft-MAC SDIO devices.
+
+ With this config option you can drive Soft-MAC b43 cards with a
+ Secure Digital I/O interface.
+ This includes the WLAN daughter card found on the Nintendo Wii
+ video game console.
+ Note that this does not support Broadcom 43xx Full-MAC devices.
+
+ It's safe to select Y here, even if you don't have a B43 SDIO device.
+
+ If unsure, say N.
+
# Data transfers to the device via PIO
-# This is only needed on PCMCIA devices. All others can do DMA properly.
+# This is only needed on PCMCIA and SDIO devices. All others can do DMA properly.
config B43_PIO
bool
- depends on B43 && (B43_PCMCIA || B43_FORCE_PIO)
+ depends on B43 && (B43_SDIO || B43_PCMCIA || B43_FORCE_PIO)
select SSB_BLOCKIO
default y
b43-y += rfkill.o
b43-$(CONFIG_B43_LEDS) += leds.o
b43-$(CONFIG_B43_PCMCIA) += pcmcia.o
+b43-$(CONFIG_B43_SDIO) += sdio.o
b43-$(CONFIG_B43_DEBUG) += debugfs.o
obj-$(CONFIG_B43) += b43.o
* from the mac80211 subsystem. */
u16 mac80211_initially_registered_queues;
- /* R/W lock for data transmission.
- * Transmissions on 2+ queues can run concurrently, but somebody else
- * might sync with TX by write_lock_irqsave()'ing. */
- rwlock_t tx_lock;
- /* Lock for LEDs access. */
- spinlock_t leds_lock;
-
/* We can only have one operating interface (802.11 core)
* at a time. General information about this interface follows.
*/
struct work_struct tx_work;
/* Queue of packets to be transmitted. */
struct sk_buff_head tx_queue;
+
+ /* The device LEDs. */
+ struct b43_leds leds;
};
/* The type of the firmware file. */
/* The device initialization status.
* Use b43_status() to query. */
atomic_t __init_status;
- /* Saved init status for handling suspend. */
- int suspend_init_status;
bool bad_frames_preempt; /* Use "Bad Frames Preemption" (default off) */
bool dfq_valid; /* Directed frame queue valid (IBSS PS mode, ATIM) */
bool radio_hw_enable; /* saved state of radio hardware enabled state */
- bool suspend_in_progress; /* TRUE, if we are in a suspend/resume cycle */
bool qos_enabled; /* TRUE, if QoS is used. */
bool hwcrypto_enabled; /* TRUE, if HW crypto acceleration is enabled. */
/* Various statistics about the physical device. */
struct b43_stats stats;
- /* The device LEDs. */
- struct b43_led led_tx;
- struct b43_led led_rx;
- struct b43_led led_assoc;
- struct b43_led led_radio;
-
/* Reason code of the last interrupt. */
u32 irq_reason;
u32 dma_reason[6];
/* Debugging stuff follows. */
#ifdef CONFIG_B43_DEBUG
struct b43_dfsentry *dfsentry;
+ unsigned int irq_count;
+ unsigned int irq_bit_count[32];
+ unsigned int tx_count;
+ unsigned int rx_count;
#endif
};
add_dyn_dbg("debug_lo", B43_DBG_LO, 0);
add_dyn_dbg("debug_firmware", B43_DBG_FIRMWARE, 0);
add_dyn_dbg("debug_keys", B43_DBG_KEYS, 0);
+ add_dyn_dbg("debug_verbose_stats", B43_DBG_VERBOSESTATS, 0);
#undef add_dyn_dbg
}
B43_DBG_LO,
B43_DBG_FIRMWARE,
B43_DBG_KEYS,
+ B43_DBG_VERBOSESTATS,
__B43_NR_DYNDBG,
};
ring->nr_failed_tx_packets++;
ring->nr_total_packet_tries += status->frame_count;
#endif /* DEBUG */
- ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb);
+ ieee80211_tx_status(dev->wl->hw, meta->skb);
- /* skb is freed by ieee80211_tx_status_irqsafe() */
+ /* skb is freed by ieee80211_tx_status() */
meta->skb = NULL;
} else {
/* No need to call free_descriptor_buffer here, as
static void b43_led_turn_on(struct b43_wldev *dev, u8 led_index,
bool activelow)
{
- struct b43_wl *wl = dev->wl;
- unsigned long flags;
u16 ctl;
- spin_lock_irqsave(&wl->leds_lock, flags);
ctl = b43_read16(dev, B43_MMIO_GPIO_CONTROL);
if (activelow)
ctl &= ~(1 << led_index);
else
ctl |= (1 << led_index);
b43_write16(dev, B43_MMIO_GPIO_CONTROL, ctl);
- spin_unlock_irqrestore(&wl->leds_lock, flags);
}
static void b43_led_turn_off(struct b43_wldev *dev, u8 led_index,
bool activelow)
{
- struct b43_wl *wl = dev->wl;
- unsigned long flags;
u16 ctl;
- spin_lock_irqsave(&wl->leds_lock, flags);
ctl = b43_read16(dev, B43_MMIO_GPIO_CONTROL);
if (activelow)
ctl |= (1 << led_index);
else
ctl &= ~(1 << led_index);
b43_write16(dev, B43_MMIO_GPIO_CONTROL, ctl);
- spin_unlock_irqrestore(&wl->leds_lock, flags);
}
-/* Callback from the LED subsystem. */
-static void b43_led_brightness_set(struct led_classdev *led_dev,
- enum led_brightness brightness)
+static void b43_led_update(struct b43_wldev *dev,
+ struct b43_led *led)
{
- struct b43_led *led = container_of(led_dev, struct b43_led, led_dev);
- struct b43_wldev *dev = led->dev;
bool radio_enabled;
+ bool turn_on;
- if (unlikely(b43_status(dev) < B43_STAT_INITIALIZED))
+ if (!led->wl)
return;
- /* Checking the radio-enabled status here is slightly racy,
- * but we want to avoid the locking overhead and we don't care
- * whether the LED has the wrong state for a second. */
radio_enabled = (dev->phy.radio_on && dev->radio_hw_enable);
- if (brightness == LED_OFF || !radio_enabled)
- b43_led_turn_off(dev, led->index, led->activelow);
+ /* The led->state read is racy, but we don't care. In case we raced
+ * with the brightness_set handler, we will be called again soon
+ * to fixup our state. */
+ if (radio_enabled)
+ turn_on = atomic_read(&led->state) != LED_OFF;
else
+ turn_on = 0;
+ if (turn_on == led->hw_state)
+ return;
+ led->hw_state = turn_on;
+
+ if (turn_on)
b43_led_turn_on(dev, led->index, led->activelow);
+ else
+ b43_led_turn_off(dev, led->index, led->activelow);
+}
+
+static void b43_leds_work(struct work_struct *work)
+{
+ struct b43_leds *leds = container_of(work, struct b43_leds, work);
+ struct b43_wl *wl = container_of(leds, struct b43_wl, leds);
+ struct b43_wldev *dev;
+
+ mutex_lock(&wl->mutex);
+ dev = wl->current_dev;
+ if (unlikely(!dev || b43_status(dev) < B43_STAT_STARTED))
+ goto out_unlock;
+
+ b43_led_update(dev, &wl->leds.led_tx);
+ b43_led_update(dev, &wl->leds.led_rx);
+ b43_led_update(dev, &wl->leds.led_radio);
+ b43_led_update(dev, &wl->leds.led_assoc);
+
+out_unlock:
+ mutex_unlock(&wl->mutex);
+}
+
+/* Callback from the LED subsystem. */
+static void b43_led_brightness_set(struct led_classdev *led_dev,
+ enum led_brightness brightness)
+{
+ struct b43_led *led = container_of(led_dev, struct b43_led, led_dev);
+ struct b43_wl *wl = led->wl;
+
+ if (likely(!wl->leds.stop)) {
+ atomic_set(&led->state, brightness);
+ ieee80211_queue_work(wl->hw, &wl->leds.work);
+ }
}
static int b43_register_led(struct b43_wldev *dev, struct b43_led *led,
{
int err;
- b43_led_turn_off(dev, led_index, activelow);
- if (led->dev)
+ if (led->wl)
return -EEXIST;
if (!default_trigger)
return -EINVAL;
- led->dev = dev;
+ led->wl = dev->wl;
led->index = led_index;
led->activelow = activelow;
strncpy(led->name, name, sizeof(led->name));
+ atomic_set(&led->state, 0);
led->led_dev.name = led->name;
led->led_dev.default_trigger = default_trigger;
err = led_classdev_register(dev->dev->dev, &led->led_dev);
if (err) {
b43warn(dev->wl, "LEDs: Failed to register %s\n", name);
- led->dev = NULL;
+ led->wl = NULL;
return err;
}
+
return 0;
}
static void b43_unregister_led(struct b43_led *led)
{
- if (!led->dev)
+ if (!led->wl)
return;
led_classdev_unregister(&led->led_dev);
- b43_led_turn_off(led->dev, led->index, led->activelow);
- led->dev = NULL;
+ led->wl = NULL;
}
static void b43_map_led(struct b43_wldev *dev,
* generic LED triggers. */
switch (behaviour) {
case B43_LED_INACTIVE:
- break;
case B43_LED_OFF:
- b43_led_turn_off(dev, led_index, activelow);
- break;
case B43_LED_ON:
- b43_led_turn_on(dev, led_index, activelow);
break;
case B43_LED_ACTIVITY:
case B43_LED_TRANSFER:
case B43_LED_APTRANSFER:
snprintf(name, sizeof(name),
"b43-%s::tx", wiphy_name(hw->wiphy));
- b43_register_led(dev, &dev->led_tx, name,
+ b43_register_led(dev, &dev->wl->leds.led_tx, name,
ieee80211_get_tx_led_name(hw),
led_index, activelow);
snprintf(name, sizeof(name),
"b43-%s::rx", wiphy_name(hw->wiphy));
- b43_register_led(dev, &dev->led_rx, name,
+ b43_register_led(dev, &dev->wl->leds.led_rx, name,
ieee80211_get_rx_led_name(hw),
led_index, activelow);
break;
case B43_LED_MODE_BG:
snprintf(name, sizeof(name),
"b43-%s::radio", wiphy_name(hw->wiphy));
- b43_register_led(dev, &dev->led_radio, name,
+ b43_register_led(dev, &dev->wl->leds.led_radio, name,
ieee80211_get_radio_led_name(hw),
led_index, activelow);
- /* Sync the RF-kill LED state with radio and switch states. */
- if (dev->phy.radio_on && b43_is_hw_radio_enabled(dev))
- b43_led_turn_on(dev, led_index, activelow);
break;
case B43_LED_WEIRD:
case B43_LED_ASSOC:
snprintf(name, sizeof(name),
"b43-%s::assoc", wiphy_name(hw->wiphy));
- b43_register_led(dev, &dev->led_assoc, name,
+ b43_register_led(dev, &dev->wl->leds.led_assoc, name,
ieee80211_get_assoc_led_name(hw),
led_index, activelow);
break;
}
}
-void b43_leds_init(struct b43_wldev *dev)
+static void b43_led_get_sprominfo(struct b43_wldev *dev,
+ unsigned int led_index,
+ enum b43_led_behaviour *behaviour,
+ bool *activelow)
{
struct ssb_bus *bus = dev->dev->bus;
u8 sprom[4];
- int i;
- enum b43_led_behaviour behaviour;
- bool activelow;
sprom[0] = bus->sprom.gpio0;
sprom[1] = bus->sprom.gpio1;
sprom[2] = bus->sprom.gpio2;
sprom[3] = bus->sprom.gpio3;
- for (i = 0; i < 4; i++) {
- if (sprom[i] == 0xFF) {
- /* There is no LED information in the SPROM
- * for this LED. Hardcode it here. */
- activelow = 0;
- switch (i) {
- case 0:
- behaviour = B43_LED_ACTIVITY;
- activelow = 1;
- if (bus->boardinfo.vendor == PCI_VENDOR_ID_COMPAQ)
- behaviour = B43_LED_RADIO_ALL;
- break;
- case 1:
- behaviour = B43_LED_RADIO_B;
- if (bus->boardinfo.vendor == PCI_VENDOR_ID_ASUSTEK)
- behaviour = B43_LED_ASSOC;
- break;
- case 2:
- behaviour = B43_LED_RADIO_A;
- break;
- case 3:
- behaviour = B43_LED_OFF;
- break;
- default:
- B43_WARN_ON(1);
- return;
- }
+ if (sprom[led_index] == 0xFF) {
+ /* There is no LED information in the SPROM
+ * for this LED. Hardcode it here. */
+ *activelow = 0;
+ switch (led_index) {
+ case 0:
+ *behaviour = B43_LED_ACTIVITY;
+ *activelow = 1;
+ if (bus->boardinfo.vendor == PCI_VENDOR_ID_COMPAQ)
+ *behaviour = B43_LED_RADIO_ALL;
+ break;
+ case 1:
+ *behaviour = B43_LED_RADIO_B;
+ if (bus->boardinfo.vendor == PCI_VENDOR_ID_ASUSTEK)
+ *behaviour = B43_LED_ASSOC;
+ break;
+ case 2:
+ *behaviour = B43_LED_RADIO_A;
+ break;
+ case 3:
+ *behaviour = B43_LED_OFF;
+ break;
+ default:
+ B43_WARN_ON(1);
+ return;
+ }
+ } else {
+ *behaviour = sprom[led_index] & B43_LED_BEHAVIOUR;
+ *activelow = !!(sprom[led_index] & B43_LED_ACTIVELOW);
+ }
+}
+
+void b43_leds_init(struct b43_wldev *dev)
+{
+ struct b43_led *led;
+ unsigned int i;
+ enum b43_led_behaviour behaviour;
+ bool activelow;
+
+ /* Sync the RF-kill LED state (if we have one) with radio and switch states. */
+ led = &dev->wl->leds.led_radio;
+ if (led->wl) {
+ if (dev->phy.radio_on && b43_is_hw_radio_enabled(dev)) {
+ b43_led_turn_on(dev, led->index, led->activelow);
+ led->hw_state = 1;
+ atomic_set(&led->state, 1);
} else {
- behaviour = sprom[i] & B43_LED_BEHAVIOUR;
- activelow = !!(sprom[i] & B43_LED_ACTIVELOW);
+ b43_led_turn_off(dev, led->index, led->activelow);
+ led->hw_state = 0;
+ atomic_set(&led->state, 0);
}
- b43_map_led(dev, i, behaviour, activelow);
}
+
+ /* Initialize TX/RX/ASSOC leds */
+ led = &dev->wl->leds.led_tx;
+ if (led->wl) {
+ b43_led_turn_off(dev, led->index, led->activelow);
+ led->hw_state = 0;
+ atomic_set(&led->state, 0);
+ }
+ led = &dev->wl->leds.led_rx;
+ if (led->wl) {
+ b43_led_turn_off(dev, led->index, led->activelow);
+ led->hw_state = 0;
+ atomic_set(&led->state, 0);
+ }
+ led = &dev->wl->leds.led_assoc;
+ if (led->wl) {
+ b43_led_turn_off(dev, led->index, led->activelow);
+ led->hw_state = 0;
+ atomic_set(&led->state, 0);
+ }
+
+ /* Initialize other LED states. */
+ for (i = 0; i < B43_MAX_NR_LEDS; i++) {
+ b43_led_get_sprominfo(dev, i, &behaviour, &activelow);
+ switch (behaviour) {
+ case B43_LED_OFF:
+ b43_led_turn_off(dev, i, activelow);
+ break;
+ case B43_LED_ON:
+ b43_led_turn_on(dev, i, activelow);
+ break;
+ default:
+ /* Leave others as-is. */
+ break;
+ }
+ }
+
+ dev->wl->leds.stop = 0;
}
void b43_leds_exit(struct b43_wldev *dev)
{
- b43_unregister_led(&dev->led_tx);
- b43_unregister_led(&dev->led_rx);
- b43_unregister_led(&dev->led_assoc);
- b43_unregister_led(&dev->led_radio);
+ struct b43_leds *leds = &dev->wl->leds;
+
+ b43_led_turn_off(dev, leds->led_tx.index, leds->led_tx.activelow);
+ b43_led_turn_off(dev, leds->led_rx.index, leds->led_rx.activelow);
+ b43_led_turn_off(dev, leds->led_assoc.index, leds->led_assoc.activelow);
+ b43_led_turn_off(dev, leds->led_radio.index, leds->led_radio.activelow);
+}
+
+void b43_leds_stop(struct b43_wldev *dev)
+{
+ struct b43_leds *leds = &dev->wl->leds;
+
+ leds->stop = 1;
+ cancel_work_sync(&leds->work);
+}
+
+void b43_leds_register(struct b43_wldev *dev)
+{
+ unsigned int i;
+ enum b43_led_behaviour behaviour;
+ bool activelow;
+
+ INIT_WORK(&dev->wl->leds.work, b43_leds_work);
+
+ /* Register the LEDs to the LED subsystem. */
+ for (i = 0; i < B43_MAX_NR_LEDS; i++) {
+ b43_led_get_sprominfo(dev, i, &behaviour, &activelow);
+ b43_map_led(dev, i, behaviour, activelow);
+ }
+}
+
+void b43_leds_unregister(struct b43_wldev *dev)
+{
+ struct b43_leds *leds = &dev->wl->leds;
+
+ b43_unregister_led(&leds->led_tx);
+ b43_unregister_led(&leds->led_rx);
+ b43_unregister_led(&leds->led_assoc);
+ b43_unregister_led(&leds->led_radio);
}
#include <linux/types.h>
#include <linux/leds.h>
+#include <linux/workqueue.h>
#define B43_LED_MAX_NAME_LEN 31
struct b43_led {
- struct b43_wldev *dev;
+ struct b43_wl *wl;
/* The LED class device */
struct led_classdev led_dev;
/* The index number of the LED. */
bool activelow;
/* The unique name string for this LED device. */
char name[B43_LED_MAX_NAME_LEN + 1];
+ /* The current status of the LED. This is updated locklessly. */
+ atomic_t state;
+ /* The active state in hardware. */
+ bool hw_state;
};
+struct b43_leds {
+ struct b43_led led_tx;
+ struct b43_led led_rx;
+ struct b43_led led_radio;
+ struct b43_led led_assoc;
+
+ bool stop;
+ struct work_struct work;
+};
+
+#define B43_MAX_NR_LEDS 4
+
#define B43_LED_BEHAVIOUR 0x7F
#define B43_LED_ACTIVELOW 0x80
/* LED behaviour values */
B43_LED_INACTIVE,
};
+void b43_leds_register(struct b43_wldev *dev);
+void b43_leds_unregister(struct b43_wldev *dev);
void b43_leds_init(struct b43_wldev *dev);
void b43_leds_exit(struct b43_wldev *dev);
+void b43_leds_stop(struct b43_wldev *dev);
#else /* CONFIG_B43_LEDS */
/* LED support disabled */
-struct b43_led {
+struct b43_leds {
/* empty */
};
+static inline void b43_leds_register(struct b43_wldev *dev)
+{
+}
+static inline void b43_leds_unregister(struct b43_wldev *dev)
+{
+}
static inline void b43_leds_init(struct b43_wldev *dev)
{
}
static inline void b43_leds_exit(struct b43_wldev *dev)
{
}
+static inline void b43_leds_stop(struct b43_wldev *dev)
+{
+}
#endif /* CONFIG_B43_LEDS */
#endif /* B43_LEDS_H_ */
Copyright (c) 2005 Danny van Dyk <kugelfang@gentoo.org>
Copyright (c) 2005 Andreas Jaggi <andreas.jaggi@waterwave.ch>
+ SDIO support
+ Copyright (c) 2009 Albert Herranz <albert_herranz@yahoo.es>
+
Some parts of the code in this file are derived from the ipw2200
driver Copyright(c) 2003 - 2004 Intel Corporation.
#include "xmit.h"
#include "lo.h"
#include "pcmcia.h"
+#include "sdio.h"
+#include <linux/mmc/sdio_func.h>
MODULE_DESCRIPTION("Broadcom B43 wireless driver");
MODULE_AUTHOR("Martin Langer");
mutex_lock(&wl->mutex);
dev = wl->current_dev;
if (likely(dev && (b43_status(dev) >= B43_STAT_INITIALIZED))) {
- if (0 /*FIXME dev->dev->bus->bustype == SSB_BUSTYPE_SDIO*/) {
+ if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
/* wl->mutex is enough. */
b43_do_beacon_update_trigger_work(dev);
mmiowb();
/* Re-enable interrupts on the device by restoring the current interrupt mask. */
b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, dev->irq_mask);
+
+#if B43_DEBUG
+ if (b43_debug(dev, B43_DBG_VERBOSESTATS)) {
+ dev->irq_count++;
+ for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) {
+ if (reason & (1 << i))
+ dev->irq_bit_count[i]++;
+ }
+ }
+#endif
}
/* Interrupt thread handler. Handles device interrupts in thread context. */
return ret;
}
+/* SDIO interrupt handler. This runs in process context. */
+static void b43_sdio_interrupt_handler(struct b43_wldev *dev)
+{
+ struct b43_wl *wl = dev->wl;
+ irqreturn_t ret;
+
+ mutex_lock(&wl->mutex);
+
+ ret = b43_do_interrupt(dev);
+ if (ret == IRQ_WAKE_THREAD)
+ b43_do_interrupt_thread(dev);
+
+ mutex_unlock(&wl->mutex);
+}
+
void b43_do_release_fw(struct b43_firmware_file *fw)
{
release_firmware(fw->data);
cfp_pretbtt = 50;
}
b43_write16(dev, 0x612, cfp_pretbtt);
+
+ /* FIXME: We don't currently implement the PMQ mechanism,
+ * so always disable it. If we want to implement PMQ,
+ * we need to enable it here (clear DISCPMQ) in AP mode.
+ */
+ if (0 /* ctl & B43_MACCTL_AP */) {
+ b43_write32(dev, B43_MMIO_MACCTL,
+ b43_read32(dev, B43_MMIO_MACCTL)
+ & ~B43_MACCTL_DISCPMQ);
+ } else {
+ b43_write32(dev, B43_MMIO_MACCTL,
+ b43_read32(dev, B43_MMIO_MACCTL)
+ | B43_MACCTL_DISCPMQ);
+ }
}
static void b43_rate_memory_write(struct b43_wldev *dev, u16 rate, int is_ofdm)
atomic_set(&phy->txerr_cnt, B43_PHY_TX_BADNESS_LIMIT);
wmb();
+
+#if B43_DEBUG
+ if (b43_debug(dev, B43_DBG_VERBOSESTATS)) {
+ unsigned int i;
+
+ b43dbg(dev->wl, "Stats: %7u IRQs/sec, %7u TX/sec, %7u RX/sec\n",
+ dev->irq_count / 15,
+ dev->tx_count / 15,
+ dev->rx_count / 15);
+ dev->irq_count = 0;
+ dev->tx_count = 0;
+ dev->rx_count = 0;
+ for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) {
+ if (dev->irq_bit_count[i]) {
+ b43dbg(dev->wl, "Stats: %7u IRQ-%02u/sec (0x%08X)\n",
+ dev->irq_bit_count[i] / 15, i, (1 << i));
+ dev->irq_bit_count[i] = 0;
+ }
+ }
+ }
+#endif
}
static void do_periodic_work(struct b43_wldev *dev)
static int b43_rng_read(struct hwrng *rng, u32 *data)
{
struct b43_wl *wl = (struct b43_wl *)rng->priv;
+ struct b43_wldev *dev;
+ int count = -ENODEV;
- /* FIXME: We need to take wl->mutex here to make sure the device
- * is not going away from under our ass. However it could deadlock
- * with hwrng internal locking. */
-
- *data = b43_read16(wl->current_dev, B43_MMIO_RNG);
+ mutex_lock(&wl->mutex);
+ dev = wl->current_dev;
+ if (likely(dev && b43_status(dev) >= B43_STAT_INITIALIZED)) {
+ *data = b43_read16(dev, B43_MMIO_RNG);
+ count = sizeof(u16);
+ }
+ mutex_unlock(&wl->mutex);
- return (sizeof(u16));
+ return count;
}
#endif /* CONFIG_B43_HWRNG */
dev_kfree_skb(skb); /* Drop it */
}
+#if B43_DEBUG
+ dev->tx_count++;
+#endif
mutex_unlock(&wl->mutex);
}
/* Disable interrupts on the device. */
b43_set_status(dev, B43_STAT_INITIALIZED);
- if (0 /*FIXME dev->dev->bus->bustype == SSB_BUSTYPE_SDIO*/) {
+ if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
/* wl->mutex is locked. That is enough. */
b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, 0);
b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); /* Flush */
b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); /* Flush */
spin_unlock_irq(&wl->hardirq_lock);
}
- /* Synchronize the interrupt handlers. Unlock to avoid deadlocks. */
+ /* Synchronize and free the interrupt handlers. Unlock to avoid deadlocks. */
orig_dev = dev;
mutex_unlock(&wl->mutex);
- synchronize_irq(dev->dev->irq);
+ if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
+ b43_sdio_free_irq(dev);
+ } else {
+ synchronize_irq(dev->dev->irq);
+ free_irq(dev->dev->irq, dev);
+ }
mutex_lock(&wl->mutex);
dev = wl->current_dev;
if (!dev)
dev_kfree_skb(skb_dequeue(&wl->tx_queue));
b43_mac_suspend(dev);
- free_irq(dev->dev->irq, dev);
+ b43_leds_exit(dev);
b43dbg(wl, "Wireless interface stopped\n");
return dev;
B43_WARN_ON(b43_status(dev) != B43_STAT_INITIALIZED);
drain_txstatus_queue(dev);
- err = request_threaded_irq(dev->dev->irq, b43_interrupt_handler,
- b43_interrupt_thread_handler,
- IRQF_SHARED, KBUILD_MODNAME, dev);
- if (err) {
- b43err(dev->wl, "Cannot request IRQ-%d\n", dev->dev->irq);
- goto out;
+ if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) {
+ err = b43_sdio_request_irq(dev, b43_sdio_interrupt_handler);
+ if (err) {
+ b43err(dev->wl, "Cannot request SDIO IRQ\n");
+ goto out;
+ }
+ } else {
+ err = request_threaded_irq(dev->dev->irq, b43_interrupt_handler,
+ b43_interrupt_thread_handler,
+ IRQF_SHARED, KBUILD_MODNAME, dev);
+ if (err) {
+ b43err(dev->wl, "Cannot request IRQ-%d\n", dev->dev->irq);
+ goto out;
+ }
}
/* We are ready to run. */
/* Start maintainance work */
b43_periodic_tasks_setup(dev);
+ b43_leds_init(dev);
+
b43dbg(dev->wl, "Wireless interface started\n");
- out:
+out:
return err;
}
macctl |= B43_MACCTL_PSM_JMP0;
b43_write32(dev, B43_MMIO_MACCTL, macctl);
- if (!dev->suspend_in_progress) {
- b43_leds_exit(dev);
- b43_rng_exit(dev->wl);
- }
b43_dma_free(dev);
b43_pio_free(dev);
b43_chip_exit(dev);
/* Initialize a wireless core */
static int b43_wireless_core_init(struct b43_wldev *dev)
{
- struct b43_wl *wl = dev->wl;
struct ssb_bus *bus = dev->dev->bus;
struct ssb_sprom *sprom = &bus->sprom;
struct b43_phy *phy = &dev->phy;
/* Maximum Contention Window */
b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_MAXCONT, 0x3FF);
- if ((dev->dev->bus->bustype == SSB_BUSTYPE_PCMCIA) || B43_FORCE_PIO) {
+ if ((dev->dev->bus->bustype == SSB_BUSTYPE_PCMCIA) ||
+ (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) ||
+ B43_FORCE_PIO) {
dev->__using_pio_transfers = 1;
err = b43_pio_init(dev);
} else {
ssb_bus_powerup(bus, !(sprom->boardflags_lo & B43_BFL_XTAL_NOSLOW));
b43_upload_card_macaddress(dev);
b43_security_init(dev);
- if (!dev->suspend_in_progress)
- b43_rng_init(wl);
+
+ ieee80211_wake_queues(dev->wl->hw);
ieee80211_wake_queues(dev->wl->hw);
b43_set_status(dev, B43_STAT_INITIALIZED);
- if (!dev->suspend_in_progress)
- b43_leds_init(dev);
out:
return err;
/* Initialize struct b43_wl */
wl->hw = hw;
- spin_lock_init(&wl->leds_lock);
mutex_init(&wl->mutex);
spin_lock_init(&wl->hardirq_lock);
INIT_LIST_HEAD(&wl->devlist);
err = ieee80211_register_hw(wl->hw);
if (err)
goto err_one_core_detach;
+ b43_leds_register(wl->current_dev);
+ b43_rng_init(wl);
}
out:
* might have modified it. Restoring is important, so the networking
* stack can properly free resources. */
wl->hw->queues = wl->mac80211_initially_registered_queues;
+ b43_leds_stop(wldev);
ieee80211_unregister_hw(wl->hw);
}
b43_one_core_detach(dev);
if (list_empty(&wl->devlist)) {
+ b43_rng_exit(wl);
+ b43_leds_unregister(wldev);
/* Last core on the chip unregistered.
* We can destroy common struct b43_wl.
*/
ieee80211_queue_work(dev->wl->hw, &dev->restart_work);
}
-#ifdef CONFIG_PM
-
-static int b43_suspend(struct ssb_device *dev, pm_message_t state)
-{
- struct b43_wldev *wldev = ssb_get_drvdata(dev);
- struct b43_wl *wl = wldev->wl;
-
- b43dbg(wl, "Suspending...\n");
-
- mutex_lock(&wl->mutex);
- wldev->suspend_in_progress = true;
- wldev->suspend_init_status = b43_status(wldev);
- if (wldev->suspend_init_status >= B43_STAT_STARTED)
- wldev = b43_wireless_core_stop(wldev);
- if (wldev && wldev->suspend_init_status >= B43_STAT_INITIALIZED)
- b43_wireless_core_exit(wldev);
- mutex_unlock(&wl->mutex);
-
- b43dbg(wl, "Device suspended.\n");
-
- return 0;
-}
-
-static int b43_resume(struct ssb_device *dev)
-{
- struct b43_wldev *wldev = ssb_get_drvdata(dev);
- struct b43_wl *wl = wldev->wl;
- int err = 0;
-
- b43dbg(wl, "Resuming...\n");
-
- mutex_lock(&wl->mutex);
- if (wldev->suspend_init_status >= B43_STAT_INITIALIZED) {
- err = b43_wireless_core_init(wldev);
- if (err) {
- b43err(wl, "Resume failed at core init\n");
- goto out;
- }
- }
- if (wldev->suspend_init_status >= B43_STAT_STARTED) {
- err = b43_wireless_core_start(wldev);
- if (err) {
- b43_leds_exit(wldev);
- b43_rng_exit(wldev->wl);
- b43_wireless_core_exit(wldev);
- b43err(wl, "Resume failed at core start\n");
- goto out;
- }
- }
- b43dbg(wl, "Device resumed.\n");
- out:
- wldev->suspend_in_progress = false;
- mutex_unlock(&wl->mutex);
- return err;
-}
-
-#else /* CONFIG_PM */
-# define b43_suspend NULL
-# define b43_resume NULL
-#endif /* CONFIG_PM */
-
static struct ssb_driver b43_ssb_driver = {
.name = KBUILD_MODNAME,
.id_table = b43_ssb_tbl,
.probe = b43_probe,
.remove = b43_remove,
- .suspend = b43_suspend,
- .resume = b43_resume,
};
static void b43_print_driverinfo(void)
{
const char *feat_pci = "", *feat_pcmcia = "", *feat_nphy = "",
- *feat_leds = "";
+ *feat_leds = "", *feat_sdio = "";
#ifdef CONFIG_B43_PCI_AUTOSELECT
feat_pci = "P";
#endif
#ifdef CONFIG_B43_LEDS
feat_leds = "L";
+#endif
+#ifdef CONFIG_B43_SDIO
+ feat_sdio = "S";
#endif
printk(KERN_INFO "Broadcom 43xx driver loaded "
- "[ Features: %s%s%s%s, Firmware-ID: "
+ "[ Features: %s%s%s%s%s, Firmware-ID: "
B43_SUPPORTED_FIRMWARE_ID " ]\n",
feat_pci, feat_pcmcia, feat_nphy,
- feat_leds);
+ feat_leds, feat_sdio);
}
static int __init b43_init(void)
err = b43_pcmcia_init();
if (err)
goto err_dfs_exit;
- err = ssb_driver_register(&b43_ssb_driver);
+ err = b43_sdio_init();
if (err)
goto err_pcmcia_exit;
+ err = ssb_driver_register(&b43_ssb_driver);
+ if (err)
+ goto err_sdio_exit;
b43_print_driverinfo();
return err;
+err_sdio_exit:
+ b43_sdio_exit();
err_pcmcia_exit:
b43_pcmcia_exit();
err_dfs_exit:
static void __exit b43_exit(void)
{
ssb_driver_unregister(&b43_ssb_driver);
+ b43_sdio_exit();
b43_pcmcia_exit();
b43_debugfs_exit();
}
return B43_TXPWR_RES_DONE;
}
+void b43_lpphy_op_switch_analog(struct b43_wldev *dev, bool on)
+{
+ if (on) {
+ b43_phy_mask(dev, B43_LPPHY_AFE_CTL_OVR, 0xfff8);
+ } else {
+ b43_phy_set(dev, B43_LPPHY_AFE_CTL_OVRVAL, 0x0007);
+ b43_phy_set(dev, B43_LPPHY_AFE_CTL_OVR, 0x0007);
+ }
+}
+
const struct b43_phy_operations b43_phyops_lp = {
.allocate = b43_lpphy_op_allocate,
.free = b43_lpphy_op_free,
.radio_read = b43_lpphy_op_radio_read,
.radio_write = b43_lpphy_op_radio_write,
.software_rfkill = b43_lpphy_op_software_rfkill,
- .switch_analog = b43_phyop_switch_analog_generic,
+ .switch_analog = b43_lpphy_op_switch_analog,
.switch_channel = b43_lpphy_op_switch_channel,
.get_default_chan = b43_lpphy_op_get_default_chan,
.set_rx_antenna = b43_lpphy_op_set_rx_antenna,
q->buffer_used -= total_len;
q->free_packet_slots += 1;
- ieee80211_tx_status_irqsafe(dev->wl->hw, pack->skb);
+ ieee80211_tx_status(dev->wl->hw, pack->skb);
pack->skb = NULL;
list_add(&pack->list, &q->packets_list);
/* Returns TRUE, if the radio is enabled in hardware. */
bool b43_is_hw_radio_enabled(struct b43_wldev *dev)
{
- if (dev->phy.rev >= 3) {
+ if (dev->phy.rev >= 3 || dev->phy.type == B43_PHYTYPE_LP) {
if (!(b43_read32(dev, B43_MMIO_RADIO_HWENABLED_HI)
& B43_MMIO_RADIO_HWENABLED_HI_MASK))
return 1;
--- /dev/null
+/*
+ * Broadcom B43 wireless driver
+ *
+ * SDIO over Sonics Silicon Backplane bus glue for b43.
+ *
+ * Copyright (C) 2009 Albert Herranz
+ * Copyright (C) 2009 Michael Buesch <mb@bu3sch.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/sdio_func.h>
+#include <linux/mmc/sdio_ids.h>
+#include <linux/ssb/ssb.h>
+
+#include "sdio.h"
+#include "b43.h"
+
+
+#define HNBU_CHIPID 0x01 /* vendor & device id */
+
+#define B43_SDIO_BLOCK_SIZE 64 /* rx fifo max size in bytes */
+
+
+static const struct b43_sdio_quirk {
+ u16 vendor;
+ u16 device;
+ unsigned int quirks;
+} b43_sdio_quirks[] = {
+ { 0x14E4, 0x4318, SSB_QUIRK_SDIO_READ_AFTER_WRITE32, },
+ { },
+};
+
+
+static unsigned int b43_sdio_get_quirks(u16 vendor, u16 device)
+{
+ const struct b43_sdio_quirk *q;
+
+ for (q = b43_sdio_quirks; q->quirks; q++) {
+ if (vendor == q->vendor && device == q->device)
+ return q->quirks;
+ }
+
+ return 0;
+}
+
+static void b43_sdio_interrupt_dispatcher(struct sdio_func *func)
+{
+ struct b43_sdio *sdio = sdio_get_drvdata(func);
+ struct b43_wldev *dev = sdio->irq_handler_opaque;
+
+ if (unlikely(b43_status(dev) < B43_STAT_STARTED))
+ return;
+
+ sdio_release_host(func);
+ sdio->irq_handler(dev);
+ sdio_claim_host(func);
+}
+
+int b43_sdio_request_irq(struct b43_wldev *dev,
+ void (*handler)(struct b43_wldev *dev))
+{
+ struct ssb_bus *bus = dev->dev->bus;
+ struct sdio_func *func = bus->host_sdio;
+ struct b43_sdio *sdio = sdio_get_drvdata(func);
+ int err;
+
+ sdio->irq_handler_opaque = dev;
+ sdio->irq_handler = handler;
+ sdio_claim_host(func);
+ err = sdio_claim_irq(func, b43_sdio_interrupt_dispatcher);
+ sdio_release_host(func);
+
+ return err;
+}
+
+void b43_sdio_free_irq(struct b43_wldev *dev)
+{
+ struct ssb_bus *bus = dev->dev->bus;
+ struct sdio_func *func = bus->host_sdio;
+ struct b43_sdio *sdio = sdio_get_drvdata(func);
+
+ sdio_claim_host(func);
+ sdio_release_irq(func);
+ sdio_release_host(func);
+ sdio->irq_handler_opaque = NULL;
+ sdio->irq_handler = NULL;
+}
+
+static int b43_sdio_probe(struct sdio_func *func,
+ const struct sdio_device_id *id)
+{
+ struct b43_sdio *sdio;
+ struct sdio_func_tuple *tuple;
+ u16 vendor = 0, device = 0;
+ int error;
+
+ /* Look for the card chip identifier. */
+ tuple = func->tuples;
+ while (tuple) {
+ switch (tuple->code) {
+ case 0x80:
+ switch (tuple->data[0]) {
+ case HNBU_CHIPID:
+ if (tuple->size != 5)
+ break;
+ vendor = tuple->data[1] | (tuple->data[2]<<8);
+ device = tuple->data[3] | (tuple->data[4]<<8);
+ dev_info(&func->dev, "Chip ID %04x:%04x\n",
+ vendor, device);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ tuple = tuple->next;
+ }
+ if (!vendor || !device) {
+ error = -ENODEV;
+ goto out;
+ }
+
+ sdio_claim_host(func);
+ error = sdio_set_block_size(func, B43_SDIO_BLOCK_SIZE);
+ if (error) {
+ dev_err(&func->dev, "failed to set block size to %u bytes,"
+ " error %d\n", B43_SDIO_BLOCK_SIZE, error);
+ goto err_release_host;
+ }
+ error = sdio_enable_func(func);
+ if (error) {
+ dev_err(&func->dev, "failed to enable func, error %d\n", error);
+ goto err_release_host;
+ }
+ sdio_release_host(func);
+
+ sdio = kzalloc(sizeof(*sdio), GFP_KERNEL);
+ if (!sdio) {
+ error = -ENOMEM;
+ dev_err(&func->dev, "failed to allocate ssb bus\n");
+ goto err_disable_func;
+ }
+ error = ssb_bus_sdiobus_register(&sdio->ssb, func,
+ b43_sdio_get_quirks(vendor, device));
+ if (error) {
+ dev_err(&func->dev, "failed to register ssb sdio bus,"
+ " error %d\n", error);
+ goto err_free_ssb;
+ }
+ sdio_set_drvdata(func, sdio);
+
+ return 0;
+
+err_free_ssb:
+ kfree(sdio);
+err_disable_func:
+ sdio_disable_func(func);
+err_release_host:
+ sdio_release_host(func);
+out:
+ return error;
+}
+
+static void b43_sdio_remove(struct sdio_func *func)
+{
+ struct b43_sdio *sdio = sdio_get_drvdata(func);
+
+ ssb_bus_unregister(&sdio->ssb);
+ sdio_disable_func(func);
+ kfree(sdio);
+ sdio_set_drvdata(func, NULL);
+}
+
+static const struct sdio_device_id b43_sdio_ids[] = {
+ { SDIO_DEVICE(0x02d0, 0x044b) }, /* Nintendo Wii WLAN daughter card */
+ { },
+};
+
+static struct sdio_driver b43_sdio_driver = {
+ .name = "b43-sdio",
+ .id_table = b43_sdio_ids,
+ .probe = b43_sdio_probe,
+ .remove = b43_sdio_remove,
+};
+
+int b43_sdio_init(void)
+{
+ return sdio_register_driver(&b43_sdio_driver);
+}
+
+void b43_sdio_exit(void)
+{
+ sdio_unregister_driver(&b43_sdio_driver);
+}
--- /dev/null
+#ifndef B43_SDIO_H_
+#define B43_SDIO_H_
+
+#include <linux/ssb/ssb.h>
+
+struct b43_wldev;
+
+
+#ifdef CONFIG_B43_SDIO
+
+struct b43_sdio {
+ struct ssb_bus ssb;
+ void *irq_handler_opaque;
+ void (*irq_handler)(struct b43_wldev *dev);
+};
+
+int b43_sdio_request_irq(struct b43_wldev *dev,
+ void (*handler)(struct b43_wldev *dev));
+void b43_sdio_free_irq(struct b43_wldev *dev);
+
+int b43_sdio_init(void);
+void b43_sdio_exit(void);
+
+
+#else /* CONFIG_B43_SDIO */
+
+
+int b43_sdio_request_irq(struct b43_wldev *dev,
+ void (*handler)(struct b43_wldev *dev))
+{
+ return -ENODEV;
+}
+void b43_sdio_free_irq(struct b43_wldev *dev)
+{
+}
+static inline int b43_sdio_init(void)
+{
+ return 0;
+}
+static inline void b43_sdio_exit(void)
+{
+}
+
+#endif /* CONFIG_B43_SDIO */
+#endif /* B43_SDIO_H_ */
}
memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
- ieee80211_rx_irqsafe(dev->wl->hw, skb);
+ ieee80211_rx(dev->wl->hw, skb);
+#if B43_DEBUG
+ dev->rx_count++;
+#endif
return;
drop:
b43dbg(dev->wl, "RX: Packet dropped\n");
agg->frame_count, txq_id, idx);
hdr = iwl_tx_queue_get_hdr(priv, txq_id, idx);
+ if (!hdr) {
+ IWL_ERR(priv,
+ "BUG_ON idx doesn't point to valid skb"
+ " idx=%d, txq_id=%d\n", idx, txq_id);
+ return -1;
+ }
sc = le16_to_cpu(hdr->seq_ctrl);
if (idx != (SEQ_TO_SN(sc) & 0xff)) {
agg->frame_count, txq_id, idx);
hdr = iwl_tx_queue_get_hdr(priv, txq_id, idx);
+ if (!hdr) {
+ IWL_ERR(priv,
+ "BUG_ON idx doesn't point to valid skb"
+ " idx=%d, txq_id=%d\n", idx, txq_id);
+ return -1;
+ }
sc = le16_to_cpu(hdr->seq_ctrl);
if (idx != (SEQ_TO_SN(sc) & 0xff)) {
}
spin_unlock_irqrestore(&rxq->lock, flags);
+ if (rxq->free_count > RX_LOW_WATERMARK)
+ priority |= __GFP_NOWARN;
/* Alloc a new receive buffer */
skb = alloc_skb(priv->hw_params.rx_buf_size + 256,
priority);
if (!skb) {
- IWL_CRIT(priv, "Can not allocate SKB buffers\n");
+ if (net_ratelimit())
+ IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n");
+ if ((rxq->free_count <= RX_LOW_WATERMARK) &&
+ net_ratelimit())
+ IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n",
+ priority == GFP_ATOMIC ? "GFP_ATOMIC" : "GFP_KERNEL",
+ rxq->free_count);
/* We don't reschedule replenish work here -- we will
* call the restock method and if it still needs
* more buffers it will schedule replenish */
struct iwl_host_cmd cmd = {
.id = REPLY_WEPKEY,
.data = wep_cmd,
- .flags = CMD_SYNC,
+ .flags = CMD_ASYNC,
};
memset(wep_cmd, 0, cmd_size +
}
spin_unlock_irqrestore(&rxq->lock, flags);
+ if (rxq->free_count > RX_LOW_WATERMARK)
+ priority |= __GFP_NOWARN;
/* Alloc a new receive buffer */
skb = alloc_skb(priv->hw_params.rx_buf_size, priority);
if (!skb) {
if (net_ratelimit())
- IWL_CRIT(priv, ": Can not allocate SKB buffers\n");
+ IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n");
+ if ((rxq->free_count <= RX_LOW_WATERMARK) &&
+ net_ratelimit())
+ IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n",
+ priority == GFP_ATOMIC ? "GFP_ATOMIC" : "GFP_KERNEL",
+ rxq->free_count);
/* We don't reschedule replenish work here -- we will
* call the restock method and if it still needs
* more buffers it will schedule replenish */
{
}
-static inline void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, bool l2pad,
+static inline void rt2x00crypto_rx_insert_iv(struct sk_buff *skb,
unsigned int header_length,
struct rxdone_entry_desc *rxdesc)
{
menuconfig WL12XX
- boolean "TI wl12xx driver support"
+ tristate "TI wl12xx driver support"
depends on MAC80211 && WLAN_80211 && EXPERIMENTAL
---help---
This will enable TI wl12xx driver support. The drivers make
/* Find bulk out endpoint */
endpoint = &iface_desc->endpoint[1].desc;
- if ((endpoint->bEndpointAddress & USB_TYPE_MASK) == USB_DIR_OUT &&
+ if (usb_endpoint_dir_out(endpoint) &&
usb_endpoint_xfer_bulk(endpoint)) {
bulk_out_ep = endpoint->bEndpointAddress;
} else {
}
/* Enable the Rx interrupts for the first buffer */
- reg_data = in_be32(drvdata->base_addr + XEL_RSR_OFFSET);
out_be32(drvdata->base_addr + XEL_RSR_OFFSET,
- reg_data | XEL_RSR_RECV_IE_MASK);
+ XEL_RSR_RECV_IE_MASK);
/* Enable the Rx interrupts for the second Buffer if
* configured in HW */
if (drvdata->rx_ping_pong != 0) {
- reg_data = in_be32(drvdata->base_addr + XEL_BUFFER_OFFSET +
- XEL_RSR_OFFSET);
out_be32(drvdata->base_addr + XEL_BUFFER_OFFSET +
XEL_RSR_OFFSET,
- reg_data | XEL_RSR_RECV_IE_MASK);
+ XEL_RSR_RECV_IE_MASK);
}
/* Enable the Global Interrupt Enable */
{
int err;
- if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
return -ENOMEM;
- cpumask_clear(marked_cpus);
start_cpu_work();
#define PARPORT_MIN_SPINTIME_VALUE 1
#define PARPORT_MAX_SPINTIME_VALUE 1000
-static int do_active_device(ctl_table *table, int write, struct file *filp,
+static int do_active_device(ctl_table *table, int write,
void __user *result, size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
}
#ifdef CONFIG_PARPORT_1284
-static int do_autoprobe(ctl_table *table, int write, struct file *filp,
+static int do_autoprobe(ctl_table *table, int write,
void __user *result, size_t *lenp, loff_t *ppos)
{
struct parport_device_info *info = table->extra2;
#endif /* IEEE1284.3 support. */
static int do_hardware_base_addr (ctl_table *table, int write,
- struct file *filp, void __user *result,
+ void __user *result,
size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
}
static int do_hardware_irq (ctl_table *table, int write,
- struct file *filp, void __user *result,
+ void __user *result,
size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
}
static int do_hardware_dma (ctl_table *table, int write,
- struct file *filp, void __user *result,
+ void __user *result,
size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
}
static int do_hardware_modes (ctl_table *table, int write,
- struct file *filp, void __user *result,
+ void __user *result,
size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
#define SLOT_NAME_SIZE 10
struct slot {
- u8 bus;
- u8 device;
u8 state;
- u8 hp_slot;
- u32 number;
struct controller *ctrl;
- struct hpc_ops *hpc_ops;
struct hotplug_slot *hotplug_slot;
- struct list_head slot_list;
struct delayed_work work; /* work for button event */
struct mutex lock;
};
};
struct controller {
- struct mutex crit_sect; /* critical section mutex */
struct mutex ctrl_lock; /* controller lock */
- int num_slots; /* Number of slots on ctlr */
- int slot_num_inc; /* 1 or -1 */
- struct pci_dev *pci_dev;
struct pcie_device *pcie; /* PCI Express port service */
- struct list_head slot_list;
- struct hpc_ops *hpc_ops;
+ struct slot *slot;
wait_queue_head_t queue; /* sleep & wake process */
- u8 slot_device_offset;
- u32 first_slot; /* First physical slot number */ /* PCIE only has 1 slot */
- u8 slot_bus; /* Bus where the slots handled by this controller sit */
u32 slot_cap;
u8 cap_base;
struct timer_list poll_timer;
#define POWERON_STATE 3
#define POWEROFF_STATE 4
-/* Error messages */
-#define INTERLOCK_OPEN 0x00000002
-#define ADD_NOT_SUPPORTED 0x00000003
-#define CARD_FUNCTIONING 0x00000005
-#define ADAPTER_NOT_SAME 0x00000006
-#define NO_ADAPTER_PRESENT 0x00000009
-#define NOT_ENOUGH_RESOURCES 0x0000000B
-#define DEVICE_TYPE_NOT_SUPPORTED 0x0000000C
-#define WRONG_BUS_FREQUENCY 0x0000000D
-#define POWER_FAILURE 0x0000000E
-
-/* Field definitions in Slot Capabilities Register */
-#define ATTN_BUTTN_PRSN 0x00000001
-#define PWR_CTRL_PRSN 0x00000002
-#define MRL_SENS_PRSN 0x00000004
-#define ATTN_LED_PRSN 0x00000008
-#define PWR_LED_PRSN 0x00000010
-#define HP_SUPR_RM_SUP 0x00000020
-#define EMI_PRSN 0x00020000
-#define NO_CMD_CMPL_SUP 0x00040000
-
-#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & ATTN_BUTTN_PRSN)
-#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PWR_CTRL_PRSN)
-#define MRL_SENS(ctrl) ((ctrl)->slot_cap & MRL_SENS_PRSN)
-#define ATTN_LED(ctrl) ((ctrl)->slot_cap & ATTN_LED_PRSN)
-#define PWR_LED(ctrl) ((ctrl)->slot_cap & PWR_LED_PRSN)
-#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & HP_SUPR_RM_SUP)
-#define EMI(ctrl) ((ctrl)->slot_cap & EMI_PRSN)
-#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & NO_CMD_CMPL_SUP)
+#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP)
+#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP)
+#define MRL_SENS(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP)
+#define ATTN_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP)
+#define PWR_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP)
+#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_HPS)
+#define EMI(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_EIP)
+#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS)
+#define PSN(ctrl) ((ctrl)->slot_cap >> 19)
extern int pciehp_sysfs_enable_slot(struct slot *slot);
extern int pciehp_sysfs_disable_slot(struct slot *slot);
extern u8 pciehp_handle_attention_button(struct slot *p_slot);
- extern u8 pciehp_handle_switch_change(struct slot *p_slot);
+extern u8 pciehp_handle_switch_change(struct slot *p_slot);
extern u8 pciehp_handle_presence_change(struct slot *p_slot);
extern u8 pciehp_handle_power_fault(struct slot *p_slot);
extern int pciehp_configure_device(struct slot *p_slot);
int pciehp_enable_slot(struct slot *p_slot);
int pciehp_disable_slot(struct slot *p_slot);
int pcie_enable_notification(struct controller *ctrl);
+int pciehp_power_on_slot(struct slot *slot);
+int pciehp_power_off_slot(struct slot *slot);
+int pciehp_get_power_status(struct slot *slot, u8 *status);
+int pciehp_get_attention_status(struct slot *slot, u8 *status);
+
+int pciehp_set_attention_status(struct slot *slot, u8 status);
+int pciehp_get_latch_status(struct slot *slot, u8 *status);
+int pciehp_get_adapter_status(struct slot *slot, u8 *status);
+int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *speed);
+int pciehp_get_max_link_width(struct slot *slot, enum pcie_link_width *val);
+int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *speed);
+int pciehp_get_cur_link_width(struct slot *slot, enum pcie_link_width *val);
+int pciehp_query_power_fault(struct slot *slot);
+void pciehp_green_led_on(struct slot *slot);
+void pciehp_green_led_off(struct slot *slot);
+void pciehp_green_led_blink(struct slot *slot);
+int pciehp_check_link_status(struct controller *ctrl);
+void pciehp_release_ctrl(struct controller *ctrl);
static inline const char *slot_name(struct slot *slot)
{
return hotplug_slot_name(slot->hotplug_slot);
}
-static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
-{
- struct slot *slot;
-
- list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
- if (slot->device == device)
- return slot;
- }
-
- ctrl_err(ctrl, "Slot (device=0x%02x) not found\n", device);
- return NULL;
-}
-
-struct hpc_ops {
- int (*power_on_slot)(struct slot *slot);
- int (*power_off_slot)(struct slot *slot);
- int (*get_power_status)(struct slot *slot, u8 *status);
- int (*get_attention_status)(struct slot *slot, u8 *status);
- int (*set_attention_status)(struct slot *slot, u8 status);
- int (*get_latch_status)(struct slot *slot, u8 *status);
- int (*get_adapter_status)(struct slot *slot, u8 *status);
- int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
- int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
- int (*get_max_lnk_width)(struct slot *slot, enum pcie_link_width *val);
- int (*get_cur_lnk_width)(struct slot *slot, enum pcie_link_width *val);
- int (*query_power_fault)(struct slot *slot);
- void (*green_led_on)(struct slot *slot);
- void (*green_led_off)(struct slot *slot);
- void (*green_led_blink)(struct slot *slot);
- void (*release_ctlr)(struct controller *ctrl);
- int (*check_lnk_status)(struct controller *ctrl);
-};
-
#ifdef CONFIG_ACPI
#include <acpi/acpi.h>
#include <acpi/acpi_bus.h>
#define PCIEHP_DETECT_AUTO (2)
#define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_AUTO
+struct dummy_slot {
+ u32 number;
+ struct list_head list;
+};
+
static int slot_detection_mode;
static char *pciehp_detect_mode;
module_param(pciehp_detect_mode, charp, 0444);
int pos;
u32 slot_cap;
acpi_handle handle;
- struct slot *slot, *tmp;
+ struct dummy_slot *slot, *tmp;
struct pci_dev *pdev = dev->port;
/* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
if (pciehp_get_hp_hw_control_from_firmware(pdev))
if (!slot)
return -ENOMEM;
slot->number = slot_cap >> 19;
- list_for_each_entry(tmp, &dummy_slots, slot_list) {
+ list_for_each_entry(tmp, &dummy_slots, list) {
if (tmp->number == slot->number)
dup_slot_id++;
}
- list_add_tail(&slot->slot_list, &dummy_slots);
+ list_add_tail(&slot->list, &dummy_slots);
handle = DEVICE_ACPI_HANDLE(&pdev->dev);
if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
acpi_slot_detected = 1;
static int __init select_detection_mode(void)
{
- struct slot *slot, *tmp;
+ struct dummy_slot *slot, *tmp;
pcie_port_service_register(&dummy_driver);
pcie_port_service_unregister(&dummy_driver);
- list_for_each_entry_safe(slot, tmp, &dummy_slots, slot_list) {
- list_del(&slot->slot_list);
+ list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
+ list_del(&slot->list);
kfree(slot);
}
if (acpi_slot_detected && dup_slot_id)
kfree(hotplug_slot);
}
-static int init_slots(struct controller *ctrl)
+static int init_slot(struct controller *ctrl)
{
- struct slot *slot;
- struct hotplug_slot *hotplug_slot;
- struct hotplug_slot_info *info;
+ struct slot *slot = ctrl->slot;
+ struct hotplug_slot *hotplug = NULL;
+ struct hotplug_slot_info *info = NULL;
char name[SLOT_NAME_SIZE];
int retval = -ENOMEM;
- list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
- hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL);
- if (!hotplug_slot)
- goto error;
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- goto error_hpslot;
-
- /* register this slot with the hotplug pci core */
- hotplug_slot->info = info;
- hotplug_slot->private = slot;
- hotplug_slot->release = &release_slot;
- hotplug_slot->ops = &pciehp_hotplug_slot_ops;
- slot->hotplug_slot = hotplug_slot;
- snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
-
- ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x "
- "hp_slot=%x sun=%x slot_device_offset=%x\n",
- pci_domain_nr(ctrl->pci_dev->subordinate),
- slot->bus, slot->device, slot->hp_slot, slot->number,
- ctrl->slot_device_offset);
- retval = pci_hp_register(hotplug_slot,
- ctrl->pci_dev->subordinate,
- slot->device,
- name);
- if (retval) {
- ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
- retval);
- goto error_info;
- }
- get_power_status(hotplug_slot, &info->power_status);
- get_attention_status(hotplug_slot, &info->attention_status);
- get_latch_status(hotplug_slot, &info->latch_status);
- get_adapter_status(hotplug_slot, &info->adapter_status);
+ hotplug = kzalloc(sizeof(*hotplug), GFP_KERNEL);
+ if (!hotplug)
+ goto out;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ goto out;
+
+ /* register this slot with the hotplug pci core */
+ hotplug->info = info;
+ hotplug->private = slot;
+ hotplug->release = &release_slot;
+ hotplug->ops = &pciehp_hotplug_slot_ops;
+ slot->hotplug_slot = hotplug;
+ snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
+
+ ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:00 sun=%x\n",
+ pci_domain_nr(ctrl->pcie->port->subordinate),
+ ctrl->pcie->port->subordinate->number, PSN(ctrl));
+ retval = pci_hp_register(hotplug,
+ ctrl->pcie->port->subordinate, 0, name);
+ if (retval) {
+ ctrl_err(ctrl,
+ "pci_hp_register failed with error %d\n", retval);
+ goto out;
+ }
+ get_power_status(hotplug, &info->power_status);
+ get_attention_status(hotplug, &info->attention_status);
+ get_latch_status(hotplug, &info->latch_status);
+ get_adapter_status(hotplug, &info->adapter_status);
+out:
+ if (retval) {
+ kfree(info);
+ kfree(hotplug);
}
-
- return 0;
-error_info:
- kfree(info);
-error_hpslot:
- kfree(hotplug_slot);
-error:
return retval;
}
-static void cleanup_slots(struct controller *ctrl)
+static void cleanup_slot(struct controller *ctrl)
{
- struct slot *slot;
- list_for_each_entry(slot, &ctrl->slot_list, slot_list)
- pci_hp_deregister(slot->hotplug_slot);
+ pci_hp_deregister(ctrl->slot->hotplug_slot);
}
/*
hotplug_slot->info->attention_status = status;
if (ATTN_LED(slot->ctrl))
- slot->hpc_ops->set_attention_status(slot, status);
+ pciehp_set_attention_status(slot, status);
return 0;
}
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_power_status(slot, value);
+ retval = pciehp_get_power_status(slot, value);
if (retval < 0)
*value = hotplug_slot->info->power_status;
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_attention_status(slot, value);
+ retval = pciehp_get_attention_status(slot, value);
if (retval < 0)
*value = hotplug_slot->info->attention_status;
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_latch_status(slot, value);
+ retval = pciehp_get_latch_status(slot, value);
if (retval < 0)
*value = hotplug_slot->info->latch_status;
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_adapter_status(slot, value);
+ retval = pciehp_get_adapter_status(slot, value);
if (retval < 0)
*value = hotplug_slot->info->adapter_status;
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_max_bus_speed(slot, value);
+ retval = pciehp_get_max_link_speed(slot, value);
if (retval < 0)
*value = PCI_SPEED_UNKNOWN;
ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
__func__, slot_name(slot));
- retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
+ retval = pciehp_get_cur_link_speed(slot, value);
if (retval < 0)
*value = PCI_SPEED_UNKNOWN;
{
int rc;
struct controller *ctrl;
- struct slot *t_slot;
+ struct slot *slot;
u8 value;
struct pci_dev *pdev = dev->port;
set_service_data(dev, ctrl);
/* Setup the slot information structures */
- rc = init_slots(ctrl);
+ rc = init_slot(ctrl);
if (rc) {
if (rc == -EBUSY)
ctrl_warn(ctrl, "Slot already registered by another "
}
/* Check if slot is occupied */
- t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
- t_slot->hpc_ops->get_adapter_status(t_slot, &value);
+ slot = ctrl->slot;
+ pciehp_get_adapter_status(slot, &value);
if (value) {
if (pciehp_force)
- pciehp_enable_slot(t_slot);
+ pciehp_enable_slot(slot);
} else {
/* Power off slot if not occupied */
if (POWER_CTRL(ctrl)) {
- rc = t_slot->hpc_ops->power_off_slot(t_slot);
+ rc = pciehp_power_off_slot(slot);
if (rc)
goto err_out_free_ctrl_slot;
}
return 0;
err_out_free_ctrl_slot:
- cleanup_slots(ctrl);
+ cleanup_slot(ctrl);
err_out_release_ctlr:
- ctrl->hpc_ops->release_ctlr(ctrl);
+ pciehp_release_ctrl(ctrl);
err_out_none:
return -ENODEV;
}
-static void pciehp_remove (struct pcie_device *dev)
+static void pciehp_remove(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
- cleanup_slots(ctrl);
- ctrl->hpc_ops->release_ctlr(ctrl);
+ cleanup_slot(ctrl);
+ pciehp_release_ctrl(ctrl);
}
#ifdef CONFIG_PM
dev_info(&dev->device, "%s ENTRY\n", __func__);
if (pciehp_force) {
struct controller *ctrl = get_service_data(dev);
- struct slot *t_slot;
+ struct slot *slot;
u8 status;
/* reinitialize the chipset's event detection logic */
pcie_enable_notification(ctrl);
- t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
+ slot = ctrl->slot;
/* Check if slot is occupied */
- t_slot->hpc_ops->get_adapter_status(t_slot, &status);
+ pciehp_get_adapter_status(slot, &status);
if (status)
- pciehp_enable_slot(t_slot);
+ pciehp_enable_slot(slot);
else
- pciehp_disable_slot(t_slot);
+ pciehp_disable_slot(slot);
}
return 0;
}
/* Switch Change */
ctrl_dbg(ctrl, "Switch interrupt received\n");
- p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ pciehp_get_latch_status(p_slot, &getstatus);
if (getstatus) {
/*
* Switch opened
/* Switch is open, assume a presence change
* Save the presence state
*/
- p_slot->hpc_ops->get_adapter_status(p_slot, &presence_save);
+ pciehp_get_adapter_status(p_slot, &presence_save);
if (presence_save) {
/*
* Card Present
/* power fault */
ctrl_dbg(ctrl, "Power fault interrupt received\n");
- if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
+ if (!pciehp_query_power_fault(p_slot)) {
/*
* power fault Cleared
*/
{
/* turn off slot, turn on Amber LED, turn off Green LED if supported*/
if (POWER_CTRL(ctrl)) {
- if (pslot->hpc_ops->power_off_slot(pslot)) {
+ if (pciehp_power_off_slot(pslot)) {
ctrl_err(ctrl,
"Issue of Slot Power Off command failed\n");
return;
}
if (PWR_LED(ctrl))
- pslot->hpc_ops->green_led_off(pslot);
+ pciehp_green_led_off(pslot);
if (ATTN_LED(ctrl)) {
- if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
+ if (pciehp_set_attention_status(pslot, 1)) {
ctrl_err(ctrl,
"Issue of Set Attention Led command failed\n");
return;
{
int retval = 0;
struct controller *ctrl = p_slot->ctrl;
- struct pci_bus *parent = ctrl->pci_dev->subordinate;
-
- ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d, %d\n",
- __func__, p_slot->device, ctrl->slot_device_offset,
- p_slot->hp_slot);
+ struct pci_bus *parent = ctrl->pcie->port->subordinate;
if (POWER_CTRL(ctrl)) {
/* Power on slot */
- retval = p_slot->hpc_ops->power_on_slot(p_slot);
+ retval = pciehp_power_on_slot(p_slot);
if (retval)
return retval;
}
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_blink(p_slot);
+ pciehp_green_led_blink(p_slot);
/* Check link training status */
- retval = p_slot->hpc_ops->check_lnk_status(ctrl);
+ retval = pciehp_check_link_status(ctrl);
if (retval) {
ctrl_err(ctrl, "Failed to check link status\n");
set_slot_off(ctrl, p_slot);
}
/* Check for a power fault */
- if (p_slot->hpc_ops->query_power_fault(p_slot)) {
+ if (pciehp_query_power_fault(p_slot)) {
ctrl_dbg(ctrl, "Power fault detected\n");
- retval = POWER_FAILURE;
+ retval = -EIO;
goto err_exit;
}
retval = pciehp_configure_device(p_slot);
if (retval) {
- ctrl_err(ctrl, "Cannot add device at %04x:%02x:%02x\n",
- pci_domain_nr(parent), p_slot->bus, p_slot->device);
+ ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
+ pci_domain_nr(parent), parent->number);
goto err_exit;
}
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_on(p_slot);
+ pciehp_green_led_on(p_slot);
return 0;
if (retval)
return retval;
- ctrl_dbg(ctrl, "%s: hp_slot = %d\n", __func__, p_slot->hp_slot);
-
if (POWER_CTRL(ctrl)) {
/* power off slot */
- retval = p_slot->hpc_ops->power_off_slot(p_slot);
+ retval = pciehp_power_off_slot(p_slot);
if (retval) {
ctrl_err(ctrl,
"Issue of Slot Disable command failed\n");
msleep(1000);
}
+ /* turn off Green LED */
if (PWR_LED(ctrl))
- /* turn off Green LED */
- p_slot->hpc_ops->green_led_off(p_slot);
+ pciehp_green_led_off(p_slot);
return 0;
}
case POWEROFF_STATE:
mutex_unlock(&p_slot->lock);
ctrl_dbg(p_slot->ctrl,
- "Disabling domain:bus:device=%04x:%02x:%02x\n",
- pci_domain_nr(p_slot->ctrl->pci_dev->subordinate),
- p_slot->bus, p_slot->device);
+ "Disabling domain:bus:device=%04x:%02x:00\n",
+ pci_domain_nr(p_slot->ctrl->pcie->port->subordinate),
+ p_slot->ctrl->pcie->port->subordinate->number);
pciehp_disable_slot(p_slot);
mutex_lock(&p_slot->lock);
p_slot->state = STATIC_STATE;
break;
case POWERON_STATE:
mutex_unlock(&p_slot->lock);
- if (pciehp_enable_slot(p_slot) &&
- PWR_LED(p_slot->ctrl))
- p_slot->hpc_ops->green_led_off(p_slot);
+ if (pciehp_enable_slot(p_slot) && PWR_LED(p_slot->ctrl))
+ pciehp_green_led_off(p_slot);
mutex_lock(&p_slot->lock);
p_slot->state = STATIC_STATE;
break;
if (!info)
return -ENOMEM;
- slot->hpc_ops->get_power_status(slot, &(info->power_status));
- slot->hpc_ops->get_attention_status(slot, &(info->attention_status));
- slot->hpc_ops->get_latch_status(slot, &(info->latch_status));
- slot->hpc_ops->get_adapter_status(slot, &(info->adapter_status));
+ pciehp_get_power_status(slot, &info->power_status);
+ pciehp_get_attention_status(slot, &info->attention_status);
+ pciehp_get_latch_status(slot, &info->latch_status);
+ pciehp_get_adapter_status(slot, &info->adapter_status);
result = pci_hp_change_slot_info(slot->hotplug_slot, info);
kfree (info);
switch (p_slot->state) {
case STATIC_STATE:
- p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+ pciehp_get_power_status(p_slot, &getstatus);
if (getstatus) {
p_slot->state = BLINKINGOFF_STATE;
ctrl_info(ctrl,
}
/* blink green LED and turn off amber */
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_blink(p_slot);
+ pciehp_green_led_blink(p_slot);
if (ATTN_LED(ctrl))
- p_slot->hpc_ops->set_attention_status(p_slot, 0);
+ pciehp_set_attention_status(p_slot, 0);
schedule_delayed_work(&p_slot->work, 5*HZ);
break;
cancel_delayed_work(&p_slot->work);
if (p_slot->state == BLINKINGOFF_STATE) {
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_on(p_slot);
+ pciehp_green_led_on(p_slot);
} else {
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_off(p_slot);
+ pciehp_green_led_off(p_slot);
}
if (ATTN_LED(ctrl))
- p_slot->hpc_ops->set_attention_status(p_slot, 0);
+ pciehp_set_attention_status(p_slot, 0);
ctrl_info(ctrl, "PCI slot #%s - action canceled "
"due to button press\n", slot_name(p_slot));
p_slot->state = STATIC_STATE;
info->p_slot = p_slot;
INIT_WORK(&info->work, pciehp_power_thread);
- p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+ pciehp_get_adapter_status(p_slot, &getstatus);
if (!getstatus)
p_slot->state = POWEROFF_STATE;
else
if (!POWER_CTRL(ctrl))
break;
if (ATTN_LED(ctrl))
- p_slot->hpc_ops->set_attention_status(p_slot, 1);
+ pciehp_set_attention_status(p_slot, 1);
if (PWR_LED(ctrl))
- p_slot->hpc_ops->green_led_off(p_slot);
+ pciehp_green_led_off(p_slot);
break;
case INT_PRESENCE_ON:
case INT_PRESENCE_OFF:
int rc;
struct controller *ctrl = p_slot->ctrl;
- /* Check to see if (latch closed, card present, power off) */
- mutex_lock(&p_slot->ctrl->crit_sect);
-
- rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+ rc = pciehp_get_adapter_status(p_slot, &getstatus);
if (rc || !getstatus) {
ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -ENODEV;
}
if (MRL_SENS(p_slot->ctrl)) {
- rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ rc = pciehp_get_latch_status(p_slot, &getstatus);
if (rc || getstatus) {
ctrl_info(ctrl, "Latch open on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -ENODEV;
}
}
if (POWER_CTRL(p_slot->ctrl)) {
- rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+ rc = pciehp_get_power_status(p_slot, &getstatus);
if (rc || getstatus) {
ctrl_info(ctrl, "Already enabled on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -EINVAL;
}
}
- p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ pciehp_get_latch_status(p_slot, &getstatus);
rc = board_added(p_slot);
if (rc) {
- p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ pciehp_get_latch_status(p_slot, &getstatus);
}
update_slot_info(p_slot);
- mutex_unlock(&p_slot->ctrl->crit_sect);
return rc;
}
if (!p_slot->ctrl)
return 1;
- /* Check to see if (latch closed, card present, power on) */
- mutex_lock(&p_slot->ctrl->crit_sect);
-
if (!HP_SUPR_RM(p_slot->ctrl)) {
- ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+ ret = pciehp_get_adapter_status(p_slot, &getstatus);
if (ret || !getstatus) {
ctrl_info(ctrl, "No adapter on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -ENODEV;
}
}
if (MRL_SENS(p_slot->ctrl)) {
- ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+ ret = pciehp_get_latch_status(p_slot, &getstatus);
if (ret || getstatus) {
ctrl_info(ctrl, "Latch open on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -ENODEV;
}
}
if (POWER_CTRL(p_slot->ctrl)) {
- ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+ ret = pciehp_get_power_status(p_slot, &getstatus);
if (ret || !getstatus) {
ctrl_info(ctrl, "Already disabled on slot(%s)\n",
slot_name(p_slot));
- mutex_unlock(&p_slot->ctrl->crit_sect);
return -EINVAL;
}
}
ret = remove_board(p_slot);
update_slot_info(p_slot);
- mutex_unlock(&p_slot->ctrl->crit_sect);
return ret;
}
static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
return pci_read_config_word(dev, ctrl->cap_base + reg, value);
}
static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
return pci_read_config_dword(dev, ctrl->cap_base + reg, value);
}
static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
return pci_write_config_word(dev, ctrl->cap_base + reg, value);
}
static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
return pci_write_config_dword(dev, ctrl->cap_base + reg, value);
}
ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n");
}
-static int hpc_check_lnk_status(struct controller *ctrl)
+int pciehp_check_link_status(struct controller *ctrl)
{
u16 lnk_status;
int retval = 0;
return retval;
}
-static int hpc_get_attention_status(struct slot *slot, u8 *status)
+int pciehp_get_attention_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
u16 slot_ctrl;
return 0;
}
-static int hpc_get_power_status(struct slot *slot, u8 *status)
+int pciehp_get_power_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
u16 slot_ctrl;
return retval;
}
-static int hpc_get_latch_status(struct slot *slot, u8 *status)
+int pciehp_get_latch_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
u16 slot_status;
return 0;
}
-static int hpc_get_adapter_status(struct slot *slot, u8 *status)
+int pciehp_get_adapter_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
u16 slot_status;
return 0;
}
-static int hpc_query_power_fault(struct slot *slot)
+int pciehp_query_power_fault(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_status;
return !!(slot_status & PCI_EXP_SLTSTA_PFD);
}
-static int hpc_set_attention_status(struct slot *slot, u8 value)
+int pciehp_set_attention_status(struct slot *slot, u8 value)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
return rc;
}
-static void hpc_set_green_led_on(struct slot *slot)
+void pciehp_green_led_on(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
}
-static void hpc_set_green_led_off(struct slot *slot)
+void pciehp_green_led_off(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
}
-static void hpc_set_green_led_blink(struct slot *slot)
+void pciehp_green_led_blink(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
}
-static int hpc_power_on_slot(struct slot * slot)
+int pciehp_power_on_slot(struct slot * slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
u16 slot_status;
int retval = 0;
- ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
-
/* Clear sticky power-fault bit from previous power failures */
retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
if (retval) {
static inline int pcie_mask_bad_dllp(struct controller *ctrl)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
int pos;
u32 reg;
static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
{
- struct pci_dev *dev = ctrl->pci_dev;
+ struct pci_dev *dev = ctrl->pcie->port;
u32 reg;
int pos;
pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg);
}
-static int hpc_power_off_slot(struct slot * slot)
+int pciehp_power_off_slot(struct slot * slot)
{
struct controller *ctrl = slot->ctrl;
u16 slot_cmd;
int retval = 0;
int changed;
- ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
-
/*
* Set Bad DLLP Mask bit in Correctable Error Mask
* Register. This is the workaround against Bad DLLP error
static irqreturn_t pcie_isr(int irq, void *dev_id)
{
struct controller *ctrl = (struct controller *)dev_id;
+ struct slot *slot = ctrl->slot;
u16 detected, intr_loc;
- struct slot *p_slot;
/*
* In order to guarantee that all interrupt events are
if (!(intr_loc & ~PCI_EXP_SLTSTA_CC))
return IRQ_HANDLED;
- p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-
/* Check MRL Sensor Changed */
if (intr_loc & PCI_EXP_SLTSTA_MRLSC)
- pciehp_handle_switch_change(p_slot);
+ pciehp_handle_switch_change(slot);
/* Check Attention Button Pressed */
if (intr_loc & PCI_EXP_SLTSTA_ABP)
- pciehp_handle_attention_button(p_slot);
+ pciehp_handle_attention_button(slot);
/* Check Presence Detect Changed */
if (intr_loc & PCI_EXP_SLTSTA_PDC)
- pciehp_handle_presence_change(p_slot);
+ pciehp_handle_presence_change(slot);
/* Check Power Fault Detected */
if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
ctrl->power_fault_detected = 1;
- pciehp_handle_power_fault(p_slot);
+ pciehp_handle_power_fault(slot);
}
return IRQ_HANDLED;
}
-static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
+int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value)
{
struct controller *ctrl = slot->ctrl;
enum pcie_link_speed lnk_speed;
return retval;
}
-static int hpc_get_max_lnk_width(struct slot *slot,
+int pciehp_get_max_lnk_width(struct slot *slot,
enum pcie_link_width *value)
{
struct controller *ctrl = slot->ctrl;
return retval;
}
-static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
+int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value)
{
struct controller *ctrl = slot->ctrl;
enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN;
return retval;
}
-static int hpc_get_cur_lnk_width(struct slot *slot,
+int pciehp_get_cur_lnk_width(struct slot *slot,
enum pcie_link_width *value)
{
struct controller *ctrl = slot->ctrl;
return retval;
}
-static void pcie_release_ctrl(struct controller *ctrl);
-static struct hpc_ops pciehp_hpc_ops = {
- .power_on_slot = hpc_power_on_slot,
- .power_off_slot = hpc_power_off_slot,
- .set_attention_status = hpc_set_attention_status,
- .get_power_status = hpc_get_power_status,
- .get_attention_status = hpc_get_attention_status,
- .get_latch_status = hpc_get_latch_status,
- .get_adapter_status = hpc_get_adapter_status,
-
- .get_max_bus_speed = hpc_get_max_lnk_speed,
- .get_cur_bus_speed = hpc_get_cur_lnk_speed,
- .get_max_lnk_width = hpc_get_max_lnk_width,
- .get_cur_lnk_width = hpc_get_cur_lnk_width,
-
- .query_power_fault = hpc_query_power_fault,
- .green_led_on = hpc_set_green_led_on,
- .green_led_off = hpc_set_green_led_off,
- .green_led_blink = hpc_set_green_led_blink,
-
- .release_ctlr = pcie_release_ctrl,
- .check_lnk_status = hpc_check_lnk_status,
-};
-
int pcie_enable_notification(struct controller *ctrl)
{
u16 cmd, mask;
if (!slot)
return -ENOMEM;
- slot->hp_slot = 0;
slot->ctrl = ctrl;
- slot->bus = ctrl->pci_dev->subordinate->number;
- slot->device = ctrl->slot_device_offset + slot->hp_slot;
- slot->hpc_ops = ctrl->hpc_ops;
- slot->number = ctrl->first_slot;
mutex_init(&slot->lock);
INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
- list_add(&slot->slot_list, &ctrl->slot_list);
+ ctrl->slot = slot;
return 0;
}
static void pcie_cleanup_slot(struct controller *ctrl)
{
- struct slot *slot;
- slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list);
- list_del(&slot->slot_list);
+ struct slot *slot = ctrl->slot;
cancel_delayed_work(&slot->work);
flush_scheduled_work();
flush_workqueue(pciehp_wq);
{
int i;
u16 reg16;
- struct pci_dev *pdev = ctrl->pci_dev;
+ struct pci_dev *pdev = ctrl->pcie->port;
if (!pciehp_debug)
return;
(unsigned long long)pci_resource_start(pdev, i));
}
ctrl_info(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap);
- ctrl_info(ctrl, " Physical Slot Number : %d\n", ctrl->first_slot);
+ ctrl_info(ctrl, " Physical Slot Number : %d\n", PSN(ctrl));
ctrl_info(ctrl, " Attention Button : %3s\n",
ATTN_BUTTN(ctrl) ? "yes" : "no");
ctrl_info(ctrl, " Power Controller : %3s\n",
dev_err(&dev->device, "%s: Out of memory\n", __func__);
goto abort;
}
- INIT_LIST_HEAD(&ctrl->slot_list);
-
ctrl->pcie = dev;
- ctrl->pci_dev = pdev;
ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
if (!ctrl->cap_base) {
ctrl_err(ctrl, "Cannot find PCI Express capability\n");
}
ctrl->slot_cap = slot_cap;
- ctrl->first_slot = slot_cap >> 19;
- ctrl->slot_device_offset = 0;
- ctrl->num_slots = 1;
- ctrl->hpc_ops = &pciehp_hpc_ops;
- mutex_init(&ctrl->crit_sect);
mutex_init(&ctrl->ctrl_lock);
init_waitqueue_head(&ctrl->queue);
dbg_ctrl(ctrl);
return NULL;
}
-void pcie_release_ctrl(struct controller *ctrl)
+void pciehp_release_ctrl(struct controller *ctrl)
{
pcie_shutdown_notification(ctrl);
pcie_cleanup_slot(ctrl);
int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+ struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
- dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
+ dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
ctrl_err(ctrl, "Device %s already exists "
- "at %04x:%02x:%02x, cannot hot-add\n", pci_name(dev),
- pci_domain_nr(parent), p_slot->bus, p_slot->device);
+ "at %04x:%02x:00, cannot hot-add\n", pci_name(dev),
+ pci_domain_nr(parent), parent->number);
pci_dev_put(dev);
return -EINVAL;
}
- num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
+ num = pci_scan_slot(parent, PCI_DEVFN(0, 0));
if (num == 0) {
ctrl_err(ctrl, "No new device found\n");
return -ENODEV;
}
for (fn = 0; fn < 8; fn++) {
- dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, fn));
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
if (!dev)
continue;
if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
int j;
u8 bctl = 0;
u8 presence = 0;
- struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+ struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
u16 command;
struct controller *ctrl = p_slot->ctrl;
- ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n",
- __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device);
- ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
+ ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
+ __func__, pci_domain_nr(parent), parent->number);
+ ret = pciehp_get_adapter_status(p_slot, &presence);
if (ret)
presence = 0;
for (j = 0; j < 8; j++) {
- struct pci_dev* temp = pci_get_slot(parent,
- (p_slot->device << 3) | j);
+ struct pci_dev* temp = pci_get_slot(parent, PCI_DEVFN(0, j));
if (!temp)
continue;
if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
{
if (pcie_aer_disable)
return -ENXIO;
+ if (!pci_msi_enabled())
+ return -ENXIO;
return pcie_port_service_register(&aerdriver);
}
pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, ®32);
info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
- /* 00b and 10b are defined as "Reserved". */
- if (info->support == PCIE_LINK_STATE_L1)
- info->support = 0;
info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
info->latency_encoding_l1 = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, ®16);
return AE_BAD_PARAMETER;
if (quirks->mailled == 1) {
param = value ? 0x92 : 0x93;
+ i8042_lock_chip();
i8042_command(¶m, 0x1059);
+ i8042_unlock_chip();
return 0;
}
break;
#include <linux/init.h>
#include <linux/miscdevice.h>
-#include <linux/utsname.h>
#include <linux/debugfs.h>
#include <asm/ipl.h>
#include <asm/sclp.h>
source "drivers/staging/vt6656/Kconfig"
-source "drivers/staging/cpc-usb/Kconfig"
-
source "drivers/staging/udlfb/Kconfig"
source "drivers/staging/hv/Kconfig"
obj-$(CONFIG_OCTEON_ETHERNET) += octeon/
obj-$(CONFIG_VT6655) += vt6655/
obj-$(CONFIG_VT6656) += vt6656/
-obj-$(CONFIG_USB_CPC) += cpc-usb/
obj-$(CONFIG_FB_UDL) += udlfb/
obj-$(CONFIG_HYPERV) += hv/
obj-$(CONFIG_VME_BUS) += vme/
+++ /dev/null
-config USB_CPC
- tristate "CPC CAN USB driver"
- depends on USB && PROC_FS
- default n
+++ /dev/null
-obj-$(CONFIG_USB_CPC) += cpc-usb.o
-
-cpc-usb-y := cpc-usb_drv.o sja2m16c_2.o
+++ /dev/null
-Things to do for this driver to get merged into the main portion of the
-kernel:
- - checkpatch cleanups
- - sparse clean
- - remove proc code
- - tie into CAN socket interfaces if possible
- - figure out sane userspace api
- - use linux's error codes
-
-Send patches to Greg Kroah-Hartman <greg@kroah.com>
+++ /dev/null
-/*
- * CPC-USB CAN Interface Kernel Driver
- *
- * Copyright (C) 2004-2009 EMS Dr. Thomas Wuensche
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published
- * by the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/poll.h>
-#include <linux/smp_lock.h>
-#include <linux/completion.h>
-#include <asm/uaccess.h>
-#include <linux/usb.h>
-
-
-#include <linux/proc_fs.h>
-
-#include "cpc.h"
-
-#include "cpc_int.h"
-#include "cpcusb.h"
-
-#include "sja2m16c.h"
-
-/* Version Information */
-#define DRIVER_AUTHOR "Sebastian Haas <haas@ems-wuensche.com>"
-#define DRIVER_DESC "CPC-USB Driver for Linux Kernel 2.6"
-#define DRIVER_VERSION CPC_DRIVER_VERSION
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL v2");
-
-/* Define these values to match your devices */
-#define USB_CPCUSB_VENDOR_ID 0x12D6
-
-#define USB_CPCUSB_M16C_PRODUCT_ID 0x0888
-#define USB_CPCUSB_LPC2119_PRODUCT_ID 0x0444
-
-#define CPC_USB_PROC_DIR CPC_PROC_DIR "cpc-usb"
-
-static struct proc_dir_entry *procDir;
-static struct proc_dir_entry *procEntry;
-
-/* Module parameters */
-static int debug;
-module_param(debug, int, S_IRUGO);
-
-/* table of devices that work with this driver */
-static struct usb_device_id cpcusb_table[] = {
- {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_M16C_PRODUCT_ID)},
- {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_LPC2119_PRODUCT_ID)},
- {} /* Terminating entry */
-};
-
-MODULE_DEVICE_TABLE(usb, cpcusb_table);
-
-/* use to prevent kernel panic if driver is unloaded
- * while a programm has still open the device
- */
-DECLARE_WAIT_QUEUE_HEAD(rmmodWq);
-atomic_t useCount;
-
-static CPC_USB_T *CPCUSB_Table[CPC_USB_CARD_CNT] = { 0 };
-static unsigned int CPCUsbCnt;
-
-/* prevent races between open() and disconnect() */
-static DECLARE_MUTEX(disconnect_sem);
-
-/* local function prototypes */
-static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count,
- loff_t *ppos);
-static ssize_t cpcusb_write(struct file *file, const char *buffer,
- size_t count, loff_t *ppos);
-static unsigned int cpcusb_poll(struct file *file, poll_table * wait);
-static int cpcusb_open(struct inode *inode, struct file *file);
-static int cpcusb_release(struct inode *inode, struct file *file);
-
-static int cpcusb_probe(struct usb_interface *interface,
- const struct usb_device_id *id);
-static void cpcusb_disconnect(struct usb_interface *interface);
-
-static void cpcusb_read_bulk_callback(struct urb *urb);
-static void cpcusb_write_bulk_callback(struct urb *urb);
-static void cpcusb_read_interrupt_callback(struct urb *urb);
-
-static int cpcusb_setup_intrep(CPC_USB_T *card);
-
-static struct file_operations cpcusb_fops = {
- /*
- * The owner field is part of the module-locking
- * mechanism. The idea is that the kernel knows
- * which module to increment the use-counter of
- * BEFORE it calls the device's open() function.
- * This also means that the kernel can decrement
- * the use-counter again before calling release()
- * or should the open() function fail.
- */
- .owner = THIS_MODULE,
-
- .read = cpcusb_read,
- .write = cpcusb_write,
- .poll = cpcusb_poll,
- .open = cpcusb_open,
- .release = cpcusb_release,
-};
-
-/*
- * usb class driver info in order to get a minor number from the usb core,
- * and to have the device registered with devfs and the driver core
- */
-static struct usb_class_driver cpcusb_class = {
- .name = "usb/cpc_usb%d",
- .fops = &cpcusb_fops,
- .minor_base = CPC_USB_BASE_MNR,
-};
-
-/* usb specific object needed to register this driver with the usb subsystem */
-static struct usb_driver cpcusb_driver = {
- .name = "cpc-usb",
- .probe = cpcusb_probe,
- .disconnect = cpcusb_disconnect,
- .id_table = cpcusb_table,
-};
-
-static int cpcusb_create_info_output(char *buf)
-{
- int i = 0, j;
-
- for (j = 0; j < CPC_USB_CARD_CNT; j++) {
- if (CPCUSB_Table[j]) {
- CPC_USB_T *card = CPCUSB_Table[j];
- CPC_CHAN_T *chan = card->chan;
-
- /* MINOR CHANNELNO BUSNO SLOTNO */
- i += sprintf(&buf[i], "%d %s\n", chan->minor,
- card->serialNumber);
- }
- }
-
- return i;
-}
-
-static int cpcusb_proc_read_info(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len = cpcusb_create_info_output(page);
-
- if (len <= off + count)
- *eof = 1;
- *start = page + off;
- len -= off;
- if (len > count)
- len = count;
- if (len < 0)
- len = 0;
-
- return len;
-}
-
-/*
- * Remove CPC-USB and cleanup
- */
-static inline void cpcusb_delete(CPC_USB_T *card)
-{
- if (card) {
- if (card->chan) {
- if (card->chan->buf)
- vfree(card->chan->buf);
-
- if (card->chan->CPCWait_q)
- kfree(card->chan->CPCWait_q);
-
- kfree(card->chan);
- }
-
- CPCUSB_Table[card->idx] = NULL;
- kfree(card);
- }
-}
-
-/*
- * setup the interrupt IN endpoint of a specific CPC-USB device
- */
-static int cpcusb_setup_intrep(CPC_USB_T *card)
-{
- int retval = 0;
- struct usb_endpoint_descriptor *ep;
-
- ep = &card->interface->altsetting[0].endpoint[card->num_intr_in].desc;
-
- card->intr_in_buffer[0] = 0;
- card->free_slots = 15; /* initial size */
-
- /* setup the urb */
- usb_fill_int_urb(card->intr_in_urb, card->udev,
- usb_rcvintpipe(card->udev, card->num_intr_in),
- card->intr_in_buffer,
- sizeof(card->intr_in_buffer),
- cpcusb_read_interrupt_callback,
- card,
- ep->bInterval);
-
- card->intr_in_urb->status = 0; /* needed! */
-
- /* submit the urb */
- retval = usb_submit_urb(card->intr_in_urb, GFP_KERNEL);
-
- if (retval)
- err("%s - failed submitting intr urb, error %d", __func__,
- retval);
-
- return retval;
-}
-
-static int cpcusb_open(struct inode *inode, struct file *file)
-{
- CPC_USB_T *card = NULL;
- struct usb_interface *interface;
- int subminor;
- int j, retval = 0;
-
- subminor = iminor(inode);
-
- /* prevent disconnects */
- down(&disconnect_sem);
-
- interface = usb_find_interface(&cpcusb_driver, subminor);
- if (!interface) {
- err("%s - error, can't find device for minor %d",
- __func__, subminor);
- retval = CPC_ERR_NO_INTERFACE_PRESENT;
- goto exit_no_device;
- }
-
- card = usb_get_intfdata(interface);
- if (!card) {
- retval = CPC_ERR_NO_INTERFACE_PRESENT;
- goto exit_no_device;
- }
-
- /* lock this device */
- down(&card->sem);
-
- /* increment our usage count for the driver */
- if (card->open) {
- dbg("device already opened");
- retval = CPC_ERR_CHANNEL_ALREADY_OPEN;
- goto exit_on_error;
- }
-
- /* save our object in the file's private structure */
- file->private_data = card;
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- usb_fill_bulk_urb(card->urbs[j].urb, card->udev,
- usb_rcvbulkpipe(card->udev, card->num_bulk_in),
- card->urbs[j].buffer, card->urbs[j].size,
- cpcusb_read_bulk_callback, card);
-
- retval = usb_submit_urb(card->urbs[j].urb, GFP_KERNEL);
-
- if (retval) {
- err("%s - failed submitting read urb, error %d",
- __func__, retval);
- retval = CPC_ERR_TRANSMISSION_FAILED;
- goto exit_on_error;
- }
- }
-
- info("%s - %d URB's submitted", __func__, j);
-
- ResetBuffer(card->chan);
-
- cpcusb_setup_intrep(card);
- card->open = 1;
-
- atomic_inc(&useCount);
-
-exit_on_error:
- /* unlock this device */
- up(&card->sem);
-
-exit_no_device:
- up(&disconnect_sem);
-
- return retval;
-}
-
-static unsigned int cpcusb_poll(struct file *file, poll_table * wait)
-{
- CPC_USB_T *card = (CPC_USB_T *) file->private_data;
- unsigned int retval = 0;
-
- if (!card) {
- err("%s - device object lost", __func__);
- return -EIO;
- }
-
- poll_wait(file, card->chan->CPCWait_q, wait);
-
- if (IsBufferNotEmpty(card->chan) || !(card->present))
- retval |= (POLLIN | POLLRDNORM);
-
- if (card->free_slots)
- retval |= (POLLOUT | POLLWRNORM);
-
- return retval;
-}
-
-static int cpcusb_release(struct inode *inode, struct file *file)
-{
- CPC_USB_T *card = (CPC_USB_T *) file->private_data;
- int j, retval = 0;
-
- if (card == NULL) {
- dbg("%s - object is NULL", __func__);
- return CPC_ERR_NO_INTERFACE_PRESENT;
- }
-
- /* lock our device */
- down(&card->sem);
-
- if (!card->open) {
- dbg("%s - device not opened", __func__);
- retval = CPC_ERR_NO_INTERFACE_PRESENT;
- goto exit_not_opened;
- }
-
- /* if device wasn't unplugged kill all urbs */
- if (card->present) {
- /* kill read urbs */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- usb_kill_urb(card->urbs[j].urb);
- }
-
- /* kill irq urb */
- usb_kill_urb(card->intr_in_urb);
-
- /* kill write urbs */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- if (atomic_read(&card->wrUrbs[j].busy)) {
- usb_kill_urb(card->wrUrbs[j].urb);
- wait_for_completion(&card->wrUrbs[j].finished);
- }
- }
- }
-
- atomic_dec(&useCount);
-
- /* last process detached */
- if (atomic_read(&useCount) == 0) {
- wake_up(&rmmodWq);
- }
-
- if (!card->present && card->open) {
- /* the device was unplugged before the file was released */
- up(&card->sem);
- cpcusb_delete(card);
- return 0;
- }
-
- card->open = 0;
-
-exit_not_opened:
- up(&card->sem);
-
- return 0;
-}
-
-static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count,
- loff_t *ppos)
-{
- CPC_USB_T *card = (CPC_USB_T *) file->private_data;
- CPC_CHAN_T *chan;
- int retval = 0;
-
- if (count < sizeof(CPC_MSG_T))
- return CPC_ERR_UNKNOWN;
-
- /* check if can read from the given address */
- if (!access_ok(VERIFY_WRITE, buffer, count))
- return CPC_ERR_UNKNOWN;
-
- /* lock this object */
- down(&card->sem);
-
- /* verify that the device wasn't unplugged */
- if (!card->present) {
- up(&card->sem);
- return CPC_ERR_NO_INTERFACE_PRESENT;
- }
-
- if (IsBufferEmpty(card->chan)) {
- retval = 0;
- } else {
- chan = card->chan;
-
-#if 0
- /* convert LPC2119 params back to SJA1000 params */
- if (card->deviceRevision >= 0x0200
- && chan->buf[chan->oidx].type == CPC_MSG_T_CAN_PRMS) {
- LPC2119_TO_SJA1000_Params(&chan->buf[chan->oidx]);
- }
-#endif
-
- if (copy_to_user(buffer, &chan->buf[chan->oidx], count) != 0) {
- retval = CPC_ERR_IO_TRANSFER;
- } else {
- chan->oidx = (chan->oidx + 1) % CPC_MSG_BUF_CNT;
- chan->WnR = 1;
- retval = sizeof(CPC_MSG_T);
- }
- }
-/* spin_unlock_irqrestore(&card->slock, flags); */
-
- /* unlock the device */
- up(&card->sem);
-
- return retval;
-}
-
-#define SHIFT 1
-static inline void cpcusb_align_buffer_alignment(unsigned char *buf)
-{
- /* CPC-USB uploads packed bytes. */
- CPC_MSG_T *cpc = (CPC_MSG_T *) buf;
- unsigned int i;
-
- for (i = 0; i < cpc->length + (2 * sizeof(unsigned long)); i++) {
- ((unsigned char *) &cpc->msgid)[1 + i] =
- ((unsigned char *) &cpc->msgid)[1 + SHIFT + i];
- }
-}
-
-static int cpc_get_buffer_count(CPC_CHAN_T *chan)
-{
- /* check the buffer parameters */
- if (chan->iidx == chan->oidx)
- return !chan->WnR ? CPC_MSG_BUF_CNT : 0;
- else if (chan->iidx >= chan->oidx)
- return (chan->iidx - chan->oidx) % CPC_MSG_BUF_CNT;
-
- return (chan->iidx + CPC_MSG_BUF_CNT - chan->oidx) % CPC_MSG_BUF_CNT;
-}
-
-static ssize_t cpcusb_write(struct file *file, const char *buffer,
- size_t count, loff_t *ppos)
-{
- CPC_USB_T *card = (CPC_USB_T *) file->private_data;
- CPC_USB_WRITE_URB_T *wrUrb = NULL;
-
- ssize_t bytes_written = 0;
- int retval = 0;
- int j;
-
- unsigned char *obuf = NULL;
- unsigned char type = 0;
- CPC_MSG_T *info = NULL;
-
- dbg("%s - entered minor %d, count = %zu, present = %d",
- __func__, card->minor, count, card->present);
-
- if (count > sizeof(CPC_MSG_T))
- return CPC_ERR_UNKNOWN;
-
- /* check if can read from the given address */
- if (!access_ok(VERIFY_READ, buffer, count))
- return CPC_ERR_UNKNOWN;
-
- /* lock this object */
- down(&card->sem);
-
- /* verify that the device wasn't unplugged */
- if (!card->present) {
- retval = CPC_ERR_NO_INTERFACE_PRESENT;
- goto exit;
- }
-
- /* verify that we actually have some data to write */
- if (count == 0) {
- dbg("%s - write request of 0 bytes", __func__);
- goto exit;
- }
-
- if (card->free_slots <= 5) {
- info = (CPC_MSG_T *) buffer;
-
- if (info->type != CPC_CMD_T_CLEAR_CMD_QUEUE
- || card->free_slots <= 0) {
- dbg("%s - send buffer full please try again %d",
- __func__, card->free_slots);
- retval = CPC_ERR_CAN_NO_TRANSMIT_BUF;
- goto exit;
- }
- }
-
- /* Find a free write urb */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- if (!atomic_read(&card->wrUrbs[j].busy)) {
- wrUrb = &card->wrUrbs[j]; /* remember found URB */
- atomic_set(&wrUrb->busy, 1); /* lock this URB */
- init_completion(&wrUrb->finished); /* init completion */
- dbg("WR URB no. %d started", j);
- break;
- }
- }
-
- /* don't found write urb say error */
- if (!wrUrb) {
- dbg("%s - no free send urb available", __func__);
- retval = CPC_ERR_CAN_NO_TRANSMIT_BUF;
- goto exit;
- }
- dbg("URB write req");
-
- obuf = (unsigned char *) wrUrb->urb->transfer_buffer;
-
- /* copy the data from userspace into our transfer buffer;
- * this is the only copy required.
- */
- if (copy_from_user(&obuf[4], buffer, count) != 0) {
- atomic_set(&wrUrb->busy, 0); /* release urb */
- retval = CPC_ERR_IO_TRANSFER;
- goto exit;
- }
-
- /* check if it is a DRIVER information message, so we can
- * response to that message and not the USB
- */
- info = (CPC_MSG_T *) &obuf[4];
-
- bytes_written = 11 + info->length;
- if (bytes_written >= wrUrb->size) {
- retval = CPC_ERR_IO_TRANSFER;
- goto exit;
- }
-
- switch (info->type) {
- case CPC_CMD_T_CLEAR_MSG_QUEUE:
- ResetBuffer(card->chan);
- break;
-
- case CPC_CMD_T_INQ_MSG_QUEUE_CNT:
- retval = cpc_get_buffer_count(card->chan);
- atomic_set(&wrUrb->busy, 0);
-
- goto exit;
-
- case CPC_CMD_T_INQ_INFO:
- if (info->msg.info.source == CPC_INFOMSG_T_DRIVER) {
- /* release urb cause we'll use it for driver
- * information
- */
- atomic_set(&wrUrb->busy, 0);
- if (IsBufferFull(card->chan)) {
- retval = CPC_ERR_IO_TRANSFER;
- goto exit;
- }
-
- /* it is a driver information request message and we have
- * free rx slots to store the response
- */
- type = info->msg.info.type;
- info = &card->chan->buf[card->chan->iidx];
-
- info->type = CPC_MSG_T_INFO;
- info->msg.info.source = CPC_INFOMSG_T_DRIVER;
- info->msg.info.type = type;
-
- switch (type) {
- case CPC_INFOMSG_T_VERSION:
- info->length = strlen(CPC_DRIVER_VERSION) + 2;
- sprintf(info->msg.info.msg, "%s\n",
- CPC_DRIVER_VERSION);
- break;
-
- case CPC_INFOMSG_T_SERIAL:
- info->length = strlen(CPC_DRIVER_SERIAL) + 2;
- sprintf(info->msg.info.msg, "%s\n",
- CPC_DRIVER_SERIAL);
- break;
-
- default:
- info->length = 2;
- info->msg.info.type =
- CPC_INFOMSG_T_UNKNOWN_TYPE;
- }
-
- card->chan->WnR = 0;
- card->chan->iidx =
- (card->chan->iidx + 1) % CPC_MSG_BUF_CNT;
-
- retval = info->length;
- goto exit;
- }
- break;
- case CPC_CMD_T_CAN_PRMS:
- /* Check the controller type. If it's the new CPC-USB, make sure if these are SJA1000 params */
- if (info->msg.canparams.cc_type != SJA1000
- && info->msg.canparams.cc_type != M16C_BASIC
- && (card->productId == USB_CPCUSB_LPC2119_PRODUCT_ID
- && info->msg.canparams.cc_type != SJA1000)) {
- /* don't forget to release the urb */
- atomic_set(&wrUrb->busy, 0);
- retval = CPC_ERR_WRONG_CONTROLLER_TYPE;
- goto exit;
- }
- break;
- }
-
- /* just convert the params if it is an old CPC-USB with M16C controller */
- if (card->productId == USB_CPCUSB_M16C_PRODUCT_ID) {
- /* if it is a parameter message convert it from SJA1000 controller
- * settings to M16C Basic controller settings
- */
- SJA1000_TO_M16C_BASIC_Params((CPC_MSG_T *) &obuf[4]);
- }
-
- /* don't forget the byte alignment */
- cpcusb_align_buffer_alignment(&obuf[4]);
-
- /* setup a the 4 byte header */
- obuf[0] = obuf[1] = obuf[2] = obuf[3] = 0;
-
- /* this urb was already set up, except for this write size */
- wrUrb->urb->transfer_buffer_length = bytes_written + 4;
-
- /* send the data out the bulk port */
- /* a character device write uses GFP_KERNEL,
- unless a spinlock is held */
- retval = usb_submit_urb(wrUrb->urb, GFP_KERNEL);
- if (retval) {
- atomic_set(&wrUrb->busy, 0); /* release urb */
- err("%s - failed submitting write urb, error %d",
- __func__, retval);
- } else {
- retval = bytes_written;
- }
-
-exit:
- /* unlock the device */
- up(&card->sem);
-
- dbg("%s - leaved", __func__);
-
- return retval;
-}
-
-/*
- * callback for interrupt IN urb
- */
-static void cpcusb_read_interrupt_callback(struct urb *urb)
-{
- CPC_USB_T *card = (CPC_USB_T *) urb->context;
- int retval;
- unsigned long flags;
-
- spin_lock_irqsave(&card->slock, flags);
-
- if (!card->present) {
- spin_unlock_irqrestore(&card->slock, flags);
- info("%s - no such device", __func__);
- return;
- }
-
- switch (urb->status) {
- case 0: /* success */
- card->free_slots = card->intr_in_buffer[1];
- break;
- case -ECONNRESET:
- case -ENOENT:
- case -ESHUTDOWN:
- /* urb was killed */
- spin_unlock_irqrestore(&card->slock, flags);
- dbg("%s - intr urb killed", __func__);
- return;
- default:
- info("%s - nonzero urb status %d", __func__, urb->status);
- break;
- }
-
- retval = usb_submit_urb(urb, GFP_ATOMIC);
- if (retval) {
- err("%s - failed resubmitting intr urb, error %d",
- __func__, retval);
- }
-
- spin_unlock_irqrestore(&card->slock, flags);
- wake_up_interruptible(card->chan->CPCWait_q);
-
- return;
-}
-
-#define UN_SHIFT 1
-#define CPCMSG_HEADER_LEN_FIRMWARE 11
-static inline int cpcusb_unalign_and_copy_buffy(unsigned char *out,
- unsigned char *in)
-{
- unsigned int i, j;
-
- for (i = 0; i < 3; i++)
- out[i] = in[i];
-
- for (j = 0; j < (in[1] + (CPCMSG_HEADER_LEN_FIRMWARE - 3)); j++)
- out[j + i + UN_SHIFT] = in[j + i];
-
- return i + j;
-}
-
-/*
- * callback for bulk IN urb
- */
-static void cpcusb_read_bulk_callback(struct urb *urb)
-{
- CPC_USB_T *card = (CPC_USB_T *) urb->context;
- CPC_CHAN_T *chan;
- unsigned char *ibuf = urb->transfer_buffer;
- int retval, msgCnt, start, again = 0;
- unsigned long flags;
-
- if (!card) {
- err("%s - device object lost", __func__);
- return;
- }
-
- spin_lock_irqsave(&card->slock, flags);
-
- if (!card->present) {
- spin_unlock_irqrestore(&card->slock, flags);
- info("%s - no such device", __func__);
- return;
- }
-
- switch (urb->status) {
- case 0: /* success */
- break;
- case -ECONNRESET:
- case -ENOENT:
- case -ESHUTDOWN:
- /* urb was killed */
- spin_unlock_irqrestore(&card->slock, flags);
- dbg("%s - read urb killed", __func__);
- return;
- default:
- info("%s - nonzero urb status %d", __func__, urb->status);
- break;
- }
-
- if (urb->actual_length) {
- msgCnt = ibuf[0] & ~0x80;
- again = ibuf[0] & 0x80;
-
- /* we have a 4 byte header */
- start = 4;
- chan = card->chan;
- while (msgCnt) {
- if (!(IsBufferFull(card->chan))) {
- start +=
- cpcusb_unalign_and_copy_buffy((unsigned char *)
- &chan->buf[chan->iidx], &ibuf[start]);
-
- if (start > urb->transfer_buffer_length) {
- err("%d > %d", start, urb->transfer_buffer_length);
- break;
- }
-
- chan->WnR = 0;
- chan->iidx = (chan->iidx + 1) % CPC_MSG_BUF_CNT;
- msgCnt--;
- } else {
- break;
- }
- }
- }
-
- usb_fill_bulk_urb(urb, card->udev,
- usb_rcvbulkpipe(card->udev, card->num_bulk_in),
- urb->transfer_buffer,
- urb->transfer_buffer_length,
- cpcusb_read_bulk_callback, card);
-
- retval = usb_submit_urb(urb, GFP_ATOMIC);
-
- if (retval) {
- err("%s - failed resubmitting read urb, error %d", __func__, retval);
- }
-
- spin_unlock_irqrestore(&card->slock, flags);
-
- wake_up_interruptible(card->chan->CPCWait_q);
-}
-
-/*
- * callback for bulk IN urb
- */
-static void cpcusb_write_bulk_callback(struct urb *urb)
-{
- CPC_USB_T *card = (CPC_USB_T *) urb->context;
- unsigned long flags;
- int j;
-
- spin_lock_irqsave(&card->slock, flags);
-
- /* find this urb */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- if (card->wrUrbs[j].urb == urb) {
- dbg("URB found no. %d", j);
- /* notify anyone waiting that the write has finished */
- complete(&card->wrUrbs[j].finished);
- atomic_set(&card->wrUrbs[j].busy, 0);
- break;
- }
- }
-
- switch (urb->status) {
- case 0: /* success */
- break;
- case -ECONNRESET:
- case -ENOENT:
- case -ESHUTDOWN:
- /* urb was killed */
- spin_unlock_irqrestore(&card->slock, flags);
- dbg("%s - write urb no. %d killed", __func__, j);
- return;
- default:
- info("%s - nonzero urb status %d", __func__, urb->status);
- break;
- }
-
- spin_unlock_irqrestore(&card->slock, flags);
-
- wake_up_interruptible(card->chan->CPCWait_q);
-}
-
-static inline int cpcusb_get_free_slot(void)
-{
- int i;
-
- for (i = 0; i < CPC_USB_CARD_CNT; i++) {
- if (!CPCUSB_Table[i])
- return i;
- }
-
- return -1;
-}
-
-/*
- * probe function for new CPC-USB devices
- */
-static int cpcusb_probe(struct usb_interface *interface,
- const struct usb_device_id *id)
-{
- CPC_USB_T *card = NULL;
- CPC_CHAN_T *chan = NULL;
-
- struct usb_device *udev = interface_to_usbdev(interface);
- struct usb_host_interface *iface_desc;
- struct usb_endpoint_descriptor *endpoint;
-
- int i, j, retval = -ENOMEM, slot;
-
- slot = cpcusb_get_free_slot();
- if (slot < 0) {
- info("No more devices supported");
- return -ENOMEM;
- }
-
- /* allocate memory for our device state and initialize it */
- card = kzalloc(sizeof(CPC_USB_T), GFP_KERNEL);
- if (!card) {
- err("Out of memory");
- return -ENOMEM;
- }
- CPCUSB_Table[slot] = card;
-
- /* allocate and initialize the channel struct */
- card->chan = kmalloc(sizeof(CPC_CHAN_T), GFP_KERNEL);
- if (!card->chan) {
- kfree(card);
- err("Out of memory");
- return -ENOMEM;
- }
-
- chan = card->chan;
- memset(chan, 0, sizeof(CPC_CHAN_T));
- ResetBuffer(chan);
-
- init_MUTEX(&card->sem);
- spin_lock_init(&card->slock);
-
- card->udev = udev;
- card->interface = interface;
- if (udev->descriptor.iSerialNumber) {
- usb_string(udev, udev->descriptor.iSerialNumber, card->serialNumber,
- 128);
- info("Serial %s", card->serialNumber);
- }
-
- card->productId = udev->descriptor.idProduct;
- info("Product %s",
- card->productId == USB_CPCUSB_LPC2119_PRODUCT_ID ?
- "CPC-USB/ARM7" : "CPC-USB/M16C");
-
- /* set up the endpoint information */
- /* check out the endpoints */
- /* use only the first bulk-in and bulk-out endpoints */
- iface_desc = &interface->altsetting[0];
- for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
- endpoint = &iface_desc->endpoint[i].desc;
-
- if (!card->num_intr_in &&
- (endpoint->bEndpointAddress & USB_DIR_IN) &&
- ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
- == USB_ENDPOINT_XFER_INT)) {
- card->intr_in_urb = usb_alloc_urb(0, GFP_KERNEL);
- card->num_intr_in = 1;
-
- if (!card->intr_in_urb) {
- err("No free urbs available");
- goto error;
- }
-
- dbg("intr_in urb %d", card->num_intr_in);
- }
-
- if (!card->num_bulk_in &&
- (endpoint->bEndpointAddress & USB_DIR_IN) &&
- ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
- == USB_ENDPOINT_XFER_BULK)) {
- card->num_bulk_in = 2;
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- card->urbs[j].size = endpoint->wMaxPacketSize;
- card->urbs[j].urb = usb_alloc_urb(0, GFP_KERNEL);
- if (!card->urbs[j].urb) {
- err("No free urbs available");
- goto error;
- }
- card->urbs[j].buffer =
- usb_buffer_alloc(udev,
- card->urbs[j].size,
- GFP_KERNEL,
- &card->urbs[j].urb->transfer_dma);
- if (!card->urbs[j].buffer) {
- err("Couldn't allocate bulk_in_buffer");
- goto error;
- }
- }
- info("%s - %d reading URB's allocated",
- __func__, CPC_USB_URB_CNT);
- }
-
- if (!card->num_bulk_out &&
- !(endpoint->bEndpointAddress & USB_DIR_IN) &&
- ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
- == USB_ENDPOINT_XFER_BULK)) {
-
- card->num_bulk_out = 2;
-
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- card->wrUrbs[j].size =
- endpoint->wMaxPacketSize;
- card->wrUrbs[j].urb =
- usb_alloc_urb(0, GFP_KERNEL);
- if (!card->wrUrbs[j].urb) {
- err("No free urbs available");
- goto error;
- }
- card->wrUrbs[j].buffer = usb_buffer_alloc(udev,
- card->wrUrbs[j].size, GFP_KERNEL,
- &card->wrUrbs[j].urb->transfer_dma);
-
- if (!card->wrUrbs[j].buffer) {
- err("Couldn't allocate bulk_out_buffer");
- goto error;
- }
-
- usb_fill_bulk_urb(card->wrUrbs[j].urb, udev,
- usb_sndbulkpipe(udev, endpoint->bEndpointAddress),
- card->wrUrbs[j].buffer,
- card->wrUrbs[j].size,
- cpcusb_write_bulk_callback,
- card);
- }
-
- info("%s - %d writing URB's allocated", __func__, CPC_USB_URB_CNT);
- }
- }
-
- if (!(card->num_bulk_in && card->num_bulk_out)) {
- err("Couldn't find both bulk-in and bulk-out endpoints");
- goto error;
- }
-
- /* allow device read, write and ioctl */
- card->present = 1;
-
- /* we can register the device now, as it is ready */
- usb_set_intfdata(interface, card);
- retval = usb_register_dev(interface, &cpcusb_class);
-
- if (retval) {
- /* something prevented us from registering this driver */
- err("Not able to get a minor for this device.");
- usb_set_intfdata(interface, NULL);
- goto error;
- }
-
- card->chan->minor = card->minor = interface->minor;
-
- chan->buf = vmalloc(sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT);
- if (chan->buf == NULL) {
- err("Out of memory");
- retval = -ENOMEM;
- goto error;
- }
- info("Allocated memory for %d messages (%lu kbytes)",
- CPC_MSG_BUF_CNT, (long unsigned int)(sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT) / 1000);
- memset(chan->buf, 0, sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT);
-
- ResetBuffer(chan);
-
- card->chan->CPCWait_q = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
- if (!card->chan->CPCWait_q) {
- err("Out of memory");
- retval = -ENOMEM;
- goto error;
- }
- init_waitqueue_head(card->chan->CPCWait_q);
-
- CPCUSB_Table[slot] = card;
- card->idx = slot;
- CPCUsbCnt++;
-
- /* let the user know what node this device is now attached to */
- info("Device now attached to USB-%d", card->minor);
- return 0;
-
-error:
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- if (card->urbs[j].buffer) {
- usb_buffer_free(card->udev, card->urbs[j].size,
- card->urbs[j].buffer,
- card->urbs[j].urb->transfer_dma);
- card->urbs[j].buffer = NULL;
- }
- if (card->urbs[j].urb) {
- usb_free_urb(card->urbs[j].urb);
- card->urbs[j].urb = NULL;
- }
- }
-
- cpcusb_delete(card);
- return retval;
-}
-
-/*
- * called by the usb core when the device is removed from the system
- */
-static void cpcusb_disconnect(struct usb_interface *interface)
-{
- CPC_USB_T *card = NULL;
- int minor, j;
-
- /* prevent races with open() */
- down(&disconnect_sem);
-
- card = usb_get_intfdata(interface);
- usb_set_intfdata(interface, NULL);
-
- down(&card->sem);
-
- /* prevent device read, write and ioctl */
- card->present = 0;
-
- minor = card->minor;
-
- /* free all urbs and their buffers */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- /* terminate an ongoing write */
- if (atomic_read(&card->wrUrbs[j].busy)) {
- usb_kill_urb(card->wrUrbs[j].urb);
- wait_for_completion(&card->wrUrbs[j].finished);
- }
- usb_buffer_free(card->udev, card->wrUrbs[j].size,
- card->wrUrbs[j].buffer,
- card->wrUrbs[j].urb->transfer_dma);
- usb_free_urb(card->wrUrbs[j].urb);
- }
- info("%d write URBs freed", CPC_USB_URB_CNT);
-
- /* free all urbs and their buffers */
- for (j = 0; j < CPC_USB_URB_CNT; j++) {
- usb_buffer_free(card->udev, card->urbs[j].size,
- card->urbs[j].buffer,
- card->urbs[j].urb->transfer_dma);
- usb_free_urb(card->urbs[j].urb);
- }
- info("%d read URBs freed", CPC_USB_URB_CNT);
- usb_free_urb(card->intr_in_urb);
-
- /* give back our minor */
- usb_deregister_dev(interface, &cpcusb_class);
-
- up(&card->sem);
-
- /* if the device is opened, cpcusb_release will clean this up */
- if (!card->open)
- cpcusb_delete(card);
- else
- wake_up_interruptible(card->chan->CPCWait_q);
-
- up(&disconnect_sem);
-
- CPCUsbCnt--;
- info("USB-%d now disconnected", minor);
-}
-
-static int __init CPCUsb_Init(void)
-{
- int result, i;
-
- info(DRIVER_DESC " v" DRIVER_VERSION);
- info("Build on " __DATE__ " at " __TIME__);
-
- for (i = 0; i < CPC_USB_CARD_CNT; i++)
- CPCUSB_Table[i] = 0;
-
- /* register this driver with the USB subsystem */
- result = usb_register(&cpcusb_driver);
- if (result) {
- err("usb_register failed. Error number %d", result);
- return result;
- }
-
- procDir = proc_mkdir(CPC_USB_PROC_DIR, NULL);
- if (!procDir) {
- err("Could not create proc entry");
- } else {
- procEntry = create_proc_read_entry("info", 0444, procDir,
- cpcusb_proc_read_info,
- NULL);
- if (!procEntry) {
- err("Could not create proc entry %s", CPC_USB_PROC_DIR "/info");
- remove_proc_entry(CPC_USB_PROC_DIR, NULL);
- procDir = NULL;
- }
- }
-
- return 0;
-}
-
-static void __exit CPCUsb_Exit(void)
-{
- wait_event(rmmodWq, !atomic_read(&useCount));
-
- /* deregister this driver with the USB subsystem */
- usb_deregister(&cpcusb_driver);
-
- if (procDir) {
- if (procEntry)
- remove_proc_entry("info", procDir);
- remove_proc_entry(CPC_USB_PROC_DIR, NULL);
- }
-}
-
-module_init(CPCUsb_Init);
-module_exit(CPCUsb_Exit);
+++ /dev/null
-/*
- * CPC CAN Interface Definitions
- *
- * Copyright (C) 2000-2008 EMS Dr. Thomas Wuensche
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#ifndef CPC_HEADER
-#define CPC_HEADER
-
-/*
- * the maximum length of the union members within a CPC_MSG
- * this value can be defined by the customer, but has to be
- * >= 64 bytes
- * however, if not defined before, we set a length of 64 byte
- */
-#if !defined(CPC_MSG_LEN) || (CPC_MSG_LEN < 64)
-#undef CPC_MSG_LEN
-#define CPC_MSG_LEN 64
-#endif
-
-/*
- * Transmission of events from CPC interfaces to PC can be individually
- * controlled per event type. Default state is: don't transmit
- * Control values are constructed by bit-or of Subject and Action
- * and passed to CPC_Control()
- */
-
-/* Control-Values for CPC_Control() Command Subject Selection */
-#define CONTR_CAN_Message 0x04
-#define CONTR_Busload 0x08
-#define CONTR_CAN_State 0x0C
-#define CONTR_SendAck 0x10
-#define CONTR_Filter 0x14
-#define CONTR_CmdQueue 0x18 /* reserved, do not use */
-#define CONTR_BusError 0x1C
-
-/* Control Command Actions */
-#define CONTR_CONT_OFF 0
-#define CONTR_CONT_ON 1
-#define CONTR_SING_ON 2
-/*
- * CONTR_SING_ON doesn't change CONTR_CONT_ON state, so it should be
- * read as: transmit at least once
- */
-
-/* defines for confirmed request */
-#define DO_NOT_CONFIRM 0
-#define DO_CONFIRM 1
-
-/* event flags */
-#define EVENT_READ 0x01
-#define EVENT_WRITE 0x02
-
-/*
- * Messages from CPC to PC contain a message object type field.
- * The following message types are sent by CPC and can be used in
- * handlers, others should be ignored.
- */
-#define CPC_MSG_T_RESYNC 0 /* Normally to be ignored */
-#define CPC_MSG_T_CAN 1 /* CAN data frame */
-#define CPC_MSG_T_BUSLOAD 2 /* Busload message */
-#define CPC_MSG_T_STRING 3 /* Normally to be ignored */
-#define CPC_MSG_T_CONTI 4 /* Normally to be ignored */
-#define CPC_MSG_T_MEM 7 /* Normally not to be handled */
-#define CPC_MSG_T_RTR 8 /* CAN remote frame */
-#define CPC_MSG_T_TXACK 9 /* Send acknowledge */
-#define CPC_MSG_T_POWERUP 10 /* Power-up message */
-#define CPC_MSG_T_CMD_NO 11 /* Normally to be ignored */
-#define CPC_MSG_T_CAN_PRMS 12 /* Actual CAN parameters */
-#define CPC_MSG_T_ABORTED 13 /* Command aborted message */
-#define CPC_MSG_T_CANSTATE 14 /* CAN state message */
-#define CPC_MSG_T_RESET 15 /* used to reset CAN-Controller */
-#define CPC_MSG_T_XCAN 16 /* XCAN data frame */
-#define CPC_MSG_T_XRTR 17 /* XCAN remote frame */
-#define CPC_MSG_T_INFO 18 /* information strings */
-#define CPC_MSG_T_CONTROL 19 /* used for control of interface/driver behaviour */
-#define CPC_MSG_T_CONFIRM 20 /* response type for confirmed requests */
-#define CPC_MSG_T_OVERRUN 21 /* response type for overrun conditions */
-#define CPC_MSG_T_KEEPALIVE 22 /* response type for keep alive conditions */
-#define CPC_MSG_T_CANERROR 23 /* response type for bus error conditions */
-#define CPC_MSG_T_DISCONNECTED 24 /* response type for a disconnected interface */
-#define CPC_MSG_T_ERR_COUNTER 25 /* RX/TX error counter of CAN controller */
-
-#define CPC_MSG_T_FIRMWARE 100 /* response type for USB firmware download */
-
-/*
- * Messages from the PC to the CPC interface contain a command field
- * Most of the command types are wrapped by the library functions and have therefore
- * normally not to be used.
- * However, programmers who wish to circumvent the library and talk directly
- * to the drivers (mainly Linux programmers) can use the following
- * command types:
- */
-#define CPC_CMD_T_CAN 1 /* CAN data frame */
-#define CPC_CMD_T_CONTROL 3 /* used for control of interface/driver behaviour */
-#define CPC_CMD_T_CAN_PRMS 6 /* set CAN parameters */
-#define CPC_CMD_T_CLEARBUF 8 /* clears input queue; this is depricated, use CPC_CMD_T_CLEAR_MSG_QUEUE instead */
-#define CPC_CMD_T_INQ_CAN_PARMS 11 /* inquire actual CAN parameters */
-#define CPC_CMD_T_FILTER_PRMS 12 /* set filter parameter */
-#define CPC_CMD_T_RTR 13 /* CAN remote frame */
-#define CPC_CMD_T_CANSTATE 14 /* CAN state message */
-#define CPC_CMD_T_XCAN 15 /* XCAN data frame */
-#define CPC_CMD_T_XRTR 16 /* XCAN remote frame */
-#define CPC_CMD_T_RESET 17 /* used to reset CAN-Controller */
-#define CPC_CMD_T_INQ_INFO 18 /* miscellanous information strings */
-#define CPC_CMD_T_OPEN_CHAN 19 /* open a channel */
-#define CPC_CMD_T_CLOSE_CHAN 20 /* close a channel */
-#define CPC_CMD_T_CNTBUF 21 /* this is depricated, use CPC_CMD_T_INQ_MSG_QUEUE_CNT instead */
-#define CPC_CMD_T_CAN_EXIT 200 /* exit the CAN (disable interrupts; reset bootrate; reset output_cntr; mode = 1) */
-
-#define CPC_CMD_T_INQ_MSG_QUEUE_CNT CPC_CMD_T_CNTBUF /* inquires the count of elements in the message queue */
-#define CPC_CMD_T_INQ_ERR_COUNTER 25 /* request the CAN controllers error counter */
-#define CPC_CMD_T_CLEAR_MSG_QUEUE CPC_CMD_T_CLEARBUF /* clear CPC_MSG queue */
-#define CPC_CMD_T_CLEAR_CMD_QUEUE 28 /* clear CPC_CMD queue */
-#define CPC_CMD_T_FIRMWARE 100 /* reserved, must not be used */
-#define CPC_CMD_T_USB_RESET 101 /* reserved, must not be used */
-#define CPC_CMD_T_WAIT_NOTIFY 102 /* reserved, must not be used */
-#define CPC_CMD_T_WAIT_SETUP 103 /* reserved, must not be used */
-#define CPC_CMD_T_ABORT 255 /* Normally not to be used */
-
-/* definitions for CPC_MSG_T_INFO information sources */
-#define CPC_INFOMSG_T_UNKNOWN_SOURCE 0
-#define CPC_INFOMSG_T_INTERFACE 1
-#define CPC_INFOMSG_T_DRIVER 2
-#define CPC_INFOMSG_T_LIBRARY 3
-
-/* information types */
-#define CPC_INFOMSG_T_UNKNOWN_TYPE 0
-#define CPC_INFOMSG_T_VERSION 1
-#define CPC_INFOMSG_T_SERIAL 2
-
-/* definitions for controller types */
-#define PCA82C200 1 /* Philips basic CAN controller, replaced by SJA1000 */
-#define SJA1000 2 /* Philips basic CAN controller */
-#define AN82527 3 /* Intel full CAN controller */
-#define M16C_BASIC 4 /* M16C controller running in basic CAN (not full CAN) mode */
-
-/* channel open error codes */
-#define CPC_ERR_NO_FREE_CHANNEL -1 /* no more free space within the channel array */
-#define CPC_ERR_CHANNEL_ALREADY_OPEN -2 /* the channel is already open */
-#define CPC_ERR_CHANNEL_NOT_ACTIVE -3 /* access to a channel not active failed */
-#define CPC_ERR_NO_DRIVER_PRESENT -4 /* no driver at the location searched by the library */
-#define CPC_ERR_NO_INIFILE_PRESENT -5 /* the library could not find the inifile */
-#define CPC_ERR_WRONG_PARAMETERS -6 /* wrong parameters in the inifile */
-#define CPC_ERR_NO_INTERFACE_PRESENT -7 /* 1. The specified interface is not connected */
- /* 2. The interface (mostly CPC-USB) was disconnected upon operation */
-#define CPC_ERR_NO_MATCHING_CHANNEL -8 /* the driver couldn't find a matching channel */
-#define CPC_ERR_NO_BUFFER_AVAILABLE -9 /* the driver couldn't allocate buffer for messages */
-#define CPC_ERR_NO_INTERRUPT -10 /* the requested interrupt couldn't be claimed */
-#define CPC_ERR_NO_MATCHING_INTERFACE -11 /* no interface type related to this channel was found */
-#define CPC_ERR_NO_RESOURCES -12 /* the requested resources could not be claimed */
-#define CPC_ERR_SOCKET -13 /* error concerning TCP sockets */
-
-/* init error codes */
-#define CPC_ERR_WRONG_CONTROLLER_TYPE -14 /* wrong CAN controller type within initialization */
-#define CPC_ERR_NO_RESET_MODE -15 /* the controller could not be set into reset mode */
-#define CPC_ERR_NO_CAN_ACCESS -16 /* the CAN controller could not be accessed */
-
-/* transmit error codes */
-#define CPC_ERR_CAN_WRONG_ID -20 /* the provided CAN id is too big */
-#define CPC_ERR_CAN_WRONG_LENGTH -21 /* the provided CAN length is too long */
-#define CPC_ERR_CAN_NO_TRANSMIT_BUF -22 /* the transmit buffer was occupied */
-#define CPC_ERR_CAN_TRANSMIT_TIMEOUT -23 /* The message could not be sent within a */
- /* specified time */
-
-/* other error codes */
-#define CPC_ERR_SERVICE_NOT_SUPPORTED -30 /* the requested service is not supported by the interface */
-#define CPC_ERR_IO_TRANSFER -31 /* a transmission error down to the driver occurred */
-#define CPC_ERR_TRANSMISSION_FAILED -32 /* a transmission error down to the interface occurred */
-#define CPC_ERR_TRANSMISSION_TIMEOUT -33 /* a timeout occurred within transmission to the interface */
-#define CPC_ERR_OP_SYS_NOT_SUPPORTED -35 /* the operating system is not supported */
-#define CPC_ERR_UNKNOWN -40 /* an unknown error ocurred (mostly IOCTL errors) */
-
-#define CPC_ERR_LOADING_DLL -50 /* the library 'cpcwin.dll' could not be loaded */
-#define CPC_ERR_ASSIGNING_FUNCTION -51 /* the specified function could not be assigned */
-#define CPC_ERR_DLL_INITIALIZATION -52 /* the DLL was not initialized correctly */
-#define CPC_ERR_MISSING_LICFILE -55 /* the file containing the licenses does not exist */
-#define CPC_ERR_MISSING_LICENSE -56 /* a required license was not found */
-
-/* CAN state bit values. Ignore any bits not listed */
-#define CPC_CAN_STATE_BUSOFF 0x80
-#define CPC_CAN_STATE_ERROR 0x40
-
-/* Mask to help ignore undefined bits */
-#define CPC_CAN_STATE_MASK 0xc0
-
-/*
- * CAN-Message representation in a CPC_MS
- * Message object type is CPC_MSG_T_CAN or CPC_MSG_T_RTR
- * or CPC_MSG_T_XCAN or CPC_MSG_T_XRTR
- */
-typedef struct CPC_CAN_MSG {
- u32 id;
- u8 length;
- u8 msg[8];
-} CPC_CAN_MSG_T;
-
-/* representation of the CAN parameters for the PCA82C200 controller */
-typedef struct CPC_PCA82C200_PARAMS {
- u8 acc_code; /* Acceptance-code for receive, Standard: 0 */
- u8 acc_mask; /* Acceptance-mask for receive, Standard: 0xff (everything) */
- u8 btr0; /* Bus-timing register 0 */
- u8 btr1; /* Bus-timing register 1 */
- u8 outp_contr; /* Output-control register */
-} CPC_PCA82C200_PARAMS_T;
-
-/* representation of the CAN parameters for the SJA1000 controller */
-typedef struct CPC_SJA1000_PARAMS {
- u8 mode; /* enables single or dual acceptance filtering */
- u8 acc_code0; /* Acceptance-code for receive, Standard: 0 */
- u8 acc_code1;
- u8 acc_code2;
- u8 acc_code3;
- u8 acc_mask0; /* Acceptance-mask for receive, Standard: 0xff (everything) */
- u8 acc_mask1;
- u8 acc_mask2;
- u8 acc_mask3;
- u8 btr0; /* Bus-timing register 0 */
- u8 btr1; /* Bus-timing register 1 */
- u8 outp_contr; /* Output-control register */
-} CPC_SJA1000_PARAMS_T;
-
-/*
- * representation of the CAN parameters for the M16C controller
- * in basic CAN mode (means no full CAN)
- */
-typedef struct CPC_M16C_BASIC_PARAMS {
- u8 con0;
- u8 con1;
- u8 ctlr0;
- u8 ctlr1;
- u8 clk;
- u8 acc_std_code0;
- u8 acc_std_code1;
- u8 acc_ext_code0;
- u8 acc_ext_code1;
- u8 acc_ext_code2;
- u8 acc_ext_code3;
- u8 acc_std_mask0;
- u8 acc_std_mask1;
- u8 acc_ext_mask0;
- u8 acc_ext_mask1;
- u8 acc_ext_mask2;
- u8 acc_ext_mask3;
-} CPC_M16C_BASIC_PARAMS_T;
-
-/* CAN params message representation */
-typedef struct CPC_CAN_PARAMS {
- u8 cc_type; /* represents the controller type */
- union {
- CPC_M16C_BASIC_PARAMS_T m16c_basic;
- CPC_SJA1000_PARAMS_T sja1000;
- CPC_PCA82C200_PARAMS_T pca82c200;
- } cc_params;
-} CPC_CAN_PARAMS_T;
-
-/* CHAN init params representation */
-typedef struct CPC_CHAN_PARAMS {
- int fd;
-} CPC_CHAN_PARAMS_T;
-
-/* CAN init params message representation */
-typedef struct CPC_INIT_PARAMS {
- CPC_CHAN_PARAMS_T chanparams;
- CPC_CAN_PARAMS_T canparams;
-} CPC_INIT_PARAMS_T;
-
-/* structure for confirmed message handling */
-typedef struct CPC_CONFIRM {
- u8 result; /* error code */
-} CPC_CONFIRM_T;
-
-/* structure for information requests */
-typedef struct CPC_INFO {
- u8 source; /* interface, driver or library */
- u8 type; /* version or serial number */
- char msg[CPC_MSG_LEN - 2]; /* string holding the requested information */
-} CPC_INFO_T;
-
-/*
- * OVERRUN
- * In general two types of overrun may occur.
- * A hardware overrun, where the CAN controller
- * lost a message, because the interrupt was
- * not handled before the next messgae comes in.
- * Or a software overrun, where i.e. a received
- * message could not be stored in the CPC_MSG
- * buffer.
- */
-
-/* After a software overrun has occurred
- * we wait until we have CPC_OVR_GAP slots
- * free in the CPC_MSG buffer.
- */
-#define CPC_OVR_GAP 10
-
-/*
- * Two types of software overrun may occur.
- * A received CAN message or a CAN state event
- * can cause an overrun.
- * Note: A CPC_CMD which would normally store
- * its result immediately in the CPC_MSG
- * queue may fail, because the message queue is full.
- * This will not generate an overrun message, but
- * will halt command execution, until this command
- * is able to store its message in the message queue.
- */
-#define CPC_OVR_EVENT_CAN 0x01
-#define CPC_OVR_EVENT_CANSTATE 0x02
-#define CPC_OVR_EVENT_BUSERROR 0x04
-
-/*
- * If the CAN controller lost a message
- * we indicate it with the highest bit
- * set in the count field.
- */
-#define CPC_OVR_HW 0x80
-
-/* structure for overrun conditions */
-typedef struct {
- u8 event;
- u8 count;
-} CPC_OVERRUN_T;
-
-/*
- * CAN errors
- * Each CAN controller type has different
- * registers to record errors.
- * Therefor a structure containing the specific
- * errors is set up for each controller here
- */
-
-/*
- * SJA1000 error structure
- * see the SJA1000 datasheet for detailed
- * explanation of the registers
- */
-typedef struct CPC_SJA1000_CAN_ERROR {
- u8 ecc; /* error capture code register */
- u8 rxerr; /* RX error counter register */
- u8 txerr; /* TX error counter register */
-} CPC_SJA1000_CAN_ERROR_T;
-
-/*
- * M16C error structure
- * see the M16C datasheet for detailed
- * explanation of the registers
- */
-typedef struct CPC_M16C_CAN_ERROR {
- u8 tbd; /* to be defined */
-} CPC_M16C_CAN_ERROR_T;
-
-/* structure for CAN error conditions */
-#define CPC_CAN_ECODE_ERRFRAME 0x01
-typedef struct CPC_CAN_ERROR {
- u8 ecode;
- struct {
- u8 cc_type; /* CAN controller type */
- union {
- CPC_SJA1000_CAN_ERROR_T sja1000;
- CPC_M16C_CAN_ERROR_T m16c;
- } regs;
- } cc;
-} CPC_CAN_ERROR_T;
-
-/*
- * Structure containing RX/TX error counter.
- * This structure is used to request the
- * values of the CAN controllers TX and RX
- * error counter.
- */
-typedef struct CPC_CAN_ERR_COUNTER {
- u8 rx;
- u8 tx;
-} CPC_CAN_ERR_COUNTER_T;
-
-/* If this flag is set, transmissions from PC to CPC are protected against loss */
-#define CPC_SECURE_TO_CPC 0x01
-
-/* If this flag is set, transmissions from CPC to PC are protected against loss */
-#define CPC_SECURE_TO_PC 0x02
-
-/* If this flag is set, the CAN-transmit buffer is checked to be free before sending a message */
-#define CPC_SECURE_SEND 0x04
-
-/*
- * If this flag is set, the transmission complete flag is checked
- * after sending a message
- * THIS IS CURRENTLY ONLY IMPLEMENTED IN THE PASSIVE INTERFACE DRIVERS
- */
-#define CPC_SECURE_TRANSMIT 0x08
-
-/* main message type used between library and application */
-typedef struct CPC_MSG {
- u8 type; /* type of message */
- u8 length; /* length of data within union 'msg' */
- u8 msgid; /* confirmation handle */
- u32 ts_sec; /* timestamp in seconds */
- u32 ts_nsec; /* timestamp in nano seconds */
- union {
- u8 generic[CPC_MSG_LEN];
- CPC_CAN_MSG_T canmsg;
- CPC_CAN_PARAMS_T canparams;
- CPC_CONFIRM_T confirmation;
- CPC_INFO_T info;
- CPC_OVERRUN_T overrun;
- CPC_CAN_ERROR_T error;
- CPC_CAN_ERR_COUNTER_T err_counter;
- u8 busload;
- u8 canstate;
- } msg;
-} CPC_MSG_T;
-
-#endif /* CPC_HEADER */
+++ /dev/null
-/*
- * CPCLIB
- *
- * Copyright (C) 2000-2008 EMS Dr. Thomas Wuensche
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-#ifndef CPC_INT_H
-#define CPC_INT_H
-
-#include <linux/wait.h>
-
-#define CPC_MSG_BUF_CNT 1500
-
-#define CPC_PROC_DIR "driver/"
-
-#undef dbg
-#undef err
-#undef info
-
-/* Use our own dbg macro */
-#define dbg(format, arg...) do { if (debug) printk( KERN_INFO format "\n" , ## arg); } while (0)
-#define err(format, arg...) do { printk( KERN_INFO "ERROR " format "\n" , ## arg); } while (0)
-#define info(format, arg...) do { printk( KERN_INFO format "\n" , ## arg); } while (0)
-
-/* Macros help using of our buffers */
-#define IsBufferFull(x) (!(x)->WnR) && ((x)->iidx == (x)->oidx)
-#define IsBufferEmpty(x) ((x)->WnR) && ((x)->iidx == (x)->oidx)
-#define IsBufferNotEmpty(x) (!(x)->WnR) || ((x)->iidx != (x)->oidx)
-#define ResetBuffer(x) do { (x)->oidx = (x)->iidx=0; (x)->WnR = 1; } while(0);
-
-#define CPC_BufWriteAllowed ((chan->oidx != chan->iidx) || chan->WnR)
-
-typedef void (*chan_write_byte_t) (void *chan, unsigned int reg,
- unsigned char val);
-typedef unsigned char (*chan_read_byte_t) (void *chan, unsigned int reg);
-
-typedef struct CPC_CHAN {
- void __iomem * canBase; /* base address of SJA1000 */
- chan_read_byte_t read_byte; /* CAN controller read access routine */
- chan_write_byte_t write_byte; /* CAN controller write access routine */
- CPC_MSG_T *buf; /* buffer for CPC msg */
- unsigned int iidx;
- unsigned int oidx;
- unsigned int WnR;
- unsigned int minor;
- unsigned int locked;
- unsigned int irqDisabled;
-
- unsigned char cpcCtrlCANMessage;
- unsigned char cpcCtrlCANState;
- unsigned char cpcCtrlBUSState;
-
- unsigned char controllerType;
-
- unsigned long ovrTimeSec;
- unsigned long ovrTimeNSec;
- unsigned long ovrLockedBuffer;
- CPC_OVERRUN_T ovr;
-
- /* for debugging only */
- unsigned int handledIrqs;
- unsigned int lostMessages;
-
- unsigned int sentStdCan;
- unsigned int sentExtCan;
- unsigned int sentStdRtr;
- unsigned int sentExtRtr;
-
- unsigned int recvStdCan;
- unsigned int recvExtCan;
- unsigned int recvStdRtr;
- unsigned int recvExtRtr;
-
- wait_queue_head_t *CPCWait_q;
-
- void *private;
-} CPC_CHAN_T;
-
-#endif
+++ /dev/null
-/* Header for CPC-USB Driver ********************
- * Copyright 1999, 2000, 2001
- *
- * Company: EMS Dr. Thomas Wuensche
- * Sonnenhang 3
- * 85304 Ilmmuenster
- * Phone: +49-8441-490260
- * Fax: +49-8441-81860
- * email: support@ems-wuensche.com
- * WWW: www.ems-wuensche.com
- */
-
-#ifndef CPCUSB_H
-#define CPCUSB_H
-
-#undef err
-#undef dbg
-#undef info
-
-/* Use our own dbg macro */
-#define dbg(format, arg...) do { if (debug) printk(KERN_INFO "CPC-USB: " format "\n" , ## arg); } while (0)
-#define info(format, arg...) do { printk(KERN_INFO "CPC-USB: " format "\n" , ## arg); } while (0)
-#define err(format, arg...) do { printk(KERN_INFO "CPC-USB(ERROR): " format "\n" , ## arg); } while (0)
-
-#define CPC_USB_CARD_CNT 4
-
-typedef struct CPC_USB_READ_URB {
- unsigned char *buffer; /* the buffer to send data */
- size_t size; /* the size of the send buffer */
- struct urb *urb; /* the urb used to send data */
-} CPC_USB_READ_URB_T;
-
-typedef struct CPC_USB_WRITE_URB {
- unsigned char *buffer; /* the buffer to send data */
- size_t size; /* the size of the send buffer */
- struct urb *urb; /* the urb used to send data */
- atomic_t busy; /* true if write urb is busy */
- struct completion finished; /* wait for the write to finish */
-} CPC_USB_WRITE_URB_T;
-
-#define CPC_USB_URB_CNT 10
-
-typedef struct CPC_USB {
- struct usb_device *udev; /* save off the usb device pointer */
- struct usb_interface *interface; /* the interface for this device */
- unsigned char minor; /* the starting minor number for this device */
- unsigned char num_ports; /* the number of ports this device has */
- int num_intr_in; /* number of interrupt in endpoints we have */
- int num_bulk_in; /* number of bulk in endpoints we have */
- int num_bulk_out; /* number of bulk out endpoints we have */
-
- CPC_USB_READ_URB_T urbs[CPC_USB_URB_CNT];
-
- unsigned char intr_in_buffer[4]; /* interrupt transfer buffer */
- struct urb *intr_in_urb; /* interrupt transfer urb */
-
- CPC_USB_WRITE_URB_T wrUrbs[CPC_USB_URB_CNT];
-
- int open; /* if the port is open or not */
- int present; /* if the device is not disconnected */
- struct semaphore sem; /* locks this structure */
-
- int free_slots; /* free send slots of CPC-USB */
- int idx;
-
- spinlock_t slock;
-
- char serialNumber[128]; /* serial number */
- int productId; /* product id to differ between M16C and LPC2119 */
- CPC_CHAN_T *chan;
-} CPC_USB_T;
-
-#define CPCTable CPCUSB_Table
-
-#define CPC_DRIVER_VERSION "0.724"
-#define CPC_DRIVER_SERIAL "not applicable"
-
-#define OBUF_SIZE 255 /* 4096 */
-
-/* read timeouts -- RD_NAK_TIMEOUT * RD_EXPIRE = Number of seconds */
-#define RD_NAK_TIMEOUT (10*HZ) /* Default number of X seconds to wait */
-#define RD_EXPIRE 12 /* Number of attempts to wait X seconds */
-
-#define CPC_USB_BASE_MNR 0 /* CPC-USB start at minor 0 */
-
-#endif
+++ /dev/null
-#ifndef _SJA2M16C_H
-#define _SJA2M16C_H
-
-#include "cpc.h"
-
-#define BAUDRATE_TOLERANCE_PERCENT 1
-#define SAMPLEPOINT_TOLERANCE_PERCENT 5
-#define SAMPLEPOINT_UPPER_LIMIT 88
-
-/* M16C parameters */
-struct FIELD_C0CONR {
- unsigned int brp:4;
- unsigned int sam:1;
- unsigned int pr:3;
- unsigned int dummy:8;
-};
-struct FIELD_C1CONR {
- unsigned int ph1:3;
- unsigned int ph2:3;
- unsigned int sjw:2;
- unsigned int dummy:8;
-};
-typedef union C0CONR {
- unsigned char c0con;
- struct FIELD_C0CONR bc0con;
-} C0CONR_T;
-typedef union C1CONR {
- unsigned char c1con;
- struct FIELD_C1CONR bc1con;
-} C1CONR_T;
-
-#define SJA_TSEG1 ((pParams->btr1 & 0x0f)+1)
-#define SJA_TSEG2 (((pParams->btr1 & 0x70)>>4)+1)
-#define SJA_BRP ((pParams->btr0 & 0x3f)+1)
-#define SJA_SJW ((pParams->btr0 & 0xc0)>>6)
-#define SJA_SAM ((pParams->btr1 & 0x80)>>7)
-int baudrate_m16c(int clk, int brp, int pr, int ph1, int ph2);
-int samplepoint_m16c(int brp, int pr, int ph1, int ph2);
-int SJA1000_TO_M16C_BASIC_Params(CPC_MSG_T *pMsg);
-
-#endif
+++ /dev/null
-/****************************************************************************
-*
-* Copyright (c) 2003,2004 by EMS Dr. Thomas Wuensche
-*
-* - All rights reserved -
-*
-* This code is provided "as is" without warranty of any kind, either
-* expressed or implied, including but not limited to the liability
-* concerning the freedom from material defects, the fitness for parti-
-* cular purposes or the freedom of proprietary rights of third parties.
-*
-*****************************************************************************
-* Module name.: cpcusb
-*****************************************************************************
-* Include file: cpc.h
-*****************************************************************************
-* Project.....: Windows Driver Development Kit
-* Filename....: sja2m16c.cpp
-* Authors.....: (GU) Gerhard Uttenthaler
-* (CS) Christian Schoett
-*****************************************************************************
-* Short descr.: converts baudrate between SJA1000 and M16C
-*****************************************************************************
-* Description.: handles the baudrate conversion from SJA1000 parameters to
-* M16C parameters
-*****************************************************************************
-* Address : EMS Dr. Thomas Wuensche
-* Sonnenhang 3
-* D-85304 Ilmmuenster
-* Tel. : +49-8441-490260
-* Fax. : +49-8441-81860
-* email: support@ems-wuensche.com
-*****************************************************************************
-* History
-*****************************************************************************
-* Version Date Auth Remark
-*
-* 01.00 ?? GU - initial release
-* 01.10 ?????????? CS - adapted to fit into the USB Windows driver
-* 02.00 18.08.2004 GU - improved the baudrate calculating algorithm
-* - implemented acceptance filtering
-* 02.10 10.09.2004 CS - adapted to fit into the USB Windows driver
-*****************************************************************************
-* ToDo's
-*****************************************************************************
-*/
-
-/****************************************************************************/
-/* I N C L U D E S
-*/
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/poll.h>
-#include <linux/smp_lock.h>
-#include <linux/completion.h>
-#include <asm/uaccess.h>
-#include <linux/usb.h>
-
-#include "cpc.h"
-#include "cpc_int.h"
-#include "cpcusb.h"
-
-#include "sja2m16c.h"
-
-/*********************************************************************/
-int baudrate_m16c(int clk, int brp, int pr, int ph1, int ph2)
-{
- return (16000000 / (1 << clk)) / 2 / (brp + 1) / (1 + pr + 1 +
- ph1 + 1 + ph2 +
- 1);
-}
-
-
-/*********************************************************************/
-int samplepoint_m16c(int brp, int pr, int ph1, int ph2)
-{
- return (100 * (1 + pr + 1 + ph1 + 1)) / (1 + pr + 1 + ph1 + 1 +
- ph2 + 1);
-}
-
-
-/****************************************************************************
-* Function.....: SJA1000_TO_M16C_BASIC_Params
-*
-* Task.........: This routine converts SJA1000 CAN btr parameters into M16C
-* parameters based on the sample point and the error. In
-* addition it converts the acceptance filter parameters to
-* suit the M16C parameters
-*
-* Parameters...: None
-*
-* Return values: None
-*
-* Comments.....:
-*****************************************************************************
-* History
-*****************************************************************************
-* 19.01.2005 CS - modifed the conversion of SJA1000 filter params into
-* M16C params. Due to compatibility reasons with the
-* older 82C200 CAN controller the SJA1000
-****************************************************************************/
-int SJA1000_TO_M16C_BASIC_Params(CPC_MSG_T * in)
-{
- int sjaBaudrate;
- int sjaSamplepoint;
- int *baudrate_error; // BRP[0..15], PR[0..7], PH1[0..7], PH2[0..7]
- int *samplepoint_error; // BRP[0..15], PR[0..7], PH1[0..7], PH2[0..7]
- int baudrate_error_merk;
- int clk, brp, pr, ph1, ph2;
- int clk_merk, brp_merk, pr_merk, ph1_merk, ph2_merk;
- int index;
- unsigned char acc_code0, acc_code1, acc_code2, acc_code3;
- unsigned char acc_mask0, acc_mask1, acc_mask2, acc_mask3;
- CPC_MSG_T * out;
- C0CONR_T c0con;
- C1CONR_T c1con;
- int tmpAccCode;
- int tmpAccMask;
-
- // we have to convert the parameters into M16C parameters
- CPC_SJA1000_PARAMS_T * pParams;
-
- // check if the type is CAN parameters and if we have to convert the given params
- if (in->type != CPC_CMD_T_CAN_PRMS
- || in->msg.canparams.cc_type != SJA1000)
- return 0;
- pParams =
- (CPC_SJA1000_PARAMS_T *) & in->msg.canparams.cc_params.sja1000;
- acc_code0 = pParams->acc_code0;
- acc_code1 = pParams->acc_code1;
- acc_code2 = pParams->acc_code2;
- acc_code3 = pParams->acc_code3;
- acc_mask0 = pParams->acc_mask0;
- acc_mask1 = pParams->acc_mask1;
- acc_mask2 = pParams->acc_mask2;
- acc_mask3 = pParams->acc_mask3;
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
- info("acc_code0: %2.2Xh\n", acc_code0);
- info("acc_code1: %2.2Xh\n", acc_code1);
- info("acc_code2: %2.2Xh\n", acc_code2);
- info("acc_code3: %2.2Xh\n", acc_code3);
- info("acc_mask0: %2.2Xh\n", acc_mask0);
- info("acc_mask1: %2.2Xh\n", acc_mask1);
- info("acc_mask2: %2.2Xh\n", acc_mask2);
- info("acc_mask3: %2.2Xh\n", acc_mask3);
-
-#endif /* */
- if (!
- (baudrate_error =
- (int *) vmalloc(sizeof(int) * 16 * 8 * 8 * 8 * 5))) {
- err("Could not allocate memory\n");
- return -3;
- }
- if (!
- (samplepoint_error =
- (int *) vmalloc(sizeof(int) * 16 * 8 * 8 * 8 * 5))) {
- err("Could not allocate memory\n");
- vfree(baudrate_error);
- return -3;
- }
- memset(baudrate_error, 0xff, sizeof(baudrate_error));
- memset(samplepoint_error, 0xff, sizeof(baudrate_error));
- sjaBaudrate =
- 16000000 / 2 / SJA_BRP / (1 + SJA_TSEG1 + SJA_TSEG2);
- sjaSamplepoint =
- 100 * (1 + SJA_TSEG1) / (1 + SJA_TSEG1 + SJA_TSEG2);
- if (sjaBaudrate == 0) {
- vfree(baudrate_error);
- vfree(samplepoint_error);
- return -2;
- }
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
- info("\nStarting SJA CAN params\n");
- info("-------------------------\n");
- info("TS1 : %2.2Xh TS2 : %2.2Xh\n", SJA_TSEG1, SJA_TSEG2);
- info("BTR0 : %2.2Xh BTR1: %2.2Xh\n", pParams->btr0,
- pParams->btr1);
- info("Baudrate: %d.%dkBaud\n", sjaBaudrate / 1000,
- sjaBaudrate % 1000);
- info("Sample P: 0.%d\n", sjaSamplepoint);
- info("\n");
-
-#endif /* */
- c0con.bc0con.sam = SJA_SAM;
- c1con.bc1con.sjw = SJA_SJW;
-
- // calculate errors for all baudrates
- index = 0;
- for (clk = 0; clk < 5; clk++) {
- for (brp = 0; brp < 16; brp++) {
- for (pr = 0; pr < 8; pr++) {
- for (ph1 = 0; ph1 < 8; ph1++) {
- for (ph2 = 0; ph2 < 8; ph2++) {
- baudrate_error[index] =
- 100 *
- abs(baudrate_m16c
- (clk, brp, pr, ph1,
- ph2) -
- sjaBaudrate) /
- sjaBaudrate;
- samplepoint_error[index] =
- abs(samplepoint_m16c
- (brp, pr, ph1,
- ph2) -
- sjaSamplepoint);
-
-#if 0
- info
- ("Baudrate : %d kBaud\n",
- baudrate_m16c(clk,
- brp, pr,
- ph1,
- ph2));
- info
- ("Baudrate Error: %d\n",
- baudrate_error
- [index]);
- info
- ("Sample P Error: %d\n",
- samplepoint_error
- [index]);
- info
- ("clk : %d\n",
- clk);
-
-#endif /* */
- index++;
- }
- }
- }
- }
- }
-
- // mark all baudrate_error entries which are outer limits
- index = 0;
- for (clk = 0; clk < 5; clk++) {
- for (brp = 0; brp < 16; brp++) {
- for (pr = 0; pr < 8; pr++) {
- for (ph1 = 0; ph1 < 8; ph1++) {
- for (ph2 = 0; ph2 < 8; ph2++) {
- if ((baudrate_error[index]
- >
- BAUDRATE_TOLERANCE_PERCENT)
- ||
- (samplepoint_error
- [index] >
- SAMPLEPOINT_TOLERANCE_PERCENT)
- ||
- (samplepoint_m16c
- (brp, pr, ph1,
- ph2) >
- SAMPLEPOINT_UPPER_LIMIT))
- {
- baudrate_error
- [index] = -1;
- } else
- if (((1 + pr + 1 +
- ph1 + 1 + ph2 +
- 1) < 8)
- ||
- ((1 + pr + 1 +
- ph1 + 1 + ph2 +
- 1) > 25)) {
- baudrate_error
- [index] = -1;
- }
-
-#if 0
- else {
- info
- ("Baudrate : %d kBaud\n",
- baudrate_m16c
- (clk, brp, pr,
- ph1, ph2));
- info
- ("Baudrate Error: %d\n",
- baudrate_error
- [index]);
- info
- ("Sample P Error: %d\n",
- samplepoint_error
- [index]);
- }
-
-#endif /* */
- index++;
- }
- }
- }
- }
- }
-
- // find list of minimum of baudrate_error within unmarked entries
- clk_merk = brp_merk = pr_merk = ph1_merk = ph2_merk = 0;
- baudrate_error_merk = 100;
- index = 0;
- for (clk = 0; clk < 5; clk++) {
- for (brp = 0; brp < 16; brp++) {
- for (pr = 0; pr < 8; pr++) {
- for (ph1 = 0; ph1 < 8; ph1++) {
- for (ph2 = 0; ph2 < 8; ph2++) {
- if (baudrate_error[index]
- != -1) {
- if (baudrate_error
- [index] <
- baudrate_error_merk)
- {
- baudrate_error_merk
- =
- baudrate_error
- [index];
- brp_merk =
- brp;
- pr_merk =
- pr;
- ph1_merk =
- ph1;
- ph2_merk =
- ph2;
- clk_merk =
- clk;
-
-#if 0
- info
- ("brp: %2.2Xh pr: %2.2Xh ph1: %2.2Xh ph2: %2.2Xh\n",
- brp,
- pr,
- ph1,
- ph2);
- info
- ("Baudrate : %d kBaud\n",
- baudrate_m16c
- (clk,
- brp,
- pr,
- ph1,
- ph2));
- info
- ("Baudrate Error: %d\n",
- baudrate_error
- [index]);
- info
- ("Sample P Error: %d\n",
- samplepoint_error
- [index]);
-
-#endif /* */
- }
- }
- index++;
- }
- }
- }
- }
- }
- if (baudrate_error_merk == 100) {
- info("ERROR: Could not convert CAN init parameter\n");
- vfree(baudrate_error);
- vfree(samplepoint_error);
- return -1;
- }
-
- // setting m16c CAN parameter
- c0con.bc0con.brp = brp_merk;
- c0con.bc0con.pr = pr_merk;
- c1con.bc1con.ph1 = ph1_merk;
- c1con.bc1con.ph2 = ph2_merk;
-
-#ifdef _DEBUG_OUTPUT_CAN_PARAMS
- info("\nResulting M16C CAN params\n");
- info("-------------------------\n");
- info("clk : %2.2Xh\n", clk_merk);
- info("ph1 : %2.2Xh ph2: %2.2Xh\n", c1con.bc1con.ph1 + 1,
- c1con.bc1con.ph2 + 1);
- info("pr : %2.2Xh brp: %2.2Xh\n", c0con.bc0con.pr + 1,
- c0con.bc0con.brp + 1);
- info("sjw : %2.2Xh sam: %2.2Xh\n", c1con.bc1con.sjw,
- c0con.bc0con.sam);
- info("co1 : %2.2Xh co0: %2.2Xh\n", c1con.c1con, c0con.c0con);
- info("Baudrate: %d.%dBaud\n",
- baudrate_m16c(clk_merk, c0con.bc0con.brp, c0con.bc0con.pr,
- c1con.bc1con.ph1, c1con.bc1con.ph2) / 1000,
- baudrate_m16c(clk_merk, c0con.bc0con.brp, c0con.bc0con.pr,
- c1con.bc1con.ph1, c1con.bc1con.ph2) % 1000);
- info("Sample P: 0.%d\n",
- samplepoint_m16c(c0con.bc0con.brp, c0con.bc0con.pr,
- c1con.bc1con.ph1, c1con.bc1con.ph2));
- info("\n");
-
-#endif /* */
- out = in;
- out->type = 6;
- out->length = sizeof(CPC_M16C_BASIC_PARAMS_T) + 1;
- out->msg.canparams.cc_type = M16C_BASIC;
- out->msg.canparams.cc_params.m16c_basic.con0 = c0con.c0con;
- out->msg.canparams.cc_params.m16c_basic.con1 = c1con.c1con;
- out->msg.canparams.cc_params.m16c_basic.ctlr0 = 0x4C;
- out->msg.canparams.cc_params.m16c_basic.ctlr1 = 0x00;
- out->msg.canparams.cc_params.m16c_basic.clk = clk_merk;
- out->msg.canparams.cc_params.m16c_basic.acc_std_code0 =
- acc_code0;
- out->msg.canparams.cc_params.m16c_basic.acc_std_code1 = acc_code1;
-
-// info("code0: 0x%2.2X, code1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_code0, out->msg.canparams.cc_params.m16c_basic.acc_std_code1);
- tmpAccCode = (acc_code1 >> 5) + (acc_code0 << 3);
- out->msg.canparams.cc_params.m16c_basic.acc_std_code0 =
- (unsigned char) tmpAccCode;
- out->msg.canparams.cc_params.m16c_basic.acc_std_code1 =
- (unsigned char) (tmpAccCode >> 8);
-
-// info("code0: 0x%2.2X, code1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_code0, out->msg.canparams.cc_params.m16c_basic.acc_std_code1);
- out->msg.canparams.cc_params.m16c_basic.acc_std_mask0 =
- ~acc_mask0;
- out->msg.canparams.cc_params.m16c_basic.acc_std_mask1 =
- ~acc_mask1;
-
-// info("mask0: 0x%2.2X, mask1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_mask0, out->msg.canparams.cc_params.m16c_basic.acc_std_mask1);
- tmpAccMask = ((acc_mask1) >> 5) + ((acc_mask0) << 3);
-
-// info("tmpAccMask: 0x%4.4X\n", tmpAccMask);
- out->msg.canparams.cc_params.m16c_basic.acc_std_mask0 =
- (unsigned char) ~tmpAccMask;
- out->msg.canparams.cc_params.m16c_basic.acc_std_mask1 =
- (unsigned char) ~(tmpAccMask >> 8);
-
-// info("mask0: 0x%2.2X, mask1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_mask0, out->msg.canparams.cc_params.m16c_basic.acc_std_mask1);
- out->msg.canparams.cc_params.m16c_basic.acc_ext_code0 =
- (unsigned char) tmpAccCode;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_code1 =
- (unsigned char) (tmpAccCode >> 8);
- out->msg.canparams.cc_params.m16c_basic.acc_ext_code2 = acc_code2;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_code3 = acc_code3;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_mask0 =
- (unsigned char) ~tmpAccMask;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_mask1 =
- (unsigned char) ~(tmpAccMask >> 8);
- out->msg.canparams.cc_params.m16c_basic.acc_ext_mask2 =
- ~acc_mask2;
- out->msg.canparams.cc_params.m16c_basic.acc_ext_mask3 =
- ~acc_mask3;
- vfree(baudrate_error);
- vfree(samplepoint_error);
- return 0;
-}
-
-
EXTRA_CFLAGS += -Idrivers/media/dvb/frontends
EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core
-
-# Ubuntu 8.04 has CONFIG_SND undefined, so include lum sound/config.h too
-ifeq ($(CONFIG_SND),)
-EXTRA_CFLAGS += -include sound/config.h
-endif
default y if PCMCIA && !M32R # sl811_cs
default y if ARM # SL-811
default y if SUPERH # r8a66597-hcd
- default y if MICROBLAZE
default PCI
# many non-PCI SOC chips embed OHCI
/* #define VERBOSE_DEBUG */
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include "g_zero.h"
/* #define VERBOSE_DEBUG */
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include "u_serial.h"
/* #define VERBOSE_DEBUG */
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include "g_zero.h"
*/
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/ctype.h>
/* #define VERBOSE_DEBUG */
#include <linux/kernel.h>
-#include <linux/utsname.h>
#include <linux/device.h>
#include <linux/ctype.h>
#include <linux/etherdevice.h>
}
}
+#ifdef CONFIG_PM
static void stop_read_write_urbs(struct usb_serial *serial)
{
int i, j;
return ec ? -EIO : 0;
}
+#else
+#define sierra_suspend NULL
+#define sierra_resume NULL
+#endif
static struct usb_serial_driver sierra_device = {
.driver = {
dev->mem_start = mem_res->start;
dev->mem_end = mem_res->end;
- len = regs_res->end - regs_res->start;
+ len = resource_size(regs_res);
if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) {
printk(KERN_ERR "%s: Can't request vlynq registers\n",
dev_name(&dev->dev));
See <http://v9fs.sf.net> for more information.
If unsure, say N.
+
+config 9P_FSCACHE
+ bool "Enable 9P client caching support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y
+ help
+ Choose Y here to enable persistent, read-only local
+ caching support for 9p clients using FS-Cache
+
vfs_dir.o \
vfs_dentry.o \
v9fs.o \
- fid.o \
+ fid.o
+9p-$(CONFIG_9P_FSCACHE) += cache.o
--- /dev/null
+/*
+ * V9FS cache definitions.
+ *
+ * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/jiffies.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <net/9p/9p.h>
+
+#include "v9fs.h"
+#include "cache.h"
+
+#define CACHETAG_LEN 11
+
+struct kmem_cache *vcookie_cache;
+
+struct fscache_netfs v9fs_cache_netfs = {
+ .name = "9p",
+ .version = 0,
+};
+
+static void init_once(void *foo)
+{
+ struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
+ vcookie->fscache = NULL;
+ vcookie->qid = NULL;
+ inode_init_once(&vcookie->inode);
+}
+
+/**
+ * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
+ * vcookie to inode mapping
+ *
+ * Returns 0 on success.
+ */
+
+static int v9fs_init_vcookiecache(void)
+{
+ vcookie_cache = kmem_cache_create("vcookie_cache",
+ sizeof(struct v9fs_cookie),
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
+ init_once);
+ if (!vcookie_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * v9fs_destroy_vcookiecache - destroy the cache of vcookies
+ *
+ */
+
+static void v9fs_destroy_vcookiecache(void)
+{
+ kmem_cache_destroy(vcookie_cache);
+}
+
+int __v9fs_cache_register(void)
+{
+ int ret;
+ ret = v9fs_init_vcookiecache();
+ if (ret < 0)
+ return ret;
+
+ return fscache_register_netfs(&v9fs_cache_netfs);
+}
+
+void __v9fs_cache_unregister(void)
+{
+ v9fs_destroy_vcookiecache();
+ fscache_unregister_netfs(&v9fs_cache_netfs);
+}
+
+/**
+ * v9fs_random_cachetag - Generate a random tag to be associated
+ * with a new cache session.
+ *
+ * The value of jiffies is used for a fairly randomly cache tag.
+ */
+
+static
+int v9fs_random_cachetag(struct v9fs_session_info *v9ses)
+{
+ v9ses->cachetag = kmalloc(CACHETAG_LEN, GFP_KERNEL);
+ if (!v9ses->cachetag)
+ return -ENOMEM;
+
+ return scnprintf(v9ses->cachetag, CACHETAG_LEN, "%lu", jiffies);
+}
+
+static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ struct v9fs_session_info *v9ses;
+ uint16_t klen = 0;
+
+ v9ses = (struct v9fs_session_info *)cookie_netfs_data;
+ P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses,
+ buffer, bufmax);
+
+ if (v9ses->cachetag)
+ klen = strlen(v9ses->cachetag);
+
+ if (klen > bufmax)
+ return 0;
+
+ memcpy(buffer, v9ses->cachetag, klen);
+ P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag);
+ return klen;
+}
+
+const struct fscache_cookie_def v9fs_cache_session_index_def = {
+ .name = "9P.session",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = v9fs_cache_session_get_key,
+};
+
+void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
+{
+ /* If no cache session tag was specified, we generate a random one. */
+ if (!v9ses->cachetag)
+ v9fs_random_cachetag(v9ses);
+
+ v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
+ &v9fs_cache_session_index_def,
+ v9ses);
+ P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses,
+ v9ses->fscache);
+}
+
+void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
+{
+ P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses,
+ v9ses->fscache);
+ fscache_relinquish_cookie(v9ses->fscache, 0);
+ v9ses->fscache = NULL;
+}
+
+
+static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path));
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode,
+ vcookie->qid->path);
+ return sizeof(vcookie->qid->path);
+}
+
+static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
+ uint64_t *size)
+{
+ const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ *size = i_size_read(&vcookie->inode);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode,
+ *size);
+}
+
+static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen)
+{
+ const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version));
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode,
+ vcookie->qid->version);
+ return sizeof(vcookie->qid->version);
+}
+
+static enum
+fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
+ const void *buffer,
+ uint16_t buflen)
+{
+ const struct v9fs_cookie *vcookie = cookie_netfs_data;
+
+ if (buflen != sizeof(vcookie->qid->version))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ if (memcmp(buffer, &vcookie->qid->version,
+ sizeof(vcookie->qid->version)))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
+{
+ struct v9fs_cookie *vcookie = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ for (;;) {
+ nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping,
+ first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+ if (!nr_pages)
+ break;
+
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
+const struct fscache_cookie_def v9fs_cache_inode_index_def = {
+ .name = "9p.inode",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = v9fs_cache_inode_get_key,
+ .get_attr = v9fs_cache_inode_get_attr,
+ .get_aux = v9fs_cache_inode_get_aux,
+ .check_aux = v9fs_cache_inode_check_aux,
+ .now_uncached = v9fs_cache_inode_now_uncached,
+};
+
+void v9fs_cache_inode_get_cookie(struct inode *inode)
+{
+ struct v9fs_cookie *vcookie;
+ struct v9fs_session_info *v9ses;
+
+ if (!S_ISREG(inode->i_mode))
+ return;
+
+ vcookie = v9fs_inode2cookie(inode);
+ if (vcookie->fscache)
+ return;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ &v9fs_cache_inode_index_def,
+ vcookie);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
+ vcookie->fscache);
+}
+
+void v9fs_cache_inode_put_cookie(struct inode *inode)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ if (!vcookie->fscache)
+ return;
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
+ vcookie->fscache);
+
+ fscache_relinquish_cookie(vcookie->fscache, 0);
+ vcookie->fscache = NULL;
+}
+
+void v9fs_cache_inode_flush_cookie(struct inode *inode)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ if (!vcookie->fscache)
+ return;
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
+ vcookie->fscache);
+
+ fscache_relinquish_cookie(vcookie->fscache, 1);
+ vcookie->fscache = NULL;
+}
+
+void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct p9_fid *fid;
+
+ if (!vcookie->fscache)
+ return;
+
+ spin_lock(&vcookie->lock);
+ fid = filp->private_data;
+ if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+ v9fs_cache_inode_flush_cookie(inode);
+ else
+ v9fs_cache_inode_get_cookie(inode);
+
+ spin_unlock(&vcookie->lock);
+}
+
+void v9fs_cache_inode_reset_cookie(struct inode *inode)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_session_info *v9ses;
+ struct fscache_cookie *old;
+
+ if (!vcookie->fscache)
+ return;
+
+ old = vcookie->fscache;
+
+ spin_lock(&vcookie->lock);
+ fscache_relinquish_cookie(vcookie->fscache, 1);
+
+ v9ses = v9fs_inode2v9ses(inode);
+ vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ &v9fs_cache_inode_index_def,
+ vcookie);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
+ inode, old, vcookie->fscache);
+
+ spin_unlock(&vcookie->lock);
+}
+
+int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ struct inode *inode = page->mapping->host;
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ BUG_ON(!vcookie->fscache);
+
+ if (PageFsCache(page)) {
+ if (fscache_check_page_write(vcookie->fscache, page)) {
+ if (!(gfp & __GFP_WAIT))
+ return 0;
+ fscache_wait_on_page_write(vcookie->fscache, page);
+ }
+
+ fscache_uncache_page(vcookie->fscache, page);
+ ClearPageFsCache(page);
+ }
+
+ return 1;
+}
+
+void __v9fs_fscache_invalidate_page(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ BUG_ON(!vcookie->fscache);
+
+ if (PageFsCache(page)) {
+ fscache_wait_on_page_write(vcookie->fscache, page);
+ BUG_ON(!PageLocked(page));
+ fscache_uncache_page(vcookie->fscache, page);
+ ClearPageFsCache(page);
+ }
+}
+
+static void v9fs_vfs_readpage_complete(struct page *page, void *data,
+ int error)
+{
+ if (!error)
+ SetPageUptodate(page);
+
+ unlock_page(page);
+}
+
+/**
+ * __v9fs_readpage_from_fscache - read a page from cache
+ *
+ * Returns 0 if the pages are in cache and a BIO is submitted,
+ * 1 if the pages are not in cache and -error otherwise.
+ */
+
+int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ int ret;
+ const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+ if (!vcookie->fscache)
+ return -ENOBUFS;
+
+ ret = fscache_read_or_alloc_page(vcookie->fscache,
+ page,
+ v9fs_vfs_readpage_complete,
+ NULL,
+ GFP_KERNEL);
+ switch (ret) {
+ case -ENOBUFS:
+ case -ENODATA:
+ P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", ret);
+ return 1;
+ case 0:
+ P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted");
+ return ret;
+ default:
+ P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret);
+ return ret;
+ }
+}
+
+/**
+ * __v9fs_readpages_from_fscache - read multiple pages from cache
+ *
+ * Returns 0 if the pages are in cache and a BIO is submitted,
+ * 1 if the pages are not in cache and -error otherwise.
+ */
+
+int __v9fs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ int ret;
+ const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
+ if (!vcookie->fscache)
+ return -ENOBUFS;
+
+ ret = fscache_read_or_alloc_pages(vcookie->fscache,
+ mapping, pages, nr_pages,
+ v9fs_vfs_readpage_complete,
+ NULL,
+ mapping_gfp_mask(mapping));
+ switch (ret) {
+ case -ENOBUFS:
+ case -ENODATA:
+ P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", ret);
+ return 1;
+ case 0:
+ BUG_ON(!list_empty(pages));
+ BUG_ON(*nr_pages != 0);
+ P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted");
+ return ret;
+ default:
+ P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret);
+ return ret;
+ }
+}
+
+/**
+ * __v9fs_readpage_to_fscache - write a page to the cache
+ *
+ */
+
+void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+ int ret;
+ const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+ ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL);
+ P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
+ if (ret != 0)
+ v9fs_uncache_page(inode, page);
+}
--- /dev/null
+/*
+ * V9FS cache definitions.
+ *
+ * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#ifndef _9P_CACHE_H
+#ifdef CONFIG_9P_FSCACHE
+#include <linux/fscache.h>
+#include <linux/spinlock.h>
+
+extern struct kmem_cache *vcookie_cache;
+
+struct v9fs_cookie {
+ spinlock_t lock;
+ struct inode inode;
+ struct fscache_cookie *fscache;
+ struct p9_qid *qid;
+};
+
+static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
+{
+ return container_of(inode, struct v9fs_cookie, inode);
+}
+
+extern struct fscache_netfs v9fs_cache_netfs;
+extern const struct fscache_cookie_def v9fs_cache_session_index_def;
+extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
+
+extern void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses);
+extern void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses);
+
+extern void v9fs_cache_inode_get_cookie(struct inode *inode);
+extern void v9fs_cache_inode_put_cookie(struct inode *inode);
+extern void v9fs_cache_inode_flush_cookie(struct inode *inode);
+extern void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp);
+extern void v9fs_cache_inode_reset_cookie(struct inode *inode);
+
+extern int __v9fs_cache_register(void);
+extern void __v9fs_cache_unregister(void);
+
+extern int __v9fs_fscache_release_page(struct page *page, gfp_t gfp);
+extern void __v9fs_fscache_invalidate_page(struct page *page);
+extern int __v9fs_readpage_from_fscache(struct inode *inode,
+ struct page *page);
+extern int __v9fs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages);
+extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
+
+
+/**
+ * v9fs_cache_register - Register v9fs file system with the cache
+ */
+static inline int v9fs_cache_register(void)
+{
+ return __v9fs_cache_register();
+}
+
+/**
+ * v9fs_cache_unregister - Unregister v9fs from the cache
+ */
+static inline void v9fs_cache_unregister(void)
+{
+ __v9fs_cache_unregister();
+}
+
+static inline int v9fs_fscache_release_page(struct page *page,
+ gfp_t gfp)
+{
+ return __v9fs_fscache_release_page(page, gfp);
+}
+
+static inline void v9fs_fscache_invalidate_page(struct page *page)
+{
+ __v9fs_fscache_invalidate_page(page);
+}
+
+static inline int v9fs_readpage_from_fscache(struct inode *inode,
+ struct page *page)
+{
+ return __v9fs_readpage_from_fscache(inode, page);
+}
+
+static inline int v9fs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return __v9fs_readpages_from_fscache(inode, mapping, pages,
+ nr_pages);
+}
+
+static inline void v9fs_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __v9fs_readpage_to_fscache(inode, page);
+}
+
+static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ fscache_uncache_page(vcookie->fscache, page);
+ BUG_ON(PageFsCache(page));
+}
+
+static inline void v9fs_vcookie_set_qid(struct inode *inode,
+ struct p9_qid *qid)
+{
+ struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ spin_lock(&vcookie->lock);
+ vcookie->qid = qid;
+ spin_unlock(&vcookie->lock);
+}
+
+#else /* CONFIG_9P_FSCACHE */
+
+static inline int v9fs_cache_register(void)
+{
+ return 1;
+}
+
+static inline void v9fs_cache_unregister(void) {}
+
+static inline int v9fs_fscache_release_page(struct page *page,
+ gfp_t gfp) {
+ return 1;
+}
+
+static inline void v9fs_fscache_invalidate_page(struct page *page) {}
+
+static inline int v9fs_readpage_from_fscache(struct inode *inode,
+ struct page *page)
+{
+ return -ENOBUFS;
+}
+
+static inline int v9fs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return -ENOBUFS;
+}
+
+static inline void v9fs_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{}
+
+static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
+{}
+
+static inline void v9fs_vcookie_set_qid(struct inode *inode,
+ struct p9_qid *qid)
+{}
+
+#endif /* CONFIG_9P_FSCACHE */
+#endif /* _9P_CACHE_H */
#include <net/9p/transport.h>
#include "v9fs.h"
#include "v9fs_vfs.h"
+#include "cache.h"
+
+static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
+static LIST_HEAD(v9fs_sessionlist);
/*
- * Option Parsing (code inspired by NFS code)
- * NOTE: each transport will parse its own options
- */
+ * Option Parsing (code inspired by NFS code)
+ * NOTE: each transport will parse its own options
+ */
enum {
/* Options that take integer arguments */
Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid,
/* String options */
- Opt_uname, Opt_remotename, Opt_trans,
+ Opt_uname, Opt_remotename, Opt_trans, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
Opt_nodevmap,
/* Cache options */
- Opt_cache_loose,
+ Opt_cache_loose, Opt_fscache,
/* Access options */
Opt_access,
/* Error token */
{Opt_uname, "uname=%s"},
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
- {Opt_cache_loose, "cache=loose"},
+ {Opt_cache, "cache=%s"},
{Opt_cache_loose, "loose"},
+ {Opt_fscache, "fscache"},
+ {Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
{Opt_err, NULL}
};
v9ses->afid = ~0;
v9ses->debug = 0;
v9ses->cache = 0;
+#ifdef CONFIG_9P_FSCACHE
+ v9ses->cachetag = NULL;
+#endif
if (!opts)
return 0;
options = kstrdup(opts, GFP_KERNEL);
- if (!options) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "failed to allocate copy of option string\n");
- return -ENOMEM;
- }
+ if (!options)
+ goto fail_option_alloc;
while ((p = strsep(&options, ",")) != NULL) {
int token;
case Opt_cache_loose:
v9ses->cache = CACHE_LOOSE;
break;
+ case Opt_fscache:
+ v9ses->cache = CACHE_FSCACHE;
+ break;
+ case Opt_cachetag:
+#ifdef CONFIG_9P_FSCACHE
+ v9ses->cachetag = match_strdup(&args[0]);
+#endif
+ break;
+ case Opt_cache:
+ s = match_strdup(&args[0]);
+ if (!s)
+ goto fail_option_alloc;
+
+ if (strcmp(s, "loose") == 0)
+ v9ses->cache = CACHE_LOOSE;
+ else if (strcmp(s, "fscache") == 0)
+ v9ses->cache = CACHE_FSCACHE;
+ else
+ v9ses->cache = CACHE_NONE;
+ kfree(s);
+ break;
case Opt_access:
s = match_strdup(&args[0]);
- if (!s) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "failed to allocate copy"
- " of option argument\n");
- ret = -ENOMEM;
- break;
- }
+ if (!s)
+ goto fail_option_alloc;
+
v9ses->flags &= ~V9FS_ACCESS_MASK;
if (strcmp(s, "user") == 0)
v9ses->flags |= V9FS_ACCESS_USER;
}
kfree(options);
return ret;
+
+fail_option_alloc:
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "failed to allocate copy of option argument\n");
+ return -ENOMEM;
}
/**
return ERR_PTR(-ENOMEM);
}
+ spin_lock(&v9fs_sessionlist_lock);
+ list_add(&v9ses->slist, &v9fs_sessionlist);
+ spin_unlock(&v9fs_sessionlist_lock);
+
v9ses->flags = V9FS_EXTENDED | V9FS_ACCESS_USER;
strcpy(v9ses->uname, V9FS_DEFUSER);
strcpy(v9ses->aname, V9FS_DEFANAME);
else
fid->uid = ~0;
+#ifdef CONFIG_9P_FSCACHE
+ /* register the session for caching */
+ v9fs_cache_session_get_cookie(v9ses);
+#endif
+
return fid;
error:
v9ses->clnt = NULL;
}
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->fscache) {
+ v9fs_cache_session_put_cookie(v9ses);
+ kfree(v9ses->cachetag);
+ }
+#endif
__putname(v9ses->uname);
__putname(v9ses->aname);
+
+ spin_lock(&v9fs_sessionlist_lock);
+ list_del(&v9ses->slist);
+ spin_unlock(&v9fs_sessionlist_lock);
}
/**
extern int v9fs_error_init(void);
+static struct kobject *v9fs_kobj;
+
+#ifdef CONFIG_9P_FSCACHE
/**
- * v9fs_init - Initialize module
+ * caches_show - list caches associated with a session
+ *
+ * Returns the size of buffer written.
+ */
+
+static ssize_t caches_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ ssize_t n = 0, count = 0, limit = PAGE_SIZE;
+ struct v9fs_session_info *v9ses;
+
+ spin_lock(&v9fs_sessionlist_lock);
+ list_for_each_entry(v9ses, &v9fs_sessionlist, slist) {
+ if (v9ses->cachetag) {
+ n = snprintf(buf, limit, "%s\n", v9ses->cachetag);
+ if (n < 0) {
+ count = n;
+ break;
+ }
+
+ count += n;
+ limit -= n;
+ }
+ }
+
+ spin_unlock(&v9fs_sessionlist_lock);
+ return count;
+}
+
+static struct kobj_attribute v9fs_attr_cache = __ATTR_RO(caches);
+#endif /* CONFIG_9P_FSCACHE */
+
+static struct attribute *v9fs_attrs[] = {
+#ifdef CONFIG_9P_FSCACHE
+ &v9fs_attr_cache.attr,
+#endif
+ NULL,
+};
+
+static struct attribute_group v9fs_attr_group = {
+ .attrs = v9fs_attrs,
+};
+
+/**
+ * v9fs_sysfs_init - Initialize the v9fs sysfs interface
+ *
+ */
+
+static int v9fs_sysfs_init(void)
+{
+ v9fs_kobj = kobject_create_and_add("9p", fs_kobj);
+ if (!v9fs_kobj)
+ return -ENOMEM;
+
+ if (sysfs_create_group(v9fs_kobj, &v9fs_attr_group)) {
+ kobject_put(v9fs_kobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * v9fs_sysfs_cleanup - Unregister the v9fs sysfs interface
+ *
+ */
+
+static void v9fs_sysfs_cleanup(void)
+{
+ sysfs_remove_group(v9fs_kobj, &v9fs_attr_group);
+ kobject_put(v9fs_kobj);
+}
+
+/**
+ * init_v9fs - Initialize module
*
*/
static int __init init_v9fs(void)
{
+ int err;
printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
/* TODO: Setup list of registered trasnport modules */
- return register_filesystem(&v9fs_fs_type);
+ err = register_filesystem(&v9fs_fs_type);
+ if (err < 0) {
+ printk(KERN_ERR "Failed to register filesystem\n");
+ return err;
+ }
+
+ err = v9fs_cache_register();
+ if (err < 0) {
+ printk(KERN_ERR "Failed to register v9fs for caching\n");
+ goto out_fs_unreg;
+ }
+
+ err = v9fs_sysfs_init();
+ if (err < 0) {
+ printk(KERN_ERR "Failed to register with sysfs\n");
+ goto out_sysfs_cleanup;
+ }
+
+ return 0;
+
+out_sysfs_cleanup:
+ v9fs_sysfs_cleanup();
+
+out_fs_unreg:
+ unregister_filesystem(&v9fs_fs_type);
+
+ return err;
}
/**
- * v9fs_init - shutdown module
+ * exit_v9fs - shutdown module
*
*/
static void __exit exit_v9fs(void)
{
+ v9fs_sysfs_cleanup();
+ v9fs_cache_unregister();
unregister_filesystem(&v9fs_fs_type);
}
enum p9_cache_modes {
CACHE_NONE,
CACHE_LOOSE,
+ CACHE_FSCACHE,
};
/**
* @debug: debug level
* @afid: authentication handle
* @cache: cache mode of type &p9_cache_modes
+ * @cachetag: the tag of the cache associated with this session
+ * @fscache: session cookie associated with FS-Cache
* @options: copy of options string given by user
* @uname: string user name to mount hierarchy as
* @aname: mount specifier for remote hierarchy
* @dfltgid: default numeric groupid to mount hierarchy as
* @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy
* @clnt: reference to 9P network client instantiated for this session
- * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug
+ * @slist: reference to list of registered 9p sessions
*
* This structure holds state for each session instance established during
* a sys_mount() .
unsigned short debug;
unsigned int afid;
unsigned int cache;
+#ifdef CONFIG_9P_FSCACHE
+ char *cachetag;
+ struct fscache_cookie *fscache;
+#endif
char *uname; /* user name to mount as */
char *aname; /* name of remote hierarchy being mounted */
unsigned int dfltgid; /* default gid for legacy support */
u32 uid; /* if ACCESS_SINGLE, the uid that has access */
struct p9_client *clnt; /* 9p client */
- struct dentry *debugfs_dir;
+ struct list_head slist; /* list of sessions registered with v9fs */
};
-extern struct dentry *v9fs_debugfs_root;
-
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
char *);
void v9fs_session_close(struct v9fs_session_info *v9ses);
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
+#ifdef CONFIG_9P_FSCACHE
+struct inode *v9fs_alloc_inode(struct super_block *sb);
+void v9fs_destroy_inode(struct inode *inode);
+#endif
+
struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+void v9fs_clear_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
int v9fs_dir_release(struct inode *inode, struct file *filp);
#include "v9fs.h"
#include "v9fs_vfs.h"
+#include "cache.h"
/**
* v9fs_vfs_readpage - read an entire page in from 9P
int retval;
loff_t offset;
char *buffer;
+ struct inode *inode;
+ inode = page->mapping->host;
P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+ BUG_ON(!PageLocked(page));
+
+ retval = v9fs_readpage_from_fscache(inode, page);
+ if (retval == 0)
+ return retval;
+
buffer = kmap(page);
offset = page_offset(page);
retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset);
- if (retval < 0)
+ if (retval < 0) {
+ v9fs_uncache_page(inode, page);
goto done;
+ }
memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
flush_dcache_page(page);
SetPageUptodate(page);
+
+ v9fs_readpage_to_fscache(inode, page);
retval = 0;
done:
return retval;
}
+/**
+ * v9fs_vfs_readpages - read a set of pages from 9P
+ *
+ * @filp: file being read
+ * @mapping: the address space
+ * @pages: list of pages to read
+ * @nr_pages: count of pages to read
+ *
+ */
+
+static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ int ret = 0;
+ struct inode *inode;
+
+ inode = mapping->host;
+ P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp);
+
+ ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages);
+ if (ret == 0)
+ return ret;
+
+ ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp);
+ P9_DPRINTK(P9_DEBUG_VFS, " = %d\n", ret);
+ return ret;
+}
+
+/**
+ * v9fs_release_page - release the private state associated with a page
+ *
+ * Returns 1 if the page can be released, false otherwise.
+ */
+
+static int v9fs_release_page(struct page *page, gfp_t gfp)
+{
+ if (PagePrivate(page))
+ return 0;
+
+ return v9fs_fscache_release_page(page, gfp);
+}
+
+/**
+ * v9fs_invalidate_page - Invalidate a page completely or partially
+ *
+ * @page: structure to page
+ * @offset: offset in the page
+ */
+
+static void v9fs_invalidate_page(struct page *page, unsigned long offset)
+{
+ if (offset == 0)
+ v9fs_fscache_invalidate_page(page);
+}
+
+/**
+ * v9fs_launder_page - Writeback a dirty page
+ * Since the writes go directly to the server, we simply return a 0
+ * here to indicate success.
+ *
+ * Returns 0 on success.
+ */
+
+static int v9fs_launder_page(struct page *page)
+{
+ return 0;
+}
+
const struct address_space_operations v9fs_addr_operations = {
.readpage = v9fs_vfs_readpage,
+ .readpages = v9fs_vfs_readpages,
+ .releasepage = v9fs_release_page,
+ .invalidatepage = v9fs_invalidate_page,
+ .launder_page = v9fs_launder_page,
};
#include <linux/string.h>
#include <linux/inet.h>
#include <linux/list.h>
+#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <linux/idr.h>
#include <net/9p/9p.h>
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "fid.h"
+#include "cache.h"
static const struct file_operations v9fs_cached_file_operations;
return err;
}
if (omode & P9_OTRUNC) {
- inode->i_size = 0;
+ i_size_write(inode, 0);
inode->i_blocks = 0;
}
if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses)))
/* enable cached file options */
if(file->f_op == &v9fs_file_operations)
file->f_op = &v9fs_cached_file_operations;
+
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_cache_inode_set_cookie(inode, file);
+#endif
}
return 0;
struct p9_client *clnt;
struct inode *inode = filp->f_path.dentry->d_inode;
int origin = *offset;
+ unsigned long pg_start, pg_end;
P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
(int)count, (int)*offset);
if (count < rsize)
rsize = count;
- n = p9_client_write(fid, NULL, data+total, *offset+total,
+ n = p9_client_write(fid, NULL, data+total, origin+total,
rsize);
if (n <= 0)
break;
} while (count > 0);
if (total > 0) {
- invalidate_inode_pages2_range(inode->i_mapping, origin,
- origin+total);
+ pg_start = origin >> PAGE_CACHE_SHIFT;
+ pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
+ if (inode->i_mapping && inode->i_mapping->nrpages)
+ invalidate_inode_pages2_range(inode->i_mapping,
+ pg_start, pg_end);
*offset += total;
- }
-
- if (*offset > inode->i_size) {
- inode->i_size = *offset;
- inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
+ i_size_write(inode, i_size_read(inode) + total);
+ inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
}
if (n < 0)
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "fid.h"
+#include "cache.h"
static const struct inode_operations v9fs_dir_inode_operations;
static const struct inode_operations v9fs_dir_inode_operations_ext;
wstat->extension = NULL;
}
+#ifdef CONFIG_9P_FSCACHE
+/**
+ * v9fs_alloc_inode - helper function to allocate an inode
+ * This callback is executed before setting up the inode so that we
+ * can associate a vcookie with each inode.
+ *
+ */
+
+struct inode *v9fs_alloc_inode(struct super_block *sb)
+{
+ struct v9fs_cookie *vcookie;
+ vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache,
+ GFP_KERNEL);
+ if (!vcookie)
+ return NULL;
+
+ vcookie->fscache = NULL;
+ vcookie->qid = NULL;
+ spin_lock_init(&vcookie->lock);
+ return &vcookie->inode;
+}
+
+/**
+ * v9fs_destroy_inode - destroy an inode
+ *
+ */
+
+void v9fs_destroy_inode(struct inode *inode)
+{
+ kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
+}
+#endif
+
/**
* v9fs_get_inode - helper function to setup an inode
* @sb: superblock
}
*/
+
+/**
+ * v9fs_clear_inode - release an inode
+ * @inode: inode to release
+ *
+ */
+void v9fs_clear_inode(struct inode *inode)
+{
+ filemap_fdatawrite(inode->i_mapping);
+
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_cache_inode_put_cookie(inode);
+#endif
+}
+
/**
* v9fs_inode_from_fid - populate an inode by issuing a attribute request
* @v9ses: session information
v9fs_stat2inode(st, ret, sb);
ret->i_ino = v9fs_qid2ino(&st->qid);
+
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_vcookie_set_qid(ret, &st->qid);
+ v9fs_cache_inode_get_cookie(ret);
+#endif
p9stat_free(st);
kfree(st);
+
return ret;
error:
P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
return simple_getattr(mnt, dentry, stat);
fid = v9fs_fid_lookup(dentry);
} else
inode->i_rdev = 0;
- inode->i_size = stat->length;
+ i_size_write(inode, stat->length);
/* not real number of blocks, but 512 byte ones ... */
- inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
+ inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
}
/**
#include "v9fs_vfs.h"
#include "fid.h"
-static void v9fs_clear_inode(struct inode *);
static const struct super_operations v9fs_super_ops;
-/**
- * v9fs_clear_inode - release an inode
- * @inode: inode to release
- *
- */
-
-static void v9fs_clear_inode(struct inode *inode)
-{
- filemap_fdatawrite(inode->i_mapping);
-}
-
/**
* v9fs_set_super - set the superblock
* @s: super block
}
static const struct super_operations v9fs_super_ops = {
+#ifdef CONFIG_9P_FSCACHE
+ .alloc_inode = v9fs_alloc_inode,
+ .destroy_inode = v9fs_destroy_inode,
+#endif
.statfs = simple_statfs,
.clear_inode = v9fs_clear_inode,
.show_options = generic_show_options,
adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
int create)
{
- if (block < 0)
- goto abort_negative;
-
if (!create) {
if (block >= inode->i_blocks)
goto abort_toobig;
/* don't support allocation of blocks yet */
return -EIO;
-abort_negative:
- adfs_error(inode->i_sb, "block %d < 0", block);
- return -EIO;
-
abort_toobig:
return 0;
}
/* Taken over from the old code... */
/* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
{
int retval = -EPERM;
unsigned int ia_valid = attr->ia_valid;
error:
return retval;
}
-
EXPORT_SYMBOL(inode_change_ok);
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode: the inode to be truncated
+ * @offset: the new size to assign to the inode
+ * @Returns: 0 on success, -ve errno on failure
+ *
+ * inode_newsize_ok will check filesystem limits and ulimits to check that the
+ * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
+ * when necessary. Caller must not proceed with inode size change if failure is
+ * returned. @inode must be a file (not directory), with appropriate
+ * permissions to allow truncate (inode_newsize_ok does NOT check these
+ * conditions).
+ *
+ * inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(const struct inode *inode, loff_t offset)
+{
+ if (inode->i_size < offset) {
+ unsigned long limit;
+
+ limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+ if (limit != RLIM_INFINITY && offset > limit)
+ goto out_sig;
+ if (offset > inode->i_sb->s_maxbytes)
+ goto out_big;
+ } else {
+ /*
+ * truncation of in-use swapfiles is disallowed - it would
+ * cause subsequent swapout to scribble on the now-freed
+ * blocks.
+ */
+ if (IS_SWAPFILE(inode))
+ return -ETXTBSY;
+ }
+
+ return 0;
+out_sig:
+ send_sig(SIGXFSZ, current, 0);
+out_big:
+ return -EFBIG;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
+
int inode_setattr(struct inode * inode, struct iattr * attr)
{
unsigned int ia_valid = attr->ia_valid;
{
kfree(BEFS_SB(sb)->mount_opts.iocharset);
BEFS_SB(sb)->mount_opts.iocharset = NULL;
-
- if (BEFS_SB(sb)->nls) {
- unload_nls(BEFS_SB(sb)->nls);
- BEFS_SB(sb)->nls = NULL;
- }
-
+ unload_nls(BEFS_SB(sb)->nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
int numnote;
};
-static int fill_note_info(struct elfhdr *elf, int phdrs,
- struct elf_note_info *info,
- long signr, struct pt_regs *regs)
+static int elf_note_info_init(struct elf_note_info *info)
{
-#define NUM_NOTES 6
- struct list_head *t;
-
- info->notes = NULL;
- info->prstatus = NULL;
- info->psinfo = NULL;
- info->fpu = NULL;
-#ifdef ELF_CORE_COPY_XFPREGS
- info->xfpu = NULL;
-#endif
+ memset(info, 0, sizeof(*info));
INIT_LIST_HEAD(&info->thread_list);
- info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
- GFP_KERNEL);
+ /* Allocate space for six ELF notes */
+ info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
if (!info->notes)
return 0;
info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
if (!info->psinfo)
- return 0;
+ goto notes_free;
info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
if (!info->prstatus)
- return 0;
+ goto psinfo_free;
info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
if (!info->fpu)
- return 0;
+ goto prstatus_free;
#ifdef ELF_CORE_COPY_XFPREGS
info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
if (!info->xfpu)
- return 0;
+ goto fpu_free;
+#endif
+ return 1;
+#ifdef ELF_CORE_COPY_XFPREGS
+ fpu_free:
+ kfree(info->fpu);
#endif
+ prstatus_free:
+ kfree(info->prstatus);
+ psinfo_free:
+ kfree(info->psinfo);
+ notes_free:
+ kfree(info->notes);
+ return 0;
+}
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+ struct elf_note_info *info,
+ long signr, struct pt_regs *regs)
+{
+ struct list_head *t;
+
+ if (!elf_note_info_init(info))
+ return 0;
- info->thread_status_size = 0;
if (signr) {
struct core_thread *ct;
struct elf_thread_status *ets;
#endif
return 1;
-
-#undef NUM_NOTES
}
static size_t get_note_info_size(struct elf_note_info *info)
}
stack_size = exec_params.stack_size;
- if (stack_size < interp_params.stack_size)
- stack_size = interp_params.stack_size;
-
if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
executable_stack = EXSTACK_ENABLE_X;
else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
executable_stack = EXSTACK_DISABLE_X;
- else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
- executable_stack = EXSTACK_ENABLE_X;
- else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
- executable_stack = EXSTACK_DISABLE_X;
else
executable_stack = EXSTACK_DEFAULT;
+ if (stack_size == 0) {
+ stack_size = interp_params.stack_size;
+ if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
+ executable_stack = EXSTACK_ENABLE_X;
+ else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
+ executable_stack = EXSTACK_DISABLE_X;
+ else
+ executable_stack = EXSTACK_DEFAULT;
+ }
+
retval = -ENOEXEC;
if (stack_size == 0)
goto error;
ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
if (ret <= 0)
break;
- if (ret >= (unsigned long) -4096)
- break;
len -= ret;
strm.next_in = buf;
"(%d != %d)", (unsigned) r, curid, id);
goto failed;
} else if ( ! p->lib_list[id].loaded &&
- load_flat_shared_library(id, p) > (unsigned long) -4096) {
+ IS_ERR_VALUE(load_flat_shared_library(id, p))) {
printk("BINFMT_FLAT: failed to load library %d", id);
goto failed;
}
textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_EXECUTABLE, 0);
up_write(¤t->mm->mmap_sem);
- if (!textpos || textpos >= (unsigned long) -4096) {
+ if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
textpos = (unsigned long) -ENOMEM;
printk("Unable to mmap process text, errno %d\n", (int)-textpos);
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
up_write(¤t->mm->mmap_sem);
- if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
+ if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
if (!realdatastart)
realdatastart = (unsigned long) -ENOMEM;
printk("Unable to allocate RAM for process data, errno %d\n",
result = bprm->file->f_op->read(bprm->file, (char *) datapos,
data_len + (relocs * sizeof(unsigned long)), &fpos);
}
- if (result >= (unsigned long)-4096) {
+ if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
do_munmap(current->mm, textpos, text_len);
do_munmap(current->mm, realdatastart, data_len + extra);
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
up_write(¤t->mm->mmap_sem);
- if (!textpos || textpos >= (unsigned long) -4096) {
+ if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
textpos = (unsigned long) -ENOMEM;
printk("Unable to allocate RAM for process text/data, errno %d\n",
fpos = 0;
result = bprm->file->f_op->read(bprm->file,
(char *) textpos, text_len, &fpos);
- if (result < (unsigned long) -4096)
+ if (!IS_ERR_VALUE(result))
result = decompress_exec(bprm, text_len, (char *) datapos,
data_len + (relocs * sizeof(unsigned long)), 0);
}
fpos = 0;
result = bprm->file->f_op->read(bprm->file,
(char *) textpos, text_len, &fpos);
- if (result < (unsigned long) -4096) {
+ if (!IS_ERR_VALUE(result)) {
fpos = ntohl(hdr->data_start);
result = bprm->file->f_op->read(bprm->file, (char *) datapos,
data_len + (relocs * sizeof(unsigned long)), &fpos);
}
}
- if (result >= (unsigned long)-4096) {
+ if (IS_ERR_VALUE(result)) {
printk("Unable to read code+data+bss, errno %d\n",(int)-result);
do_munmap(current->mm, textpos, text_len + data_len + extra +
MAX_SHARED_LIBS * sizeof(unsigned long));
res = prepare_binprm(&bprm);
- if (res <= (unsigned long)-4096)
+ if (!IS_ERR_VALUE(res))
res = load_flat_file(&bprm, libs, id, NULL);
abort_creds(bprm.cred);
stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
res = load_flat_file(bprm, &libinfo, 0, &stack_len);
- if (res > (unsigned long)-4096)
+ if (IS_ERR_VALUE(res))
return res;
/* Update data segment pointers for all libraries */
* freeze_bdev -- lock a filesystem and force it into a consistent state
* @bdev: blockdevice to lock
*
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
* If a superblock is found on this device, we take the s_umount semaphore
* on it to make sure nobody unmounts until the snapshot creation is done.
* The reference counter (bd_fsfreeze_count) guarantees that only the last
int error = 0;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- bdev->bd_fsfreeze_count++;
+ if (++bdev->bd_fsfreeze_count > 1) {
+ /*
+ * We don't even need to grab a reference - the first call
+ * to freeze_bdev grab an active reference and only the last
+ * thaw_bdev drops it.
+ */
sb = get_super(bdev);
+ drop_super(sb);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return sb;
}
- bdev->bd_fsfreeze_count++;
-
- down(&bdev->bd_mount_sem);
- sb = get_super(bdev);
- if (sb && !(sb->s_flags & MS_RDONLY)) {
- sb->s_frozen = SB_FREEZE_WRITE;
- smp_wmb();
-
- sync_filesystem(sb);
-
- sb->s_frozen = SB_FREEZE_TRANS;
- smp_wmb();
-
- sync_blockdev(sb->s_bdev);
-
- if (sb->s_op->freeze_fs) {
- error = sb->s_op->freeze_fs(sb);
- if (error) {
- printk(KERN_ERR
- "VFS:Filesystem freeze failed\n");
- sb->s_frozen = SB_UNFROZEN;
- drop_super(sb);
- up(&bdev->bd_mount_sem);
- bdev->bd_fsfreeze_count--;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return ERR_PTR(error);
- }
+
+ sb = get_active_super(bdev);
+ if (!sb)
+ goto out;
+ if (sb->s_flags & MS_RDONLY) {
+ deactivate_locked_super(sb);
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return sb;
+ }
+
+ sb->s_frozen = SB_FREEZE_WRITE;
+ smp_wmb();
+
+ sync_filesystem(sb);
+
+ sb->s_frozen = SB_FREEZE_TRANS;
+ smp_wmb();
+
+ sync_blockdev(sb->s_bdev);
+
+ if (sb->s_op->freeze_fs) {
+ error = sb->s_op->freeze_fs(sb);
+ if (error) {
+ printk(KERN_ERR
+ "VFS:Filesystem freeze failed\n");
+ sb->s_frozen = SB_UNFROZEN;
+ deactivate_locked_super(sb);
+ bdev->bd_fsfreeze_count--;
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return ERR_PTR(error);
}
}
+ up_write(&sb->s_umount);
+ out:
sync_blockdev(bdev);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
- return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
+ return sb; /* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);
*/
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
- int error = 0;
+ int error = -EINVAL;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (!bdev->bd_fsfreeze_count) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return -EINVAL;
- }
-
- bdev->bd_fsfreeze_count--;
- if (bdev->bd_fsfreeze_count > 0) {
- if (sb)
- drop_super(sb);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return 0;
- }
-
- if (sb) {
- BUG_ON(sb->s_bdev != bdev);
- if (!(sb->s_flags & MS_RDONLY)) {
- if (sb->s_op->unfreeze_fs) {
- error = sb->s_op->unfreeze_fs(sb);
- if (error) {
- printk(KERN_ERR
- "VFS:Filesystem thaw failed\n");
- sb->s_frozen = SB_FREEZE_TRANS;
- bdev->bd_fsfreeze_count++;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return error;
- }
- }
- sb->s_frozen = SB_UNFROZEN;
- smp_wmb();
- wake_up(&sb->s_wait_unfrozen);
+ if (!bdev->bd_fsfreeze_count)
+ goto out_unlock;
+
+ error = 0;
+ if (--bdev->bd_fsfreeze_count > 0)
+ goto out_unlock;
+
+ if (!sb)
+ goto out_unlock;
+
+ BUG_ON(sb->s_bdev != bdev);
+ down_write(&sb->s_umount);
+ if (sb->s_flags & MS_RDONLY)
+ goto out_deactivate;
+
+ if (sb->s_op->unfreeze_fs) {
+ error = sb->s_op->unfreeze_fs(sb);
+ if (error) {
+ printk(KERN_ERR
+ "VFS:Filesystem thaw failed\n");
+ sb->s_frozen = SB_FREEZE_TRANS;
+ bdev->bd_fsfreeze_count++;
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return error;
}
- drop_super(sb);
}
- up(&bdev->bd_mount_sem);
+ sb->s_frozen = SB_UNFROZEN;
+ smp_wmb();
+ wake_up(&sb->s_wait_unfrozen);
+
+out_deactivate:
+ if (sb)
+ deactivate_locked_super(sb);
+out_unlock:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return 0;
}
memset(bdev, 0, sizeof(*bdev));
mutex_init(&bdev->bd_mutex);
- sema_init(&bdev->bd_mount_sem, 1);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
/* number of things on the pending list */
atomic_t num_pending;
+ /* reference counter for this struct */
+ atomic_t refs;
+
unsigned long sequence;
/* protects the pending list. */
unsigned long flags;
spin_lock_irqsave(&worker->workers->lock, flags);
worker->idle = 1;
- list_move(&worker->worker_list, &worker->workers->idle_list);
+
+ /* the list may be empty if the worker is just starting */
+ if (!list_empty(&worker->worker_list)) {
+ list_move(&worker->worker_list,
+ &worker->workers->idle_list);
+ }
spin_unlock_irqrestore(&worker->workers->lock, flags);
}
}
unsigned long flags;
spin_lock_irqsave(&worker->workers->lock, flags);
worker->idle = 0;
- list_move_tail(&worker->worker_list,
- &worker->workers->worker_list);
+
+ if (!list_empty(&worker->worker_list)) {
+ list_move_tail(&worker->worker_list,
+ &worker->workers->worker_list);
+ }
spin_unlock_irqrestore(&worker->workers->lock, flags);
}
}
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
- struct btrfs_work *work)
+static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
+ struct btrfs_workers *workers = worker->workers;
unsigned long flags;
+ rmb();
+ if (!workers->atomic_start_pending)
+ return;
+
+ spin_lock_irqsave(&workers->lock, flags);
+ if (!workers->atomic_start_pending)
+ goto out;
+
+ workers->atomic_start_pending = 0;
+ if (workers->num_workers >= workers->max_workers)
+ goto out;
+
+ spin_unlock_irqrestore(&workers->lock, flags);
+ btrfs_start_workers(workers, 1);
+ return;
+
+out:
+ spin_unlock_irqrestore(&workers->lock, flags);
+}
+
+static noinline int run_ordered_completions(struct btrfs_workers *workers,
+ struct btrfs_work *work)
+{
if (!workers->ordered)
return 0;
set_bit(WORK_DONE_BIT, &work->flags);
- spin_lock_irqsave(&workers->lock, flags);
+ spin_lock(&workers->order_lock);
while (1) {
if (!list_empty(&workers->prio_order_list)) {
if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
break;
- spin_unlock_irqrestore(&workers->lock, flags);
+ spin_unlock(&workers->order_lock);
work->ordered_func(work);
/* now take the lock again and call the freeing code */
- spin_lock_irqsave(&workers->lock, flags);
+ spin_lock(&workers->order_lock);
list_del(&work->order_list);
work->ordered_free(work);
}
- spin_unlock_irqrestore(&workers->lock, flags);
+ spin_unlock(&workers->order_lock);
return 0;
}
+static void put_worker(struct btrfs_worker_thread *worker)
+{
+ if (atomic_dec_and_test(&worker->refs))
+ kfree(worker);
+}
+
+static int try_worker_shutdown(struct btrfs_worker_thread *worker)
+{
+ int freeit = 0;
+
+ spin_lock_irq(&worker->lock);
+ spin_lock(&worker->workers->lock);
+ if (worker->workers->num_workers > 1 &&
+ worker->idle &&
+ !worker->working &&
+ !list_empty(&worker->worker_list) &&
+ list_empty(&worker->prio_pending) &&
+ list_empty(&worker->pending) &&
+ atomic_read(&worker->num_pending) == 0) {
+ freeit = 1;
+ list_del_init(&worker->worker_list);
+ worker->workers->num_workers--;
+ }
+ spin_unlock(&worker->workers->lock);
+ spin_unlock_irq(&worker->lock);
+
+ if (freeit)
+ put_worker(worker);
+ return freeit;
+}
+
+static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
+ struct list_head *prio_head,
+ struct list_head *head)
+{
+ struct btrfs_work *work = NULL;
+ struct list_head *cur = NULL;
+
+ if(!list_empty(prio_head))
+ cur = prio_head->next;
+
+ smp_mb();
+ if (!list_empty(&worker->prio_pending))
+ goto refill;
+
+ if (!list_empty(head))
+ cur = head->next;
+
+ if (cur)
+ goto out;
+
+refill:
+ spin_lock_irq(&worker->lock);
+ list_splice_tail_init(&worker->prio_pending, prio_head);
+ list_splice_tail_init(&worker->pending, head);
+
+ if (!list_empty(prio_head))
+ cur = prio_head->next;
+ else if (!list_empty(head))
+ cur = head->next;
+ spin_unlock_irq(&worker->lock);
+
+ if (!cur)
+ goto out_fail;
+
+out:
+ work = list_entry(cur, struct btrfs_work, list);
+
+out_fail:
+ return work;
+}
+
/*
* main loop for servicing work items
*/
static int worker_loop(void *arg)
{
struct btrfs_worker_thread *worker = arg;
- struct list_head *cur;
+ struct list_head head;
+ struct list_head prio_head;
struct btrfs_work *work;
+
+ INIT_LIST_HEAD(&head);
+ INIT_LIST_HEAD(&prio_head);
+
do {
- spin_lock_irq(&worker->lock);
-again_locked:
+again:
while (1) {
- if (!list_empty(&worker->prio_pending))
- cur = worker->prio_pending.next;
- else if (!list_empty(&worker->pending))
- cur = worker->pending.next;
- else
+
+
+ work = get_next_work(worker, &prio_head, &head);
+ if (!work)
break;
- work = list_entry(cur, struct btrfs_work, list);
list_del(&work->list);
clear_bit(WORK_QUEUED_BIT, &work->flags);
work->worker = worker;
- spin_unlock_irq(&worker->lock);
work->func(work);
*/
run_ordered_completions(worker->workers, work);
- spin_lock_irq(&worker->lock);
- check_idle_worker(worker);
+ check_pending_worker_creates(worker);
+
}
+
+ spin_lock_irq(&worker->lock);
+ check_idle_worker(worker);
+
if (freezing(current)) {
worker->working = 0;
spin_unlock_irq(&worker->lock);
spin_lock_irq(&worker->lock);
set_current_state(TASK_INTERRUPTIBLE);
if (!list_empty(&worker->pending) ||
- !list_empty(&worker->prio_pending))
- goto again_locked;
+ !list_empty(&worker->prio_pending)) {
+ spin_unlock_irq(&worker->lock);
+ goto again;
+ }
/*
* this makes sure we get a wakeup when someone
worker->working = 0;
spin_unlock_irq(&worker->lock);
- if (!kthread_should_stop())
- schedule();
+ if (!kthread_should_stop()) {
+ schedule_timeout(HZ * 120);
+ if (!worker->working &&
+ try_worker_shutdown(worker)) {
+ return 0;
+ }
+ }
}
__set_current_state(TASK_RUNNING);
}
{
struct list_head *cur;
struct btrfs_worker_thread *worker;
+ int can_stop;
+ spin_lock_irq(&workers->lock);
list_splice_init(&workers->idle_list, &workers->worker_list);
while (!list_empty(&workers->worker_list)) {
cur = workers->worker_list.next;
worker = list_entry(cur, struct btrfs_worker_thread,
worker_list);
- kthread_stop(worker->task);
- list_del(&worker->worker_list);
- kfree(worker);
+
+ atomic_inc(&worker->refs);
+ workers->num_workers -= 1;
+ if (!list_empty(&worker->worker_list)) {
+ list_del_init(&worker->worker_list);
+ put_worker(worker);
+ can_stop = 1;
+ } else
+ can_stop = 0;
+ spin_unlock_irq(&workers->lock);
+ if (can_stop)
+ kthread_stop(worker->task);
+ spin_lock_irq(&workers->lock);
+ put_worker(worker);
}
+ spin_unlock_irq(&workers->lock);
return 0;
}
INIT_LIST_HEAD(&workers->order_list);
INIT_LIST_HEAD(&workers->prio_order_list);
spin_lock_init(&workers->lock);
+ spin_lock_init(&workers->order_lock);
workers->max_workers = max;
workers->idle_thresh = 32;
workers->name = name;
workers->ordered = 0;
+ workers->atomic_start_pending = 0;
+ workers->atomic_worker_start = 0;
}
/*
INIT_LIST_HEAD(&worker->prio_pending);
INIT_LIST_HEAD(&worker->worker_list);
spin_lock_init(&worker->lock);
+
atomic_set(&worker->num_pending, 0);
+ atomic_set(&worker->refs, 1);
worker->workers = workers;
worker->task = kthread_run(worker_loop, worker,
"btrfs-%s-%d", workers->name,
kfree(worker);
goto fail;
}
-
spin_lock_irq(&workers->lock);
list_add_tail(&worker->worker_list, &workers->idle_list);
worker->idle = 1;
*/
next = workers->worker_list.next;
worker = list_entry(next, struct btrfs_worker_thread, worker_list);
- atomic_inc(&worker->num_pending);
worker->sequence++;
if (worker->sequence % workers->idle_thresh == 0)
{
struct btrfs_worker_thread *worker;
unsigned long flags;
+ struct list_head *fallback;
again:
spin_lock_irqsave(&workers->lock, flags);
worker = next_worker(workers);
- spin_unlock_irqrestore(&workers->lock, flags);
if (!worker) {
- spin_lock_irqsave(&workers->lock, flags);
if (workers->num_workers >= workers->max_workers) {
- struct list_head *fallback = NULL;
- /*
- * we have failed to find any workers, just
- * return the force one
- */
- if (!list_empty(&workers->worker_list))
- fallback = workers->worker_list.next;
- if (!list_empty(&workers->idle_list))
- fallback = workers->idle_list.next;
- BUG_ON(!fallback);
- worker = list_entry(fallback,
- struct btrfs_worker_thread, worker_list);
- spin_unlock_irqrestore(&workers->lock, flags);
+ goto fallback;
+ } else if (workers->atomic_worker_start) {
+ workers->atomic_start_pending = 1;
+ goto fallback;
} else {
spin_unlock_irqrestore(&workers->lock, flags);
/* we're below the limit, start another worker */
goto again;
}
}
+ goto found;
+
+fallback:
+ fallback = NULL;
+ /*
+ * we have failed to find any workers, just
+ * return the first one we can find.
+ */
+ if (!list_empty(&workers->worker_list))
+ fallback = workers->worker_list.next;
+ if (!list_empty(&workers->idle_list))
+ fallback = workers->idle_list.next;
+ BUG_ON(!fallback);
+ worker = list_entry(fallback,
+ struct btrfs_worker_thread, worker_list);
+found:
+ /*
+ * this makes sure the worker doesn't exit before it is placed
+ * onto a busy/idle list
+ */
+ atomic_inc(&worker->num_pending);
+ spin_unlock_irqrestore(&workers->lock, flags);
return worker;
}
spin_lock(&worker->workers->lock);
worker->idle = 0;
list_move_tail(&worker->worker_list,
- &worker->workers->worker_list);
+ &worker->workers->worker_list);
spin_unlock(&worker->workers->lock);
}
if (!worker->working) {
worker->working = 1;
}
- spin_unlock_irqrestore(&worker->lock, flags);
if (wake)
wake_up_process(worker->task);
+ spin_unlock_irqrestore(&worker->lock, flags);
out:
return 0;
worker = find_worker(workers);
if (workers->ordered) {
- spin_lock_irqsave(&workers->lock, flags);
+ /*
+ * you're not allowed to do ordered queues from an
+ * interrupt handler
+ */
+ spin_lock(&workers->order_lock);
if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
list_add_tail(&work->order_list,
&workers->prio_order_list);
} else {
list_add_tail(&work->order_list, &workers->order_list);
}
- spin_unlock_irqrestore(&workers->lock, flags);
+ spin_unlock(&workers->order_lock);
} else {
INIT_LIST_HEAD(&work->order_list);
}
list_add_tail(&work->list, &worker->prio_pending);
else
list_add_tail(&work->list, &worker->pending);
- atomic_inc(&worker->num_pending);
check_busy_worker(worker);
/*
wake = 1;
worker->working = 1;
- spin_unlock_irqrestore(&worker->lock, flags);
-
if (wake)
wake_up_process(worker->task);
+ spin_unlock_irqrestore(&worker->lock, flags);
+
out:
return 0;
}
/* force completions in the order they were queued */
int ordered;
+ /* more workers required, but in an interrupt handler */
+ int atomic_start_pending;
+
+ /*
+ * are we allowed to sleep while starting workers or are we required
+ * to start them at a later time?
+ */
+ int atomic_worker_start;
+
/* list with all the work threads. The workers on the idle thread
* may be actively servicing jobs, but they haven't yet hit the
* idle thresh limit above.
/* lock for finding the next worker thread to queue on */
spinlock_t lock;
+ /* lock for the ordered lists */
+ spinlock_t order_lock;
+
/* extra name for this worker, used for current->name */
char *name;
};
* of these.
*/
unsigned ordered_data_close:1;
+ unsigned dummy_inode:1;
struct inode vfs_inode;
};
*/
set_page_extent_mapped(page);
lock_extent(tree, last_offset, end, GFP_NOFS);
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, last_offset,
PAGE_CACHE_SIZE);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em || last_offset < em->start ||
(last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
em_tree = &BTRFS_I(inode)->extent_tree;
/* we need the actual starting offset of this extent in the file */
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree,
page_offset(bio->bi_io_vec->bv_page),
PAGE_CACHE_SIZE);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
int split;
int num_doubles = 0;
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (extend && data_size + btrfs_item_size_nr(l, slot) +
+ sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
+ return -EOVERFLOW;
+
/* first try to make some room by pushing left and right */
if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
wret = push_leaf_right(trans, root, path, data_size, 0);
*/
#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
/*
* we can actually store much bigger names, but lets not confuse the rest
* of linux
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
u64 bytes_readonly; /* total bytes that are read only */
+ u64 bytes_super; /* total bytes reserved for the super blocks */
/* delalloc accounting */
u64 bytes_delalloc; /* number of bytes reserved for allocation,
BTRFS_CACHE_FINISHED = 2,
};
+struct btrfs_caching_control {
+ struct list_head list;
+ struct mutex mutex;
+ wait_queue_head_t wait;
+ struct btrfs_block_group_cache *block_group;
+ u64 progress;
+ atomic_t count;
+};
+
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
spinlock_t lock;
u64 pinned;
u64 reserved;
+ u64 bytes_super;
u64 flags;
u64 sectorsize;
int extents_thresh;
int dirty;
/* cache tracking stuff */
- wait_queue_head_t caching_q;
int cached;
+ struct btrfs_caching_control *caching_ctl;
+ u64 last_byte_to_unpin;
struct btrfs_space_info *space_info;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
+
+ spinlock_t fs_roots_radix_lock;
struct radix_tree_root fs_roots_radix;
/* block group cache stuff */
spinlock_t block_group_cache_lock;
struct rb_root block_group_cache_tree;
- struct extent_io_tree pinned_extents;
+ struct extent_io_tree freed_extents[2];
+ struct extent_io_tree *pinned_extents;
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
struct mutex chunk_mutex;
- struct mutex drop_mutex;
struct mutex volume_mutex;
- struct mutex tree_reloc_mutex;
- struct rw_semaphore extent_commit_sem;
-
/*
* this protects the ordered operations list only while we are
* processing all of the entries on it. This way we make
* before jumping into the main commit.
*/
struct mutex ordered_operations_mutex;
+ struct rw_semaphore extent_commit_sem;
+
+ struct rw_semaphore subvol_sem;
+
+ struct srcu_struct subvol_srcu;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
+ struct list_head caching_block_groups;
atomic_t nr_async_submits;
atomic_t async_submit_draining;
u32 stripesize;
u32 type;
- u64 highest_inode;
- u64 last_inode_alloc;
+
+ u64 highest_objectid;
int ref_cows;
int track_dirty;
+ int in_radix;
+
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_update_pinned_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int pin);
+int btrfs_pin_extent(struct btrfs_root *root,
+ u64 bytenr, u64 num, int reserved);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 owner, u64 offset);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_io_tree *unpin);
+ struct btrfs_root *root);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root);
+int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytes_used,
u64 type, u64 chunk_objectid, u64 chunk_offset,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
struct extent_buffer *parent);
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
- struct btrfs_path *path,
- u64 root_id, u64 ref_id);
+ struct btrfs_path *path,
+ u64 root_id, u64 ref_id);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
- u64 root_id, u8 type, u64 ref_id,
- u64 dirid, u64 sequence,
+ u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
+ const char *name, int name_len);
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *tree_root,
+ u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
const char *name, int name_len);
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key);
int btrfs_search_root(struct btrfs_root *root, u64 search_start,
u64 *found_objectid);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
+int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
int btrfs_set_root_node(struct btrfs_root_item *item,
struct extent_buffer *node);
/* dir-item.c */
struct btrfs_path *path, u64 dir,
u64 objectid, const char *name, int name_len,
int mod);
+struct btrfs_dir_item *
+btrfs_search_dir_index_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 dirid,
+ const char *name, int name_len);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len);
struct btrfs_root *root, u64 offset);
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset);
+int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
/* inode-map.c */
int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct inode *parent_inode, struct inode *inode,
const char *name, int name_len, int add_backref, u64 index);
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *dir, u64 objectid,
+ const char *name, int name_len);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode, u64 new_size,
int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, struct dentry *dentry,
+ struct btrfs_root *new_root,
u64 new_dirid, u64 alloc_hint);
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio, unsigned long bio_flags);
void btrfs_dirty_inode(struct inode *inode);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
+void btrfs_drop_inode(struct inode *inode);
int btrfs_init_cachep(void);
void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size);
+int btrfs_invalidate_inodes(struct btrfs_root *root);
+extern struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
u64 start, u64 end, u64 locked_end,
- u64 inline_limit, u64 *hint_block);
+ u64 inline_limit, u64 *hint_block, int drop_cache);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode, u64 start, u64 end);
return btrfs_match_dir_item_name(root, path, name, name_len);
}
+struct btrfs_dir_item *
+btrfs_search_dir_index_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 dirid,
+ const char *name, int name_len)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_dir_item *di;
+ struct btrfs_key key;
+ u32 nritems;
+ int ret;
+
+ key.objectid = dirid;
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+
+ while (1) {
+ if (path->slots[0] >= nritems) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0)
+ break;
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
+ break;
+
+ di = btrfs_match_dir_item_name(root, path, name, name_len);
+ if (di)
+ return di;
+
+ path->slots[0]++;
+ }
+ return NULL;
+}
+
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
+static void free_fs_root(struct btrfs_root *root);
static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
struct extent_map *em;
int ret;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em) {
em->bdev =
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
goto out;
}
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
em = alloc_extent_map(GFP_NOFS);
if (!em) {
em->block_start = 0;
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
u64 failed_start = em->start;
free_extent_map(em);
em = NULL;
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret)
em = ERR_PTR(ret);
root->fs_info = fs_info;
root->objectid = objectid;
root->last_trans = 0;
- root->highest_inode = 0;
- root->last_inode_alloc = 0;
+ root->highest_objectid = 0;
root->name = NULL;
root->in_sysfs = 0;
root->inode_tree.rb_node = NULL;
root, fs_info, objectid);
ret = btrfs_find_last_root(tree_root, objectid,
&root->root_item, &root->root_key);
+ if (ret > 0)
+ return -ENOENT;
BUG_ON(ret);
generation = btrfs_root_generation(&root->root_item);
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
- root->commit_root = btrfs_root_node(root);
BUG_ON(!root->node);
+ root->commit_root = btrfs_root_node(root);
return 0;
}
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_path *path;
struct extent_buffer *l;
- u64 highest_inode;
u64 generation;
u32 blocksize;
int ret = 0;
kfree(root);
return ERR_PTR(ret);
}
- goto insert;
+ goto out;
}
__setup_root(tree_root->nodesize, tree_root->leafsize,
path = btrfs_alloc_path();
BUG_ON(!path);
ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
- if (ret != 0) {
- if (ret > 0)
- ret = -ENOENT;
- goto out;
+ if (ret == 0) {
+ l = path->nodes[0];
+ read_extent_buffer(l, &root->root_item,
+ btrfs_item_ptr_offset(l, path->slots[0]),
+ sizeof(root->root_item));
+ memcpy(&root->root_key, location, sizeof(*location));
}
- l = path->nodes[0];
- read_extent_buffer(l, &root->root_item,
- btrfs_item_ptr_offset(l, path->slots[0]),
- sizeof(root->root_item));
- memcpy(&root->root_key, location, sizeof(*location));
- ret = 0;
-out:
- btrfs_release_path(root, path);
btrfs_free_path(path);
if (ret) {
- kfree(root);
+ if (ret > 0)
+ ret = -ENOENT;
return ERR_PTR(ret);
}
+
generation = btrfs_root_generation(&root->root_item);
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
root->commit_root = btrfs_root_node(root);
BUG_ON(!root->node);
-insert:
- if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+out:
+ if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
root->ref_cows = 1;
- ret = btrfs_find_highest_inode(root, &highest_inode);
- if (ret == 0) {
- root->highest_inode = highest_inode;
- root->last_inode_alloc = highest_inode;
- }
- }
+
return root;
}
return fs_info->dev_root;
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
return fs_info->csum_root;
-
+again:
+ spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
(unsigned long)location->objectid);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
if (root)
return root;
+ ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+ if (ret == 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ return ERR_PTR(ret);
+
root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
if (IS_ERR(root))
return root;
+ WARN_ON(btrfs_root_refs(&root->root_item) == 0);
set_anon_super(&root->anon_super, NULL);
+ ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+ if (ret)
+ goto fail;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
ret = radix_tree_insert(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
root);
+ if (ret == 0)
+ root->in_radix = 1;
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ radix_tree_preload_end();
if (ret) {
- free_extent_buffer(root->node);
- kfree(root);
- return ERR_PTR(ret);
+ if (ret == -EEXIST) {
+ free_fs_root(root);
+ goto again;
+ }
+ goto fail;
}
- if (!(fs_info->sb->s_flags & MS_RDONLY)) {
- ret = btrfs_find_dead_roots(fs_info->tree_root,
- root->root_key.objectid);
- BUG_ON(ret);
+
+ ret = btrfs_find_dead_roots(fs_info->tree_root,
+ root->root_key.objectid);
+ WARN_ON(ret);
+
+ if (!(fs_info->sb->s_flags & MS_RDONLY))
btrfs_orphan_cleanup(root);
- }
+
return root;
+fail:
+ free_fs_root(root);
+ return ERR_PTR(ret);
}
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *location,
const char *name, int namelen)
{
+ return btrfs_read_fs_root_no_name(fs_info, location);
+#if 0
struct btrfs_root *root;
int ret;
kfree(root);
return ERR_PTR(ret);
}
-#if 0
+
ret = btrfs_sysfs_add_root(root);
if (ret) {
free_extent_buffer(root->node);
kfree(root);
return ERR_PTR(ret);
}
-#endif
root->in_sysfs = 1;
return root;
+#endif
}
static int btrfs_congested_fn(void *congested_data, int bdi_bits)
offset = page_offset(page);
em_tree = &BTRFS_I(inode)->extent_tree;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em) {
__unplug_io_fn(bdi, page);
return;
err = bdi_register(bdi, NULL, "btrfs-%d",
atomic_inc_return(&btrfs_bdi_num));
- if (err)
+ if (err) {
+ bdi_destroy(bdi);
return err;
+ }
bdi->ra_pages = default_backing_dev_info.ra_pages;
bdi->unplug_io_fn = btrfs_unplug_io_fn;
break;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
- mutex_lock(&root->fs_info->cleaner_mutex);
- btrfs_clean_old_snapshots(root);
- mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
+ mutex_trylock(&root->fs_info->cleaner_mutex)) {
+ btrfs_clean_old_snapshots(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+ }
if (freezing(current)) {
refrigerator();
err = -ENOMEM;
goto fail;
}
- INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
+
+ ret = init_srcu_struct(&fs_info->subvol_srcu);
+ if (ret) {
+ err = ret;
+ goto fail;
+ }
+
+ ret = setup_bdi(fs_info, &fs_info->bdi);
+ if (ret) {
+ err = ret;
+ goto fail_srcu;
+ }
+
+ fs_info->btree_inode = new_inode(sb);
+ if (!fs_info->btree_inode) {
+ err = -ENOMEM;
+ goto fail_bdi;
+ }
+
+ INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations);
+ INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
+ spin_lock_init(&fs_info->fs_roots_radix_lock);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
fs_info->sb = sb;
fs_info->max_extent = (u64)-1;
fs_info->max_inline = 8192 * 1024;
- if (setup_bdi(fs_info, &fs_info->bdi))
- goto fail_bdi;
- fs_info->btree_inode = new_inode(sb);
- fs_info->btree_inode->i_ino = 1;
- fs_info->btree_inode->i_nlink = 1;
fs_info->metadata_ratio = 8;
fs_info->thread_pool_size = min_t(unsigned long,
sb->s_blocksize_bits = blksize_bits(4096);
sb->s_bdi = &fs_info->bdi;
+ fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+ fs_info->btree_inode->i_nlink = 1;
/*
* we set the i_size on the btree inode to the max possible int.
* the real end of the address space is determined by all of
BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
+ BTRFS_I(fs_info->btree_inode)->root = tree_root;
+ memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+ sizeof(struct btrfs_key));
+ BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
+ insert_inode_hash(fs_info->btree_inode);
+
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree.rb_node = NULL;
- extent_io_tree_init(&fs_info->pinned_extents,
+ extent_io_tree_init(&fs_info->freed_extents[0],
fs_info->btree_inode->i_mapping, GFP_NOFS);
+ extent_io_tree_init(&fs_info->freed_extents[1],
+ fs_info->btree_inode->i_mapping, GFP_NOFS);
+ fs_info->pinned_extents = &fs_info->freed_extents[0];
fs_info->do_barriers = 1;
- BTRFS_I(fs_info->btree_inode)->root = tree_root;
- memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
- sizeof(struct btrfs_key));
- insert_inode_hash(fs_info->btree_inode);
mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
- mutex_init(&fs_info->drop_mutex);
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
- mutex_init(&fs_info->tree_reloc_mutex);
init_rwsem(&fs_info->extent_commit_sem);
+ init_rwsem(&fs_info->subvol_sem);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
err = -EINVAL;
goto fail_iput;
}
-
+printk("thread pool is %d\n", fs_info->thread_pool_size);
/*
* we need to start all the end_io workers up front because the
* queue work function gets called at interrupt time, and so it
fs_info->endio_workers.idle_thresh = 4;
fs_info->endio_meta_workers.idle_thresh = 4;
- fs_info->endio_write_workers.idle_thresh = 64;
- fs_info->endio_meta_write_workers.idle_thresh = 64;
+ fs_info->endio_write_workers.idle_thresh = 2;
+ fs_info->endio_meta_write_workers.idle_thresh = 2;
+
+ fs_info->endio_workers.atomic_worker_start = 1;
+ fs_info->endio_meta_workers.atomic_worker_start = 1;
+ fs_info->endio_write_workers.atomic_worker_start = 1;
+ fs_info->endio_meta_write_workers.atomic_worker_start = 1;
btrfs_start_workers(&fs_info->workers, 1);
btrfs_start_workers(&fs_info->submit_workers, 1);
btrfs_start_workers(&fs_info->delalloc_workers, 1);
btrfs_start_workers(&fs_info->fixup_workers, 1);
- btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_meta_workers,
- fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_meta_write_workers,
- fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_write_workers,
- fs_info->thread_pool_size);
+ btrfs_start_workers(&fs_info->endio_workers, 1);
+ btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+ btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+ btrfs_start_workers(&fs_info->endio_write_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
}
}
+ ret = btrfs_find_orphan_roots(tree_root);
+ BUG_ON(ret);
+
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_recover_relocation(tree_root);
BUG_ON(ret);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
fail_bdi:
bdi_destroy(&fs_info->bdi);
+fail_srcu:
+ cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
kfree(extent_root);
kfree(tree_root);
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
- WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+ spin_lock(&fs_info->fs_roots_radix_lock);
radix_tree_delete(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+
+ if (btrfs_root_refs(&root->root_item) == 0)
+ synchronize_srcu(&fs_info->subvol_srcu);
+
+ free_fs_root(root);
+ return 0;
+}
+
+static void free_fs_root(struct btrfs_root *root)
+{
+ WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
if (root->anon_super.s_dev) {
down_write(&root->anon_super.s_umount);
kill_anon_super(&root->anon_super);
}
- if (root->node)
- free_extent_buffer(root->node);
- if (root->commit_root)
- free_extent_buffer(root->commit_root);
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
kfree(root->name);
kfree(root);
- return 0;
}
static int del_fs_roots(struct btrfs_fs_info *fs_info)
struct btrfs_root *gang[8];
int i;
+ while (!list_empty(&fs_info->dead_roots)) {
+ gang[0] = list_entry(fs_info->dead_roots.next,
+ struct btrfs_root, root_list);
+ list_del(&gang[0]->root_list);
+
+ if (gang[0]->in_radix) {
+ btrfs_free_fs_root(fs_info, gang[0]);
+ } else {
+ free_extent_buffer(gang[0]->node);
+ free_extent_buffer(gang[0]->commit_root);
+ kfree(gang[0]);
+ }
+ }
+
while (1) {
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, 0,
root_objectid = gang[ret - 1]->root_key.objectid + 1;
for (i = 0; i < ret; i++) {
root_objectid = gang[i]->root_key.objectid;
- ret = btrfs_find_dead_roots(fs_info->tree_root,
- root_objectid);
- BUG_ON(ret);
btrfs_orphan_cleanup(gang[i]);
}
root_objectid++;
free_extent_buffer(root->fs_info->csum_root->commit_root);
btrfs_free_block_groups(root->fs_info);
- btrfs_free_pinned_extents(root->fs_info);
del_fs_roots(fs_info);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
bdi_destroy(&fs_info->bdi);
+ cleanup_srcu_struct(&fs_info->subvol_srcu);
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
- fid->objectid = BTRFS_I(inode)->location.objectid;
+ fid->objectid = inode->i_ino;
fid->root_objectid = BTRFS_I(inode)->root->objectid;
fid->gen = inode->i_generation;
}
static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation)
+ u64 root_objectid, u32 generation,
+ int check_generation)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info;
struct btrfs_root *root;
+ struct dentry *dentry;
struct inode *inode;
struct btrfs_key key;
+ int index;
+ int err = 0;
+
+ if (objectid < BTRFS_FIRST_FREE_OBJECTID)
+ return ERR_PTR(-ESTALE);
key.objectid = root_objectid;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key);
- if (IS_ERR(root))
- return ERR_CAST(root);
+ index = srcu_read_lock(&fs_info->subvol_srcu);
+
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto fail;
+ }
+
+ if (btrfs_root_refs(&root->root_item) == 0) {
+ err = -ENOENT;
+ goto fail;
+ }
key.objectid = objectid;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
inode = btrfs_iget(sb, &key, root);
- if (IS_ERR(inode))
- return (void *)inode;
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto fail;
+ }
+
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
- if (generation != inode->i_generation) {
+ if (check_generation && generation != inode->i_generation) {
iput(inode);
return ERR_PTR(-ESTALE);
}
- return d_obtain_alias(inode);
+ dentry = d_obtain_alias(inode);
+ if (!IS_ERR(dentry))
+ dentry->d_op = &btrfs_dentry_operations;
+ return dentry;
+fail:
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ return ERR_PTR(err);
}
static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
objectid = fid->parent_objectid;
generation = fid->parent_gen;
- return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
root_objectid = fid->root_objectid;
generation = fid->gen;
- return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
static struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = child->d_inode;
+ static struct dentry *dentry;
struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
- int slot;
- u64 objectid;
+ struct btrfs_root_ref *ref;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
int ret;
path = btrfs_alloc_path();
- key.objectid = dir->i_ino;
- btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
- key.offset = (u64)-1;
+ if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ key.objectid = root->root_key.objectid;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = (u64)-1;
+ root = root->fs_info->tree_root;
+ } else {
+ key.objectid = dir->i_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = (u64)-1;
+ }
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- /* Error */
- btrfs_free_path(path);
- return ERR_PTR(ret);
+ if (ret < 0)
+ goto fail;
+
+ BUG_ON(ret == 0);
+ if (path->slots[0] == 0) {
+ ret = -ENOENT;
+ goto fail;
}
+
+ path->slots[0]--;
leaf = path->nodes[0];
- slot = path->slots[0];
- if (ret) {
- /* btrfs_search_slot() returns the slot where we'd want to
- insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
- The _real_ backref, telling us what the parent inode
- _actually_ is, will be in the slot _before_ the one
- that btrfs_search_slot() returns. */
- if (!slot) {
- /* Unless there is _no_ key in the tree before... */
- btrfs_free_path(path);
- return ERR_PTR(-EIO);
- }
- slot--;
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != key.objectid || found_key.type != key.type) {
+ ret = -ENOENT;
+ goto fail;
}
- btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_root_ref);
+ key.objectid = btrfs_root_ref_dirid(leaf, ref);
+ } else {
+ key.objectid = found_key.offset;
+ }
btrfs_free_path(path);
- if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
- return ERR_PTR(-EINVAL);
-
- objectid = key.offset;
-
- /* If we are already at the root of a subvol, return the real root */
- if (objectid == dir->i_ino)
- return dget(dir->i_sb->s_root);
+ if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
+ return btrfs_get_dentry(root->fs_info->sb, key.objectid,
+ found_key.offset, 0, 0);
+ }
- /* Build a new key for the inode item */
- key.objectid = objectid;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+ key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
-
- return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
+ dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
+ if (!IS_ERR(dentry))
+ dentry->d_op = &btrfs_dentry_operations;
+ return dentry;
+fail:
+ btrfs_free_path(path);
+ return ERR_PTR(ret);
}
const struct export_operations btrfs_export_ops = {
#include "locking.h"
#include "free-space-cache.h"
-static int update_reserved_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
int level, struct btrfs_key *ins);
-
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, u64 num_bytes,
+ int is_data, int reserved,
+ struct extent_buffer **must_clean);
+static int find_next_key(struct btrfs_path *path, int level,
+ struct btrfs_key *key);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
return ret;
}
-/*
- * We always set EXTENT_LOCKED for the super mirror extents so we don't
- * overwrite them, so those bits need to be unset. Also, if we are unmounting
- * with pinned extents still sitting there because we had a block group caching,
- * we need to clear those now, since we are done.
- */
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+static int add_excluded_extent(struct btrfs_root *root,
+ u64 start, u64 num_bytes)
{
- u64 start, end, last = 0;
- int ret;
+ u64 end = start + num_bytes - 1;
+ set_extent_bits(&root->fs_info->freed_extents[0],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ set_extent_bits(&root->fs_info->freed_extents[1],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ return 0;
+}
- while (1) {
- ret = find_first_extent_bit(&info->pinned_extents, last,
- &start, &end,
- EXTENT_LOCKED|EXTENT_DIRTY);
- if (ret)
- break;
+static void free_excluded_extents(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ u64 start, end;
- clear_extent_bits(&info->pinned_extents, start, end,
- EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
- last = end+1;
- }
+ start = cache->key.objectid;
+ end = start + cache->key.offset - 1;
+
+ clear_extent_bits(&root->fs_info->freed_extents[0],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ clear_extent_bits(&root->fs_info->freed_extents[1],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
}
-static int remove_sb_from_cache(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
u64 bytenr;
u64 *logical;
int stripe_len;
cache->key.objectid, bytenr,
0, &logical, &nr, &stripe_len);
BUG_ON(ret);
+
while (nr--) {
- try_lock_extent(&fs_info->pinned_extents,
- logical[nr],
- logical[nr] + stripe_len - 1, GFP_NOFS);
+ cache->bytes_super += stripe_len;
+ ret = add_excluded_extent(root, logical[nr],
+ stripe_len);
+ BUG_ON(ret);
}
+
kfree(logical);
}
-
return 0;
}
+static struct btrfs_caching_control *
+get_caching_control(struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_caching_control *ctl;
+
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_STARTED) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
+ ctl = cache->caching_ctl;
+ atomic_inc(&ctl->count);
+ spin_unlock(&cache->lock);
+ return ctl;
+}
+
+static void put_caching_control(struct btrfs_caching_control *ctl)
+{
+ if (atomic_dec_and_test(&ctl->count))
+ kfree(ctl);
+}
+
/*
* this is only called by cache_block_group, since we could have freed extents
* we need to check the pinned_extents for any extents that can't be used yet
int ret;
while (start < end) {
- ret = find_first_extent_bit(&info->pinned_extents, start,
+ ret = find_first_extent_bit(info->pinned_extents, start,
&extent_start, &extent_end,
- EXTENT_DIRTY|EXTENT_LOCKED);
+ EXTENT_DIRTY | EXTENT_UPTODATE);
if (ret)
break;
{
struct btrfs_block_group_cache *block_group = data;
struct btrfs_fs_info *fs_info = block_group->fs_info;
- u64 last = 0;
+ struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
+ struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_path *path;
- int ret = 0;
- struct btrfs_key key;
struct extent_buffer *leaf;
- int slot;
+ struct btrfs_key key;
u64 total_found = 0;
-
- BUG_ON(!fs_info);
+ u64 last = 0;
+ u32 nritems;
+ int ret = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- atomic_inc(&block_group->space_info->caching_threads);
+ exclude_super_stripes(extent_root, block_group);
+ spin_lock(&block_group->space_info->lock);
+ block_group->space_info->bytes_super += block_group->bytes_super;
+ spin_unlock(&block_group->space_info->lock);
+
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+
/*
* We don't want to deadlock with somebody trying to allocate a new
* extent for the extent root while also trying to search the extent
key.objectid = last;
key.offset = 0;
- btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+ key.type = BTRFS_EXTENT_ITEM_KEY;
again:
+ mutex_lock(&caching_ctl->mutex);
/* need to make sure the commit_root doesn't disappear */
down_read(&fs_info->extent_commit_sem);
- ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto err;
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+
while (1) {
smp_mb();
- if (block_group->fs_info->closing > 1) {
+ if (fs_info->closing > 1) {
last = (u64)-1;
break;
}
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(fs_info->extent_root, path);
- if (ret < 0)
- goto err;
- else if (ret)
+ if (path->slots[0] < nritems) {
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ } else {
+ ret = find_next_key(path, 0, &key);
+ if (ret)
break;
- if (need_resched() ||
- btrfs_transaction_in_commit(fs_info)) {
- leaf = path->nodes[0];
-
- /* this shouldn't happen, but if the
- * leaf is empty just move on.
- */
- if (btrfs_header_nritems(leaf) == 0)
- break;
- /*
- * we need to copy the key out so that
- * we are sure the next search advances
- * us forward in the btree.
- */
- btrfs_item_key_to_cpu(leaf, &key, 0);
- btrfs_release_path(fs_info->extent_root, path);
- up_read(&fs_info->extent_commit_sem);
+ caching_ctl->progress = last;
+ btrfs_release_path(extent_root, path);
+ up_read(&fs_info->extent_commit_sem);
+ mutex_unlock(&caching_ctl->mutex);
+ if (btrfs_transaction_in_commit(fs_info))
schedule_timeout(1);
- goto again;
- }
+ else
+ cond_resched();
+ goto again;
+ }
+ if (key.objectid < block_group->key.objectid) {
+ path->slots[0]++;
continue;
}
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid < block_group->key.objectid)
- goto next;
if (key.objectid >= block_group->key.objectid +
block_group->key.offset)
break;
- if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+ if (key.type == BTRFS_EXTENT_ITEM_KEY) {
total_found += add_new_free_space(block_group,
fs_info, last,
key.objectid);
last = key.objectid + key.offset;
- }
- if (total_found > (1024 * 1024 * 2)) {
- total_found = 0;
- wake_up(&block_group->caching_q);
+ if (total_found > (1024 * 1024 * 2)) {
+ total_found = 0;
+ wake_up(&caching_ctl->wait);
+ }
}
-next:
path->slots[0]++;
}
ret = 0;
total_found += add_new_free_space(block_group, fs_info, last,
block_group->key.objectid +
block_group->key.offset);
+ caching_ctl->progress = (u64)-1;
spin_lock(&block_group->lock);
+ block_group->caching_ctl = NULL;
block_group->cached = BTRFS_CACHE_FINISHED;
spin_unlock(&block_group->lock);
err:
btrfs_free_path(path);
up_read(&fs_info->extent_commit_sem);
- atomic_dec(&block_group->space_info->caching_threads);
- wake_up(&block_group->caching_q);
+ free_excluded_extents(extent_root, block_group);
+
+ mutex_unlock(&caching_ctl->mutex);
+ wake_up(&caching_ctl->wait);
+
+ put_caching_control(caching_ctl);
+ atomic_dec(&block_group->space_info->caching_threads);
return 0;
}
static int cache_block_group(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_caching_control *caching_ctl;
struct task_struct *tsk;
int ret = 0;
+ smp_mb();
+ if (cache->cached != BTRFS_CACHE_NO)
+ return 0;
+
+ caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+ BUG_ON(!caching_ctl);
+
+ INIT_LIST_HEAD(&caching_ctl->list);
+ mutex_init(&caching_ctl->mutex);
+ init_waitqueue_head(&caching_ctl->wait);
+ caching_ctl->block_group = cache;
+ caching_ctl->progress = cache->key.objectid;
+ /* one for caching kthread, one for caching block group list */
+ atomic_set(&caching_ctl->count, 2);
+
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
- return ret;
+ kfree(caching_ctl);
+ return 0;
}
+ cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_STARTED;
spin_unlock(&cache->lock);
+ down_write(&fs_info->extent_commit_sem);
+ list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
+ up_write(&fs_info->extent_commit_sem);
+
+ atomic_inc(&cache->space_info->caching_threads);
+
tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
cache->key.objectid);
if (IS_ERR(tsk)) {
parent, ref_root, flags,
ref->objectid, ref->offset,
&ins, node->ref_mod);
- update_reserved_extents(root, ins.objectid, ins.offset, 0);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent,
extent_op->flags_to_set,
&extent_op->key,
ref->level, &ins);
- update_reserved_extents(root, ins.objectid, ins.offset, 0);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent, ref_root,
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
if (insert_reserved) {
+ int mark_free = 0;
+ struct extent_buffer *must_clean = NULL;
+
+ ret = pin_down_bytes(trans, root, NULL,
+ node->bytenr, node->num_bytes,
+ head->is_data, 1, &must_clean);
+ if (ret > 0)
+ mark_free = 1;
+
+ if (must_clean) {
+ clean_tree_block(NULL, root, must_clean);
+ btrfs_tree_unlock(must_clean);
+ free_extent_buffer(must_clean);
+ }
if (head->is_data) {
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
- btrfs_update_pinned_extents(root, node->bytenr,
- node->num_bytes, 1);
- update_reserved_extents(root, node->bytenr,
- node->num_bytes, 0);
+ if (mark_free) {
+ ret = btrfs_free_reserved_extent(root,
+ node->bytenr,
+ node->num_bytes);
+ BUG_ON(ret);
+ }
}
mutex_unlock(&head->mutex);
return 0;
/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);
+ if (!meta_sinfo)
+ goto alloc;
again:
spin_lock(&meta_sinfo->lock);
do_div(thresh, 100);
if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
- meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
+ meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+ meta_sinfo->bytes_super > thresh) {
struct btrfs_trans_handle *trans;
if (!meta_sinfo->full) {
meta_sinfo->force_alloc = 1;
spin_unlock(&meta_sinfo->lock);
-
+alloc:
trans = btrfs_start_transaction(root, 1);
if (!trans)
return -ENOMEM;
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2 * 1024 * 1024, alloc_target, 0);
btrfs_end_transaction(trans, root);
+ if (!meta_sinfo) {
+ meta_sinfo = __find_space_info(info,
+ alloc_target);
+ }
goto again;
}
spin_unlock(&meta_sinfo->lock);
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
data_sinfo = BTRFS_I(inode)->space_info;
+ if (!data_sinfo)
+ goto alloc;
+
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
if (data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
- data_sinfo->bytes_may_use < bytes) {
+ data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
struct btrfs_trans_handle *trans;
/*
data_sinfo->force_alloc = 1;
spin_unlock(&data_sinfo->lock);
-
+alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
trans = btrfs_start_transaction(root, 1);
if (!trans)
btrfs_end_transaction(trans, root);
if (ret)
return ret;
+
+ if (!data_sinfo) {
+ btrfs_set_inode_space_info(root, inode);
+ data_sinfo = BTRFS_I(inode)->space_info;
+ }
goto again;
}
spin_unlock(&data_sinfo->lock);
num_bytes = min(total, cache->key.offset - byte_in_group);
if (alloc) {
old_val += num_bytes;
+ btrfs_set_block_group_used(&cache->item, old_val);
+ cache->reserved -= num_bytes;
cache->space_info->bytes_used += num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
if (cache->ro)
cache->space_info->bytes_readonly -= num_bytes;
- btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
} else {
return bytenr;
}
-int btrfs_update_pinned_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int pin)
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int reserved)
{
- u64 len;
- struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *cache;
- if (pin)
- set_extent_dirty(&fs_info->pinned_extents,
- bytenr, bytenr + num - 1, GFP_NOFS);
-
- while (num > 0) {
- cache = btrfs_lookup_block_group(fs_info, bytenr);
- BUG_ON(!cache);
- len = min(num, cache->key.offset -
- (bytenr - cache->key.objectid));
- if (pin) {
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->pinned += len;
- cache->space_info->bytes_pinned += len;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- fs_info->total_pinned += len;
- } else {
- int unpin = 0;
+ cache = btrfs_lookup_block_group(fs_info, bytenr);
+ BUG_ON(!cache);
- /*
- * in order to not race with the block group caching, we
- * only want to unpin the extent if we are cached. If
- * we aren't cached, we want to start async caching this
- * block group so we can free the extent the next time
- * around.
- */
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- unpin = (cache->cached == BTRFS_CACHE_FINISHED);
- if (likely(unpin)) {
- cache->pinned -= len;
- cache->space_info->bytes_pinned -= len;
- fs_info->total_pinned -= len;
- }
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ cache->pinned += num_bytes;
+ cache->space_info->bytes_pinned += num_bytes;
+ if (reserved) {
+ cache->reserved -= num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
+ }
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
- if (likely(unpin))
- clear_extent_dirty(&fs_info->pinned_extents,
- bytenr, bytenr + len -1,
- GFP_NOFS);
- else
- cache_block_group(cache);
+ btrfs_put_block_group(cache);
- if (unpin)
- btrfs_add_free_space(cache, bytenr, len);
- }
- btrfs_put_block_group(cache);
- bytenr += len;
- num -= len;
+ set_extent_dirty(fs_info->pinned_extents,
+ bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+ return 0;
+}
+
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve)
+{
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ if (reserve) {
+ cache->reserved += num_bytes;
+ cache->space_info->bytes_reserved += num_bytes;
+ } else {
+ cache->reserved -= num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
}
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
return 0;
}
-static int update_reserved_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int reserve)
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
{
- u64 len;
- struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_caching_control *next;
+ struct btrfs_caching_control *caching_ctl;
+ struct btrfs_block_group_cache *cache;
- while (num > 0) {
- cache = btrfs_lookup_block_group(fs_info, bytenr);
- BUG_ON(!cache);
- len = min(num, cache->key.offset -
- (bytenr - cache->key.objectid));
+ down_write(&fs_info->extent_commit_sem);
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- if (reserve) {
- cache->reserved += len;
- cache->space_info->bytes_reserved += len;
+ list_for_each_entry_safe(caching_ctl, next,
+ &fs_info->caching_block_groups, list) {
+ cache = caching_ctl->block_group;
+ if (block_group_cache_done(cache)) {
+ cache->last_byte_to_unpin = (u64)-1;
+ list_del_init(&caching_ctl->list);
+ put_caching_control(caching_ctl);
} else {
- cache->reserved -= len;
- cache->space_info->bytes_reserved -= len;
+ cache->last_byte_to_unpin = caching_ctl->progress;
}
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- btrfs_put_block_group(cache);
- bytenr += len;
- num -= len;
}
+
+ if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+ fs_info->pinned_extents = &fs_info->freed_extents[1];
+ else
+ fs_info->pinned_extents = &fs_info->freed_extents[0];
+
+ up_write(&fs_info->extent_commit_sem);
return 0;
}
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
- u64 last = 0;
- u64 start;
- u64 end;
- struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
- int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *cache = NULL;
+ u64 len;
- while (1) {
- ret = find_first_extent_bit(pinned_extents, last,
- &start, &end, EXTENT_DIRTY);
- if (ret)
- break;
+ while (start <= end) {
+ if (!cache ||
+ start >= cache->key.objectid + cache->key.offset) {
+ if (cache)
+ btrfs_put_block_group(cache);
+ cache = btrfs_lookup_block_group(fs_info, start);
+ BUG_ON(!cache);
+ }
+
+ len = cache->key.objectid + cache->key.offset - start;
+ len = min(len, end + 1 - start);
+
+ if (start < cache->last_byte_to_unpin) {
+ len = min(len, cache->last_byte_to_unpin - start);
+ btrfs_add_free_space(cache, start, len);
+ }
+
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ cache->pinned -= len;
+ cache->space_info->bytes_pinned -= len;
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
- set_extent_dirty(copy, start, end, GFP_NOFS);
- last = end + 1;
+ start += len;
}
+
+ if (cache)
+ btrfs_put_block_group(cache);
return 0;
}
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_io_tree *unpin)
+ struct btrfs_root *root)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_io_tree *unpin;
u64 start;
u64 end;
int ret;
+ if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+ unpin = &fs_info->freed_extents[1];
+ else
+ unpin = &fs_info->freed_extents[0];
+
while (1) {
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY);
ret = btrfs_discard_extent(root, start, end + 1 - start);
- /* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
-
+ unpin_extent_range(root, start, end);
cond_resched();
}
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- u64 bytenr, u64 num_bytes, int is_data,
+ u64 bytenr, u64 num_bytes,
+ int is_data, int reserved,
struct extent_buffer **must_clean)
{
int err = 0;
}
free_extent_buffer(buf);
pinit:
- btrfs_set_path_blocking(path);
+ if (path)
+ btrfs_set_path_blocking(path);
/* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+ btrfs_pin_extent(root, bytenr, num_bytes, reserved);
BUG_ON(err < 0);
return 0;
}
-
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
}
ret = pin_down_bytes(trans, root, path, bytenr,
- num_bytes, is_data, &must_clean);
+ num_bytes, is_data, 0, &must_clean);
if (ret > 0)
mark_free = 1;
BUG_ON(ret < 0);
if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
/* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
- update_reserved_extents(root, bytenr, num_bytes, 0);
+ btrfs_pin_extent(root, bytenr, num_bytes, 1);
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
u64 num_bytes)
{
+ struct btrfs_caching_control *caching_ctl;
DEFINE_WAIT(wait);
- prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
-
- if (block_group_cache_done(cache)) {
- finish_wait(&cache->caching_q, &wait);
+ caching_ctl = get_caching_control(cache);
+ if (!caching_ctl)
return 0;
- }
- schedule();
- finish_wait(&cache->caching_q, &wait);
- wait_event(cache->caching_q, block_group_cache_done(cache) ||
+ wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
(cache->free_space >= num_bytes));
+
+ put_caching_control(caching_ctl);
+ return 0;
+}
+
+static noinline int
+wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_caching_control *caching_ctl;
+ DEFINE_WAIT(wait);
+
+ caching_ctl = get_caching_control(cache);
+ if (!caching_ctl)
+ return 0;
+
+ wait_event(caching_ctl->wait, block_group_cache_done(cache));
+
+ put_caching_control(caching_ctl);
return 0;
}
int last_ptr_loop = 0;
int loop = 0;
bool found_uncached_bg = false;
+ bool failed_cluster_refill = false;
WARN_ON(num_bytes < root->sectorsize);
btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
if (unlikely(block_group->ro))
goto loop;
- if (last_ptr) {
+ /*
+ * Ok we want to try and use the cluster allocator, so lets look
+ * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
+ * have tried the cluster allocator plenty of times at this
+ * point and not have found anything, so we are likely way too
+ * fragmented for the clustering stuff to find anything, so lets
+ * just skip it and let the allocator find whatever block it can
+ * find
+ */
+ if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
/*
* the refill lock keeps out other
* people trying to start a new cluster
spin_unlock(&last_ptr->refill_lock);
goto checks;
}
- } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+ } else if (!cached && loop > LOOP_CACHING_NOWAIT
+ && !failed_cluster_refill) {
spin_unlock(&last_ptr->refill_lock);
+ failed_cluster_refill = true;
wait_block_group_cache_progress(block_group,
num_bytes + empty_cluster + empty_size);
goto have_block_group;
* cluster. Free the cluster we've been trying
* to use, and go to the next block group
*/
- if (loop < LOOP_NO_EMPTY_SIZE) {
- btrfs_return_cluster_to_free_space(NULL,
- last_ptr);
- spin_unlock(&last_ptr->refill_lock);
- goto loop;
- }
+ btrfs_return_cluster_to_free_space(NULL, last_ptr);
spin_unlock(&last_ptr->refill_lock);
+ goto loop;
}
offset = btrfs_find_space_for_alloc(block_group, search_start,
search_start - offset);
BUG_ON(offset > search_start);
+ update_reserved_extents(block_group, num_bytes, 1);
+
/* we are all good, lets return */
break;
loop:
+ failed_cluster_refill = false;
btrfs_put_block_group(block_group);
}
up_read(&space_info->groups_sem);
up_read(&info->groups_sem);
}
-static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data)
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data)
{
int ret;
u64 search_start = 0;
ret = btrfs_discard_extent(root, start, len);
btrfs_add_free_space(cache, start, len);
+ update_reserved_extents(cache, len, 0);
btrfs_put_block_group(cache);
- update_reserved_extents(root, start, len, 0);
-
- return ret;
-}
-
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data)
-{
- int ret;
- ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
- empty_size, hint_byte, search_end, ins,
- data);
- if (!ret)
- update_reserved_extents(root, ins->objectid, ins->offset, 1);
return ret;
}
{
int ret;
struct btrfs_block_group_cache *block_group;
+ struct btrfs_caching_control *caching_ctl;
+ u64 start = ins->objectid;
+ u64 num_bytes = ins->offset;
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
cache_block_group(block_group);
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ caching_ctl = get_caching_control(block_group);
- ret = btrfs_remove_free_space(block_group, ins->objectid,
- ins->offset);
- BUG_ON(ret);
+ if (!caching_ctl) {
+ BUG_ON(!block_group_cache_done(block_group));
+ ret = btrfs_remove_free_space(block_group, start, num_bytes);
+ BUG_ON(ret);
+ } else {
+ mutex_lock(&caching_ctl->mutex);
+
+ if (start >= caching_ctl->progress) {
+ ret = add_excluded_extent(root, start, num_bytes);
+ BUG_ON(ret);
+ } else if (start + num_bytes <= caching_ctl->progress) {
+ ret = btrfs_remove_free_space(block_group,
+ start, num_bytes);
+ BUG_ON(ret);
+ } else {
+ num_bytes = caching_ctl->progress - start;
+ ret = btrfs_remove_free_space(block_group,
+ start, num_bytes);
+ BUG_ON(ret);
+
+ start = caching_ctl->progress;
+ num_bytes = ins->objectid + ins->offset -
+ caching_ctl->progress;
+ ret = add_excluded_extent(root, start, num_bytes);
+ BUG_ON(ret);
+ }
+
+ mutex_unlock(&caching_ctl->mutex);
+ put_caching_control(caching_ctl);
+ }
+
+ update_reserved_extents(block_group, ins->offset, 1);
btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
int ret;
u64 flags = 0;
- ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
- empty_size, hint_byte, search_end,
- ins, 0);
+ ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
+ empty_size, hint_byte, search_end,
+ ins, 0);
if (ret)
return ret;
} else
BUG_ON(parent > 0);
- update_reserved_extents(root, ins->objectid, ins->offset, 1);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
return buf;
}
-#if 0
-int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *leaf)
+struct walk_control {
+ u64 refs[BTRFS_MAX_LEVEL];
+ u64 flags[BTRFS_MAX_LEVEL];
+ struct btrfs_key update_progress;
+ int stage;
+ int level;
+ int shared_level;
+ int update_ref;
+ int keep_locks;
+ int reada_slot;
+ int reada_count;
+};
+
+#define DROP_REFERENCE 1
+#define UPDATE_BACKREF 2
+
+static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct walk_control *wc,
+ struct btrfs_path *path)
{
- u64 disk_bytenr;
- u64 num_bytes;
- struct btrfs_key key;
- struct btrfs_file_extent_item *fi;
+ u64 bytenr;
+ u64 generation;
+ u64 refs;
+ u64 last = 0;
u32 nritems;
- int i;
+ u32 blocksize;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
int ret;
+ int slot;
+ int nread = 0;
- BUG_ON(!btrfs_is_leaf(leaf));
- nritems = btrfs_header_nritems(leaf);
+ if (path->slots[wc->level] < wc->reada_slot) {
+ wc->reada_count = wc->reada_count * 2 / 3;
+ wc->reada_count = max(wc->reada_count, 2);
+ } else {
+ wc->reada_count = wc->reada_count * 3 / 2;
+ wc->reada_count = min_t(int, wc->reada_count,
+ BTRFS_NODEPTRS_PER_BLOCK(root));
+ }
- for (i = 0; i < nritems; i++) {
- cond_resched();
- btrfs_item_key_to_cpu(leaf, &key, i);
+ eb = path->nodes[wc->level];
+ nritems = btrfs_header_nritems(eb);
+ blocksize = btrfs_level_size(root, wc->level - 1);
- /* only extents have references, skip everything else */
- if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
- continue;
-
- fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
-
- /* inline extents live in the btree, they don't have refs */
- if (btrfs_file_extent_type(leaf, fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- continue;
-
- disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
-
- /* holes don't have refs */
- if (disk_bytenr == 0)
- continue;
-
- num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
- ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
- leaf->start, 0, key.objectid, 0);
- BUG_ON(ret);
- }
- return 0;
-}
-
-static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_leaf_ref *ref)
-{
- int i;
- int ret;
- struct btrfs_extent_info *info;
- struct refsort *sorted;
-
- if (ref->nritems == 0)
- return 0;
-
- sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
- for (i = 0; i < ref->nritems; i++) {
- sorted[i].bytenr = ref->extents[i].bytenr;
- sorted[i].slot = i;
- }
- sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
+ for (slot = path->slots[wc->level]; slot < nritems; slot++) {
+ if (nread >= wc->reada_count)
+ break;
- /*
- * the items in the ref were sorted when the ref was inserted
- * into the ref cache, so this is already in order
- */
- for (i = 0; i < ref->nritems; i++) {
- info = ref->extents + sorted[i].slot;
- ret = btrfs_free_extent(trans, root, info->bytenr,
- info->num_bytes, ref->bytenr,
- ref->owner, ref->generation,
- info->objectid, 0);
-
- atomic_inc(&root->fs_info->throttle_gen);
- wake_up(&root->fs_info->transaction_throttle);
cond_resched();
+ bytenr = btrfs_node_blockptr(eb, slot);
+ generation = btrfs_node_ptr_generation(eb, slot);
- BUG_ON(ret);
- info++;
- }
-
- kfree(sorted);
- return 0;
-}
-
-
-static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 start,
- u64 len, u32 *refs)
-{
- int ret;
-
- ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
- BUG_ON(ret);
-
-#if 0 /* some debugging code in case we see problems here */
- /* if the refs count is one, it won't get increased again. But
- * if the ref count is > 1, someone may be decreasing it at
- * the same time we are.
- */
- if (*refs != 1) {
- struct extent_buffer *eb = NULL;
- eb = btrfs_find_create_tree_block(root, start, len);
- if (eb)
- btrfs_tree_lock(eb);
-
- mutex_lock(&root->fs_info->alloc_mutex);
- ret = lookup_extent_ref(NULL, root, start, len, refs);
- BUG_ON(ret);
- mutex_unlock(&root->fs_info->alloc_mutex);
-
- if (eb) {
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
- }
- if (*refs == 1) {
- printk(KERN_ERR "btrfs block %llu went down to one "
- "during drop_snap\n", (unsigned long long)start);
- }
-
- }
-#endif
-
- cond_resched();
- return ret;
-}
-
+ if (slot == path->slots[wc->level])
+ goto reada;
-/*
- * this is used while deleting old snapshots, and it drops the refs
- * on a whole subtree starting from a level 1 node.
- *
- * The idea is to sort all the leaf pointers, and then drop the
- * ref on all the leaves in order. Most of the time the leaves
- * will have ref cache entries, so no leaf IOs will be required to
- * find the extents they have references on.
- *
- * For each leaf, any references it has are also dropped in order
- *
- * This ends up dropping the references in something close to optimal
- * order for reading and modifying the extent allocation tree.
- */
-static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path)
-{
- u64 bytenr;
- u64 root_owner;
- u64 root_gen;
- struct extent_buffer *eb = path->nodes[1];
- struct extent_buffer *leaf;
- struct btrfs_leaf_ref *ref;
- struct refsort *sorted = NULL;
- int nritems = btrfs_header_nritems(eb);
- int ret;
- int i;
- int refi = 0;
- int slot = path->slots[1];
- u32 blocksize = btrfs_level_size(root, 0);
- u32 refs;
-
- if (nritems == 0)
- goto out;
-
- root_owner = btrfs_header_owner(eb);
- root_gen = btrfs_header_generation(eb);
- sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
-
- /*
- * step one, sort all the leaf pointers so we don't scribble
- * randomly into the extent allocation tree
- */
- for (i = slot; i < nritems; i++) {
- sorted[refi].bytenr = btrfs_node_blockptr(eb, i);
- sorted[refi].slot = i;
- refi++;
- }
-
- /*
- * nritems won't be zero, but if we're picking up drop_snapshot
- * after a crash, slot might be > 0, so double check things
- * just in case.
- */
- if (refi == 0)
- goto out;
-
- sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
-
- /*
- * the first loop frees everything the leaves point to
- */
- for (i = 0; i < refi; i++) {
- u64 ptr_gen;
-
- bytenr = sorted[i].bytenr;
-
- /*
- * check the reference count on this leaf. If it is > 1
- * we just decrement it below and don't update any
- * of the refs the leaf points to.
- */
- ret = drop_snap_lookup_refcount(trans, root, bytenr,
- blocksize, &refs);
- BUG_ON(ret);
- if (refs != 1)
+ if (wc->stage == UPDATE_BACKREF &&
+ generation <= root->root_key.offset)
continue;
- ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot);
-
- /*
- * the leaf only had one reference, which means the
- * only thing pointing to this leaf is the snapshot
- * we're deleting. It isn't possible for the reference
- * count to increase again later
- *
- * The reference cache is checked for the leaf,
- * and if found we'll be able to drop any refs held by
- * the leaf without needing to read it in.
- */
- ref = btrfs_lookup_leaf_ref(root, bytenr);
- if (ref && ref->generation != ptr_gen) {
- btrfs_free_leaf_ref(root, ref);
- ref = NULL;
- }
- if (ref) {
- ret = cache_drop_leaf_ref(trans, root, ref);
- BUG_ON(ret);
- btrfs_remove_leaf_ref(root, ref);
- btrfs_free_leaf_ref(root, ref);
- } else {
- /*
- * the leaf wasn't in the reference cache, so
- * we have to read it.
- */
- leaf = read_tree_block(root, bytenr, blocksize,
- ptr_gen);
- ret = btrfs_drop_leaf_ref(trans, root, leaf);
+ if (wc->stage == DROP_REFERENCE) {
+ ret = btrfs_lookup_extent_info(trans, root,
+ bytenr, blocksize,
+ &refs, NULL);
BUG_ON(ret);
- free_extent_buffer(leaf);
- }
- atomic_inc(&root->fs_info->throttle_gen);
- wake_up(&root->fs_info->transaction_throttle);
- cond_resched();
- }
-
- /*
- * run through the loop again to free the refs on the leaves.
- * This is faster than doing it in the loop above because
- * the leaves are likely to be clustered together. We end up
- * working in nice chunks on the extent allocation tree.
- */
- for (i = 0; i < refi; i++) {
- bytenr = sorted[i].bytenr;
- ret = btrfs_free_extent(trans, root, bytenr,
- blocksize, eb->start,
- root_owner, root_gen, 0, 1);
- BUG_ON(ret);
+ BUG_ON(refs == 0);
+ if (refs == 1)
+ goto reada;
- atomic_inc(&root->fs_info->throttle_gen);
- wake_up(&root->fs_info->transaction_throttle);
- cond_resched();
- }
-out:
- kfree(sorted);
-
- /*
- * update the path to show we've processed the entire level 1
- * node. This will get saved into the root's drop_snapshot_progress
- * field so these drops are not repeated again if this transaction
- * commits.
- */
- path->slots[1] = nritems;
- return 0;
-}
-
-/*
- * helper function for drop_snapshot, this walks down the tree dropping ref
- * counts as it goes.
- */
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int *level)
-{
- u64 root_owner;
- u64 root_gen;
- u64 bytenr;
- u64 ptr_gen;
- struct extent_buffer *next;
- struct extent_buffer *cur;
- struct extent_buffer *parent;
- u32 blocksize;
- int ret;
- u32 refs;
-
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
- ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
- path->nodes[*level]->len, &refs);
- BUG_ON(ret);
- if (refs > 1)
- goto out;
-
- /*
- * walk down to the last node level and free all the leaves
- */
- while (*level >= 0) {
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
- cur = path->nodes[*level];
-
- if (btrfs_header_level(cur) != *level)
- WARN_ON(1);
-
- if (path->slots[*level] >=
- btrfs_header_nritems(cur))
- break;
-
- /* the new code goes down to level 1 and does all the
- * leaves pointed to that node in bulk. So, this check
- * for level 0 will always be false.
- *
- * But, the disk format allows the drop_snapshot_progress
- * field in the root to leave things in a state where
- * a leaf will need cleaning up here. If someone crashes
- * with the old code and then boots with the new code,
- * we might find a leaf here.
- */
- if (*level == 0) {
- ret = btrfs_drop_leaf_ref(trans, root, cur);
- BUG_ON(ret);
- break;
+ if (!wc->update_ref ||
+ generation <= root->root_key.offset)
+ continue;
+ btrfs_node_key_to_cpu(eb, &key, slot);
+ ret = btrfs_comp_cpu_keys(&key,
+ &wc->update_progress);
+ if (ret < 0)
+ continue;
}
-
- /*
- * once we get to level one, process the whole node
- * at once, including everything below it.
- */
- if (*level == 1) {
- ret = drop_level_one_refs(trans, root, path);
- BUG_ON(ret);
+reada:
+ ret = readahead_tree_block(root, bytenr, blocksize,
+ generation);
+ if (ret)
break;
- }
-
- bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
- ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
- blocksize = btrfs_level_size(root, *level - 1);
-
- ret = drop_snap_lookup_refcount(trans, root, bytenr,
- blocksize, &refs);
- BUG_ON(ret);
-
- /*
- * if there is more than one reference, we don't need
- * to read that node to drop any references it has. We
- * just drop the ref we hold on that node and move on to the
- * next slot in this level.
- */
- if (refs != 1) {
- parent = path->nodes[*level];
- root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
- path->slots[*level]++;
-
- ret = btrfs_free_extent(trans, root, bytenr,
- blocksize, parent->start,
- root_owner, root_gen,
- *level - 1, 1);
- BUG_ON(ret);
-
- atomic_inc(&root->fs_info->throttle_gen);
- wake_up(&root->fs_info->transaction_throttle);
- cond_resched();
-
- continue;
- }
-
- /*
- * we need to keep freeing things in the next level down.
- * read the block and loop around to process it
- */
- next = read_tree_block(root, bytenr, blocksize, ptr_gen);
- WARN_ON(*level <= 0);
- if (path->nodes[*level-1])
- free_extent_buffer(path->nodes[*level-1]);
- path->nodes[*level-1] = next;
- *level = btrfs_header_level(next);
- path->slots[*level] = 0;
- cond_resched();
+ last = bytenr + blocksize;
+ nread++;
}
-out:
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
- if (path->nodes[*level] == root->node) {
- parent = path->nodes[*level];
- bytenr = path->nodes[*level]->start;
- } else {
- parent = path->nodes[*level + 1];
- bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
- }
-
- blocksize = btrfs_level_size(root, *level);
- root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
-
- /*
- * cleanup and free the reference on the last node
- * we processed
- */
- ret = btrfs_free_extent(trans, root, bytenr, blocksize,
- parent->start, root_owner, root_gen,
- *level, 1);
- free_extent_buffer(path->nodes[*level]);
- path->nodes[*level] = NULL;
-
- *level += 1;
- BUG_ON(ret);
-
- cond_resched();
- return 0;
+ wc->reada_slot = slot;
}
-#endif
-
-struct walk_control {
- u64 refs[BTRFS_MAX_LEVEL];
- u64 flags[BTRFS_MAX_LEVEL];
- struct btrfs_key update_progress;
- int stage;
- int level;
- int shared_level;
- int update_ref;
- int keep_locks;
-};
-
-#define DROP_REFERENCE 1
-#define UPDATE_BACKREF 2
/*
* hepler to process tree block while walking down the tree.
*
- * when wc->stage == DROP_REFERENCE, this function checks
- * reference count of the block. if the block is shared and
- * we need update back refs for the subtree rooted at the
- * block, this function changes wc->stage to UPDATE_BACKREF
- *
* when wc->stage == UPDATE_BACKREF, this function updates
* back refs for pointers in the block.
*
{
int level = wc->level;
struct extent_buffer *eb = path->nodes[level];
- struct btrfs_key key;
u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
int ret;
BUG_ON(wc->refs[level] == 0);
}
- if (wc->stage == DROP_REFERENCE &&
- wc->update_ref && wc->refs[level] > 1) {
- BUG_ON(eb == root->node);
- BUG_ON(path->slots[level] > 0);
- if (level == 0)
- btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
- else
- btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
- if (btrfs_header_owner(eb) == root->root_key.objectid &&
- btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
- wc->stage = UPDATE_BACKREF;
- wc->shared_level = level;
- }
- }
-
if (wc->stage == DROP_REFERENCE) {
if (wc->refs[level] > 1)
return 1;
return 0;
}
+/*
+ * hepler to process tree block pointer.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks
+ * reference count of the block pointed to. if the block
+ * is shared and we need update back refs for the subtree
+ * rooted at the block, this function changes wc->stage to
+ * UPDATE_BACKREF. if the block is shared and there is no
+ * need to update back, this function drops the reference
+ * to the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
+ */
+static noinline int do_walk_down(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct walk_control *wc)
+{
+ u64 bytenr;
+ u64 generation;
+ u64 parent;
+ u32 blocksize;
+ struct btrfs_key key;
+ struct extent_buffer *next;
+ int level = wc->level;
+ int reada = 0;
+ int ret = 0;
+
+ generation = btrfs_node_ptr_generation(path->nodes[level],
+ path->slots[level]);
+ /*
+ * if the lower level block was created before the snapshot
+ * was created, we know there is no need to update back refs
+ * for the subtree
+ */
+ if (wc->stage == UPDATE_BACKREF &&
+ generation <= root->root_key.offset)
+ return 1;
+
+ bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
+ blocksize = btrfs_level_size(root, level - 1);
+
+ next = btrfs_find_tree_block(root, bytenr, blocksize);
+ if (!next) {
+ next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+ reada = 1;
+ }
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+
+ if (wc->stage == DROP_REFERENCE) {
+ ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+ &wc->refs[level - 1],
+ &wc->flags[level - 1]);
+ BUG_ON(ret);
+ BUG_ON(wc->refs[level - 1] == 0);
+
+ if (wc->refs[level - 1] > 1) {
+ if (!wc->update_ref ||
+ generation <= root->root_key.offset)
+ goto skip;
+
+ btrfs_node_key_to_cpu(path->nodes[level], &key,
+ path->slots[level]);
+ ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
+ if (ret < 0)
+ goto skip;
+
+ wc->stage = UPDATE_BACKREF;
+ wc->shared_level = level - 1;
+ }
+ }
+
+ if (!btrfs_buffer_uptodate(next, generation)) {
+ btrfs_tree_unlock(next);
+ free_extent_buffer(next);
+ next = NULL;
+ }
+
+ if (!next) {
+ if (reada && level == 1)
+ reada_walk_down(trans, root, wc, path);
+ next = read_tree_block(root, bytenr, blocksize, generation);
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ }
+
+ level--;
+ BUG_ON(level != btrfs_header_level(next));
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ path->locks[level] = 1;
+ wc->level = level;
+ if (wc->level == 1)
+ wc->reada_slot = 0;
+ return 0;
+skip:
+ wc->refs[level - 1] = 0;
+ wc->flags[level - 1] = 0;
+
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+ parent = path->nodes[level]->start;
+ } else {
+ BUG_ON(root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level]));
+ parent = 0;
+ }
+
+ ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+ root->root_key.objectid, level - 1, 0);
+ BUG_ON(ret);
+
+ btrfs_tree_unlock(next);
+ free_extent_buffer(next);
+ return 1;
+}
+
/*
* hepler to process tree block while walking up the tree.
*
if (level < wc->shared_level)
goto out;
- BUG_ON(wc->refs[level] <= 1);
ret = find_next_key(path, level + 1, &wc->update_progress);
if (ret > 0)
wc->update_ref = 0;
path->locks[level] = 0;
return 1;
}
- } else {
- BUG_ON(level != 0);
}
}
struct btrfs_path *path,
struct walk_control *wc)
{
- struct extent_buffer *next;
- struct extent_buffer *cur;
- u64 bytenr;
- u64 ptr_gen;
- u32 blocksize;
int level = wc->level;
int ret;
while (level >= 0) {
- cur = path->nodes[level];
- BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
+ if (path->slots[level] >=
+ btrfs_header_nritems(path->nodes[level]))
+ break;
ret = walk_down_proc(trans, root, path, wc);
if (ret > 0)
if (level == 0)
break;
- bytenr = btrfs_node_blockptr(cur, path->slots[level]);
- blocksize = btrfs_level_size(root, level - 1);
- ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
-
- next = read_tree_block(root, bytenr, blocksize, ptr_gen);
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
-
- level--;
- BUG_ON(level != btrfs_header_level(next));
- path->nodes[level] = next;
- path->slots[level] = 0;
- path->locks[level] = 1;
- wc->level = level;
+ ret = do_walk_down(trans, root, path, wc);
+ if (ret > 0) {
+ path->slots[level]++;
+ continue;
+ }
+ level = wc->level;
}
return 0;
}
err = ret;
goto out;
}
- btrfs_node_key_to_cpu(path->nodes[level], &key,
- path->slots[level]);
- WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+ WARN_ON(ret > 0);
/*
* unlock our path, this is safe because only this
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
+ wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
ret = walk_down_tree(trans, root, path, wc);
ret = btrfs_del_root(trans, tree_root, &root->root_key);
BUG_ON(ret);
- free_extent_buffer(root->node);
- free_extent_buffer(root->commit_root);
- kfree(root);
+ if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+ ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
+ NULL, NULL);
+ BUG_ON(ret < 0);
+ if (ret > 0) {
+ ret = btrfs_del_orphan_item(trans, tree_root,
+ root->root_key.objectid);
+ BUG_ON(ret);
+ }
+ }
+
+ if (root->in_radix) {
+ btrfs_free_fs_root(tree_root->fs_info, root);
+ } else {
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
+ kfree(root);
+ }
out:
btrfs_end_transaction(trans, tree_root);
kfree(wc);
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
+ wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
wret = walk_down_tree(trans, root, path, wc);
lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
while (1) {
int ret;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
return 0;
}
-#if 0
-static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 size)
-{
- struct btrfs_path *path;
- struct btrfs_inode_item *item;
- struct extent_buffer *leaf;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_inode(trans, root, path, objectid);
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
- memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
- btrfs_set_inode_generation(leaf, item, 1);
- btrfs_set_inode_size(leaf, item, size);
- btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
- btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(root, path);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *group)
+/*
+ * checks to see if its even possible to relocate this block group.
+ *
+ * @return - -1 if it's not a good idea to relocate this block group, 0 if its
+ * ok to go ahead and try.
+ */
+int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
{
- struct inode *inode = NULL;
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root;
- struct btrfs_key root_key;
- u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
- int err = 0;
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_space_info *space_info;
+ struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+ struct btrfs_device *device;
+ int full = 0;
+ int ret = 0;
- root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(fs_info, &root_key);
- if (IS_ERR(root))
- return ERR_CAST(root);
+ block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
- trans = btrfs_start_transaction(root, 1);
- BUG_ON(!trans);
+ /* odd, couldn't find the block group, leave it alone */
+ if (!block_group)
+ return -1;
- err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
- if (err)
+ /* no bytes used, we're good */
+ if (!btrfs_block_group_used(&block_group->item))
goto out;
- err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
- BUG_ON(err);
-
- err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
- group->key.offset, 0, group->key.offset,
- 0, 0, 0);
- BUG_ON(err);
-
- inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
- if (inode->i_state & I_NEW) {
- BTRFS_I(inode)->root = root;
- BTRFS_I(inode)->location.objectid = objectid;
- BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
- BTRFS_I(inode)->location.offset = 0;
- btrfs_read_locked_inode(inode);
- unlock_new_inode(inode);
- BUG_ON(is_bad_inode(inode));
- } else {
- BUG_ON(1);
- }
- BTRFS_I(inode)->index_cnt = group->key.objectid;
-
- err = btrfs_orphan_add(trans, inode);
-out:
- btrfs_end_transaction(trans, root);
- if (err) {
- if (inode)
- iput(inode);
- inode = ERR_PTR(err);
- }
- return inode;
-}
-
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
-{
-
- struct btrfs_ordered_sum *sums;
- struct btrfs_sector_sum *sector_sum;
- struct btrfs_ordered_extent *ordered;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct list_head list;
- size_t offset;
- int ret;
- u64 disk_bytenr;
-
- INIT_LIST_HEAD(&list);
-
- ordered = btrfs_lookup_ordered_extent(inode, file_pos);
- BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
-
- disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
- ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
- disk_bytenr + len - 1, &list);
-
- while (!list_empty(&list)) {
- sums = list_entry(list.next, struct btrfs_ordered_sum, list);
- list_del_init(&sums->list);
-
- sector_sum = sums->sums;
- sums->bytenr = ordered->start;
+ space_info = block_group->space_info;
+ spin_lock(&space_info->lock);
- offset = 0;
- while (offset < sums->len) {
- sector_sum->bytenr += ordered->start - disk_bytenr;
- sector_sum++;
- offset += root->sectorsize;
- }
+ full = space_info->full;
- btrfs_add_ordered_sum(inode, ordered, sums);
+ /*
+ * if this is the last block group we have in this space, we can't
+ * relocate it unless we're able to allocate a new chunk below.
+ *
+ * Otherwise, we need to make sure we have room in the space to handle
+ * all of the extents from this block group. If we can, we're good
+ */
+ if ((space_info->total_bytes != block_group->key.offset) &&
+ (space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ btrfs_block_group_used(&block_group->item) <
+ space_info->total_bytes)) {
+ spin_unlock(&space_info->lock);
+ goto out;
}
- btrfs_put_ordered_extent(ordered);
- return 0;
-}
-
-int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct btrfs_fs_info *info = root->fs_info;
- struct extent_buffer *leaf;
- struct inode *reloc_inode;
- struct btrfs_block_group_cache *block_group;
- struct btrfs_key key;
- u64 skipped;
- u64 cur_byte;
- u64 total_found;
- u32 nritems;
- int ret;
- int progress;
- int pass = 0;
-
- root = root->fs_info->extent_root;
-
- block_group = btrfs_lookup_block_group(info, group_start);
- BUG_ON(!block_group);
-
- printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
- (unsigned long long)block_group->key.objectid,
- (unsigned long long)block_group->flags);
-
- path = btrfs_alloc_path();
- BUG_ON(!path);
-
- reloc_inode = create_reloc_inode(info, block_group);
- BUG_ON(IS_ERR(reloc_inode));
-
- __alloc_chunk_for_shrink(root, block_group, 1);
- set_block_group_readonly(block_group);
-
- btrfs_start_delalloc_inodes(info->tree_root);
- btrfs_wait_ordered_extents(info->tree_root, 0);
-again:
- skipped = 0;
- total_found = 0;
- progress = 0;
- key.objectid = block_group->key.objectid;
- key.offset = 0;
- key.type = 0;
- cur_byte = key.objectid;
-
- trans = btrfs_start_transaction(info->tree_root, 1);
- btrfs_commit_transaction(trans, info->tree_root);
+ spin_unlock(&space_info->lock);
- mutex_lock(&root->fs_info->cleaner_mutex);
- btrfs_clean_old_snapshots(info->tree_root);
- btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
- mutex_unlock(&root->fs_info->cleaner_mutex);
+ /*
+ * ok we don't have enough space, but maybe we have free space on our
+ * devices to allocate new chunks for relocation, so loop through our
+ * alloc devices and guess if we have enough space. However, if we
+ * were marked as full, then we know there aren't enough chunks, and we
+ * can just return.
+ */
+ ret = -1;
+ if (full)
+ goto out;
- trans = btrfs_start_transaction(info->tree_root, 1);
- btrfs_commit_transaction(trans, info->tree_root);
+ mutex_lock(&root->fs_info->chunk_mutex);
+ list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+ u64 min_free = btrfs_block_group_used(&block_group->item);
+ u64 dev_offset, max_avail;
- while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
-next:
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto out;
- if (ret == 1) {
- ret = 0;
+ /*
+ * check to make sure we can actually find a chunk with enough
+ * space to fit our block group in.
+ */
+ if (device->total_bytes > device->bytes_used + min_free) {
+ ret = find_free_dev_extent(NULL, device, min_free,
+ &dev_offset, &max_avail);
+ if (!ret)
break;
- }
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
- if (key.objectid >= block_group->key.objectid +
- block_group->key.offset)
- break;
-
- if (progress && need_resched()) {
- btrfs_release_path(root, path);
- cond_resched();
- progress = 0;
- continue;
+ ret = -1;
}
- progress = 1;
-
- if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
- key.objectid + key.offset <= cur_byte) {
- path->slots[0]++;
- goto next;
- }
-
- total_found++;
- cur_byte = key.objectid + key.offset;
- btrfs_release_path(root, path);
-
- __alloc_chunk_for_shrink(root, block_group, 0);
- ret = relocate_one_extent(root, path, &key, block_group,
- reloc_inode, pass);
- BUG_ON(ret < 0);
- if (ret > 0)
- skipped++;
-
- key.objectid = cur_byte;
- key.type = 0;
- key.offset = 0;
}
-
- btrfs_release_path(root, path);
-
- if (pass == 0) {
- btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
- invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
- }
-
- if (total_found > 0) {
- printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
- (unsigned long long)total_found, pass);
- pass++;
- if (total_found == skipped && pass > 2) {
- iput(reloc_inode);
- reloc_inode = create_reloc_inode(info, block_group);
- pass = 0;
- }
- goto again;
- }
-
- /* delete reloc_inode */
- iput(reloc_inode);
-
- /* unpin extents in this range */
- trans = btrfs_start_transaction(info->tree_root, 1);
- btrfs_commit_transaction(trans, info->tree_root);
-
- spin_lock(&block_group->lock);
- WARN_ON(block_group->pinned > 0);
- WARN_ON(block_group->reserved > 0);
- WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
- spin_unlock(&block_group->lock);
- btrfs_put_block_group(block_group);
- ret = 0;
+ mutex_unlock(&root->fs_info->chunk_mutex);
out:
- btrfs_free_path(path);
+ btrfs_put_block_group(block_group);
return ret;
}
-#endif
static int find_first_block_group(struct btrfs_root *root,
struct btrfs_path *path, struct btrfs_key *key)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_space_info *space_info;
+ struct btrfs_caching_control *caching_ctl;
struct rb_node *n;
+ down_write(&info->extent_commit_sem);
+ while (!list_empty(&info->caching_block_groups)) {
+ caching_ctl = list_entry(info->caching_block_groups.next,
+ struct btrfs_caching_control, list);
+ list_del(&caching_ctl->list);
+ put_caching_control(caching_ctl);
+ }
+ up_write(&info->extent_commit_sem);
+
spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group_cache,
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ wait_block_group_cache_done(block_group);
btrfs_remove_free_space_cache(block_group);
spin_lock_init(&cache->lock);
spin_lock_init(&cache->tree_lock);
cache->fs_info = info;
- init_waitqueue_head(&cache->caching_q);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
cache->flags = btrfs_block_group_flags(&cache->item);
cache->sectorsize = root->sectorsize;
- remove_sb_from_cache(root, cache);
-
/*
* check for two cases, either we are full, and therefore
* don't need to bother with the caching work since we won't
* time, particularly in the full case.
*/
if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+ exclude_super_stripes(root, cache);
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
+ free_excluded_extents(root, cache);
} else if (btrfs_block_group_used(&cache->item) == 0) {
+ exclude_super_stripes(root, cache);
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, root->fs_info,
found_key.objectid,
found_key.objectid +
found_key.offset);
+ free_excluded_extents(root, cache);
}
ret = update_space_info(info, cache->flags, found_key.offset,
&space_info);
BUG_ON(ret);
cache->space_info = space_info;
+ spin_lock(&cache->space_info->lock);
+ cache->space_info->bytes_super += cache->bytes_super;
+ spin_unlock(&cache->space_info->lock);
+
down_write(&space_info->groups_sem);
list_add_tail(&cache->list, &space_info->block_groups);
up_write(&space_info->groups_sem);
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
spin_lock_init(&cache->tree_lock);
- init_waitqueue_head(&cache->caching_q);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
cache->flags = type;
btrfs_set_block_group_flags(&cache->item, type);
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- remove_sb_from_cache(root, cache);
+ exclude_super_stripes(root, cache);
add_new_free_space(cache, root->fs_info, chunk_offset,
chunk_offset + size);
+ free_excluded_extents(root, cache);
+
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
BUG_ON(ret);
+
+ spin_lock(&cache->space_info->lock);
+ cache->space_info->bytes_super += cache->bytes_super;
+ spin_unlock(&cache->space_info->lock);
+
down_write(&cache->space_info->groups_sem);
list_add_tail(&cache->list, &cache->space_info->block_groups);
up_write(&cache->space_info->groups_sem);
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ wait_block_group_cache_done(block_group);
btrfs_remove_free_space_cache(block_group);
}
if (bits & EXTENT_DIRTY)
tree->dirty_bytes += end - start + 1;
- set_state_cb(tree, state, bits);
- state->state |= bits;
state->start = start;
state->end = end;
+ set_state_cb(tree, state, bits);
+ state->state |= bits;
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
* bits were already set, or zero if none of the bits were already set.
*/
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete, gfp_t mask)
+ int bits, int wake, int delete,
+ struct extent_state **cached_state,
+ gfp_t mask)
{
struct extent_state *state;
+ struct extent_state *cached;
struct extent_state *prealloc = NULL;
+ struct rb_node *next_node;
struct rb_node *node;
u64 last_end;
int err;
}
spin_lock(&tree->lock);
+ if (cached_state) {
+ cached = *cached_state;
+ *cached_state = NULL;
+ cached_state = NULL;
+ if (cached && cached->tree && cached->start == start) {
+ atomic_dec(&cached->refs);
+ state = cached;
+ goto hit_next;
+ }
+ free_extent_state(cached);
+ }
/*
* this search will find the extents that end after
* our range starts
if (!node)
goto out;
state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
if (state->start > end)
goto out;
WARN_ON(state->end < start);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
- } else {
- start = state->start;
}
goto search_again;
}
if (wake)
wake_up(&state->wq);
+
set |= clear_state_bit(tree, prealloc, bits,
wake, delete);
prealloc = NULL;
goto out;
}
+ if (state->end < end && prealloc && !need_resched())
+ next_node = rb_next(&state->rb_node);
+ else
+ next_node = NULL;
+
set |= clear_state_bit(tree, state, bits, wake, delete);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
+ if (start <= end && next_node) {
+ state = rb_entry(next_node, struct extent_state,
+ rb_node);
+ if (state->start == start)
+ goto hit_next;
+ }
goto search_again;
out:
state->state |= bits;
}
+static void cache_state(struct extent_state *state,
+ struct extent_state **cached_ptr)
+{
+ if (cached_ptr && !(*cached_ptr)) {
+ if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
+ *cached_ptr = state;
+ atomic_inc(&state->refs);
+ }
+ }
+}
+
/*
- * set some bits on a range in the tree. This may require allocations
- * or sleeping, so the gfp mask is used to indicate what is allowed.
+ * set some bits on a range in the tree. This may require allocations or
+ * sleeping, so the gfp mask is used to indicate what is allowed.
*
- * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
- * range already has the desired bits set. The start of the existing
- * range is returned in failed_start in this case.
+ * If any of the exclusive bits are set, this will fail with -EEXIST if some
+ * part of the range already has the desired bits set. The start of the
+ * existing range is returned in failed_start in this case.
*
- * [start, end] is inclusive
- * This takes the tree lock.
+ * [start, end] is inclusive This takes the tree lock.
*/
+
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int exclusive, u64 *failed_start,
+ int bits, int exclusive_bits, u64 *failed_start,
+ struct extent_state **cached_state,
gfp_t mask)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
struct rb_node *node;
int err = 0;
- int set;
u64 last_start;
u64 last_end;
+
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
}
spin_lock(&tree->lock);
+ if (cached_state && *cached_state) {
+ state = *cached_state;
+ if (state->start == start && state->tree) {
+ node = &state->rb_node;
+ goto hit_next;
+ }
+ }
/*
* this search will find all the extents that end after
* our range starts.
BUG_ON(err == -EEXIST);
goto out;
}
-
state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
last_start = state->start;
last_end = state->end;
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
- set = state->state & bits;
- if (set && exclusive) {
+ struct rb_node *next_node;
+ if (state->state & exclusive_bits) {
*failed_start = state->start;
err = -EEXIST;
goto out;
}
+
set_state_bits(tree, state, bits);
+ cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
+
start = last_end + 1;
+ if (start < end && prealloc && !need_resched()) {
+ next_node = rb_next(node);
+ if (next_node) {
+ state = rb_entry(next_node, struct extent_state,
+ rb_node);
+ if (state->start == start)
+ goto hit_next;
+ }
+ }
goto search_again;
}
* desired bit on it.
*/
if (state->start < start) {
- set = state->state & bits;
- if (exclusive && set) {
+ if (state->state & exclusive_bits) {
*failed_start = start;
err = -EEXIST;
goto out;
goto out;
if (state->end <= end) {
set_state_bits(tree, state, bits);
+ cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
- } else {
- start = state->start;
}
goto search_again;
}
this_end = last_start - 1;
err = insert_state(tree, prealloc, start, this_end,
bits);
+ cache_state(prealloc, cached_state);
prealloc = NULL;
BUG_ON(err == -EEXIST);
if (err)
* on the first half
*/
if (state->start <= end && state->end > end) {
- set = state->state & bits;
- if (exclusive && set) {
+ if (state->state & exclusive_bits) {
*failed_start = start;
err = -EEXIST;
goto out;
BUG_ON(err == -EEXIST);
set_state_bits(tree, prealloc, bits);
+ cache_state(prealloc, cached_state);
merge_state(tree, prealloc);
prealloc = NULL;
goto out;
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
- mask);
-}
-
-int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+ NULL, mask);
}
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
return set_extent_bit(tree, start, end, bits, 0, NULL,
- mask);
+ NULL, mask);
}
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+ return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
}
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_DIRTY,
- 0, NULL, mask);
+ EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
+ 0, NULL, NULL, mask);
}
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
-}
-
-int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+ EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
+ NULL, mask);
}
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
- mask);
+ NULL, mask);
}
static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
+ NULL, mask);
}
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
- mask);
+ NULL, mask);
}
static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
-}
-
-static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
- 0, NULL, mask);
-}
-
-static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
- u64 end, gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+ NULL, mask);
}
int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
* either insert or lock state struct between start and end use mask to tell
* us if waiting is desired.
*/
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, struct extent_state **cached_state, gfp_t mask)
{
int err;
u64 failed_start;
while (1) {
- err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
- &failed_start, mask);
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+ EXTENT_LOCKED, &failed_start,
+ cached_state, mask);
if (err == -EEXIST && (mask & __GFP_WAIT)) {
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
start = failed_start;
return err;
}
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+ return lock_extent_bits(tree, start, end, 0, NULL, mask);
+}
+
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
int err;
u64 failed_start;
- err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
- &failed_start, mask);
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
+ &failed_start, NULL, mask);
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
- EXTENT_LOCKED, 1, 0, mask);
+ EXTENT_LOCKED, 1, 0, NULL, mask);
return 0;
}
return 1;
}
+int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+ mask);
+}
+
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
+ mask);
}
/*
page_cache_release(page);
index++;
}
- set_extent_dirty(tree, start, end, GFP_NOFS);
return 0;
}
page_cache_release(page);
index++;
}
- set_extent_writeback(tree, start, end, GFP_NOFS);
return 0;
}
u64 delalloc_start;
u64 delalloc_end;
u64 found;
+ struct extent_state *cached_state = NULL;
int ret;
int loops = 0;
/* some of the pages are gone, lets avoid looping by
* shortening the size of the delalloc range we're searching
*/
+ free_extent_state(cached_state);
if (!loops) {
unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
max_bytes = PAGE_CACHE_SIZE - offset;
BUG_ON(ret);
/* step three, lock the state bits for the whole range */
- lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+ lock_extent_bits(tree, delalloc_start, delalloc_end,
+ 0, &cached_state, GFP_NOFS);
/* then test to make sure it is all still delalloc */
ret = test_range_bit(tree, delalloc_start, delalloc_end,
- EXTENT_DELALLOC, 1);
+ EXTENT_DELALLOC, 1, cached_state);
if (!ret) {
- unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+ unlock_extent_cached(tree, delalloc_start, delalloc_end,
+ &cached_state, GFP_NOFS);
__unlock_for_delalloc(inode, locked_page,
delalloc_start, delalloc_end);
cond_resched();
goto again;
}
+ free_extent_state(cached_state);
*start = delalloc_start;
*end = delalloc_end;
out_failed:
int clear_unlock,
int clear_delalloc, int clear_dirty,
int set_writeback,
- int end_writeback)
+ int end_writeback,
+ int set_private2)
{
int ret;
struct page *pages[16];
if (clear_delalloc)
clear_bits |= EXTENT_DELALLOC;
- clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
- if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
+ clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
+ if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
+ set_private2))
return 0;
while (nr_pages > 0) {
min_t(unsigned long,
nr_pages, ARRAY_SIZE(pages)), pages);
for (i = 0; i < ret; i++) {
+
+ if (set_private2)
+ SetPagePrivate2(pages[i]);
+
if (pages[i] == locked_page) {
page_cache_release(pages[i]);
continue;
* range is found set.
*/
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled)
+ int bits, int filled, struct extent_state *cached)
{
struct extent_state *state = NULL;
struct rb_node *node;
int bitset = 0;
spin_lock(&tree->lock);
- node = tree_search(tree, start);
+ if (cached && cached->tree && cached->start == start)
+ node = &cached->rb_node;
+ else
+ node = tree_search(tree, start);
while (node && start <= end) {
state = rb_entry(node, struct extent_state, rb_node);
bitset = 0;
break;
}
+
+ if (state->end == (u64)-1)
+ break;
+
start = state->end + 1;
if (start > end)
break;
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
- if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
+ if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
SetPageUptodate(page);
return 0;
}
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
+ if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
unlock_page(page);
return 0;
}
static int check_page_writeback(struct extent_io_tree *tree,
struct page *page)
{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
- end_page_writeback(page);
+ end_page_writeback(page);
return 0;
}
}
if (!uptodate) {
- clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+ clear_extent_uptodate(tree, start, end, GFP_NOFS);
ClearPageUptodate(page);
SetPageError(page);
}
- clear_extent_writeback(tree, start, end, GFP_ATOMIC);
-
if (whole_page)
end_page_writeback(page);
else
continue;
}
/* the get_extent function already copied into the page */
- if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+ if (test_range_bit(tree, cur, cur_end,
+ EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
cur = cur + iosize;
u64 iosize;
u64 unlock_start;
sector_t sector;
+ struct extent_state *cached_state = NULL;
struct extent_map *em;
struct block_device *bdev;
int ret;
delalloc_end = 0;
page_started = 0;
if (!epd->extent_locked) {
+ u64 delalloc_to_write = 0;
/*
* make sure the wbc mapping index is at least updated
* to this page.
tree->ops->fill_delalloc(inode, page, delalloc_start,
delalloc_end, &page_started,
&nr_written);
+ /*
+ * delalloc_end is already one less than the total
+ * length, so we don't subtract one from
+ * PAGE_CACHE_SIZE
+ */
+ delalloc_to_write += (delalloc_end - delalloc_start +
+ PAGE_CACHE_SIZE) >>
+ PAGE_CACHE_SHIFT;
delalloc_start = delalloc_end + 1;
}
+ if (wbc->nr_to_write < delalloc_to_write) {
+ int thresh = 8192;
+
+ if (delalloc_to_write < thresh * 2)
+ thresh = delalloc_to_write;
+ wbc->nr_to_write = min_t(u64, delalloc_to_write,
+ thresh);
+ }
/* did the fill delalloc function already unlock and start
* the IO?
goto done_unlocked;
}
}
- lock_extent(tree, start, page_end, GFP_NOFS);
-
- unlock_start = start;
-
if (tree->ops && tree->ops->writepage_start_hook) {
ret = tree->ops->writepage_start_hook(page, start,
page_end);
if (ret == -EAGAIN) {
- unlock_extent(tree, start, page_end, GFP_NOFS);
redirty_page_for_writepage(wbc, page);
update_nr_written(page, wbc, nr_written);
unlock_page(page);
update_nr_written(page, wbc, nr_written + 1);
end = page_end;
- if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
- printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
-
if (last_byte <= start) {
- clear_extent_dirty(tree, start, page_end, GFP_NOFS);
- unlock_extent(tree, start, page_end, GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1);
goto done;
}
- set_extent_uptodate(tree, start, page_end, GFP_NOFS);
blocksize = inode->i_sb->s_blocksize;
while (cur <= end) {
if (cur >= last_byte) {
- clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
- unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
page_end, NULL, 1);
*/
if (compressed || block_start == EXTENT_MAP_HOLE ||
block_start == EXTENT_MAP_INLINE) {
- clear_extent_dirty(tree, cur,
- cur + iosize - 1, GFP_NOFS);
-
- unlock_extent(tree, unlock_start, cur + iosize - 1,
- GFP_NOFS);
-
/*
* end_io notification does not happen here for
* compressed extents
}
/* leave this out until we have a page_mkwrite call */
if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
- EXTENT_DIRTY, 0)) {
+ EXTENT_DIRTY, 0, NULL)) {
cur = cur + iosize;
pg_offset += iosize;
continue;
}
- clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
if (tree->ops && tree->ops->writepage_io_hook) {
ret = tree->ops->writepage_io_hook(page, cur,
cur + iosize - 1);
set_page_writeback(page);
end_page_writeback(page);
}
- if (unlock_start <= page_end)
- unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
done_unlocked:
+ /* drop our reference on any cached states */
+ free_extent_state(cached_state);
return 0;
}
writepage_t writepage, void *data,
void (*flush_fn)(void *))
{
- struct backing_dev_info *bdi = mapping->backing_dev_info;
int ret = 0;
int done = 0;
+ int nr_to_write_done = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t index;
scanned = 1;
}
retry:
- while (!done && (index <= end) &&
+ while (!done && !nr_to_write_done && (index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY, min(end - index,
(pgoff_t)PAGEVEC_SIZE-1) + 1))) {
unlock_page(page);
ret = 0;
}
- if (ret || wbc->nr_to_write <= 0)
- done = 1;
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
+ if (ret)
done = 1;
- }
+
+ /*
+ * the filesystem may choose to bump up nr_to_write.
+ * We have to make sure to honor the new nr_to_write
+ * at any time
+ */
+ nr_to_write_done = wbc->nr_to_write <= 0;
}
pagevec_release(&pvec);
cond_resched();
return 0;
lock_extent(tree, start, end, GFP_NOFS);
- wait_on_extent_writeback(tree, start, end);
+ wait_on_page_writeback(page);
clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
- 1, 1, GFP_NOFS);
+ 1, 1, NULL, GFP_NOFS);
return 0;
}
!isnew && !PageUptodate(page) &&
(block_off_end > to || block_off_start < from) &&
!test_range_bit(tree, block_start, cur_end,
- EXTENT_UPTODATE, 1)) {
+ EXTENT_UPTODATE, 1, NULL)) {
u64 sector;
u64 extent_offset = block_start - em->start;
size_t iosize;
*/
set_extent_bit(tree, block_start,
block_start + iosize - 1,
- EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+ EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
ret = submit_extent_page(READ, tree, page,
sector, iosize, page_offset, em->bdev,
NULL, 1,
int ret = 1;
if (test_range_bit(tree, start, end,
- EXTENT_IOBITS | EXTENT_ORDERED, 0))
+ EXTENT_IOBITS, 0, NULL))
ret = 0;
else {
if ((mask & GFP_NOFS) == GFP_NOFS)
mask = GFP_NOFS;
- clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
- 1, 1, mask);
+ /*
+ * at this point we can safely clear everything except the
+ * locked bit and the nodatasum bit
+ */
+ clear_extent_bit(tree, start, end,
+ ~(EXTENT_LOCKED | EXTENT_NODATASUM),
+ 0, 0, NULL, mask);
}
return ret;
}
u64 len;
while (start <= end) {
len = end - start + 1;
- spin_lock(&map->lock);
+ write_lock(&map->lock);
em = lookup_extent_mapping(map, start, len);
if (!em || IS_ERR(em)) {
- spin_unlock(&map->lock);
+ write_unlock(&map->lock);
break;
}
if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
em->start != start) {
- spin_unlock(&map->lock);
+ write_unlock(&map->lock);
free_extent_map(em);
break;
}
if (!test_range_bit(tree, em->start,
extent_map_end(em) - 1,
- EXTENT_LOCKED | EXTENT_WRITEBACK |
- EXTENT_ORDERED,
- 0)) {
+ EXTENT_LOCKED | EXTENT_WRITEBACK,
+ 0, NULL)) {
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
}
start = extent_map_end(em);
- spin_unlock(&map->lock);
+ write_unlock(&map->lock);
/* once for us */
free_extent_map(em);
int uptodate;
unsigned long index;
- ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
+ ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
if (ret)
return 1;
while (start <= end) {
return 1;
ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1);
+ EXTENT_UPTODATE, 1, NULL);
if (ret)
return ret;
return 0;
if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1)) {
+ EXTENT_UPTODATE, 1, NULL)) {
return 0;
}
#define EXTENT_DEFRAG (1 << 6)
#define EXTENT_DEFRAG_DONE (1 << 7)
#define EXTENT_BUFFER_FILLED (1 << 8)
-#define EXTENT_ORDERED (1 << 9)
-#define EXTENT_ORDERED_METADATA (1 << 10)
-#define EXTENT_BOUNDARY (1 << 11)
-#define EXTENT_NODATASUM (1 << 12)
+#define EXTENT_BOUNDARY (1 << 9)
+#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
/* flags for bio submission */
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, struct extent_state **cached, gfp_t mask);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
u64 max_bytes, unsigned long bits);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled);
+ int bits, int filled, struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete, gfp_t mask);
+ int bits, int wake, int delete, struct extent_state **cached,
+ gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
int clear_unlock,
int clear_delalloc, int clear_dirty,
int set_writeback,
- int end_writeback);
+ int end_writeback,
+ int set_private2);
#endif
void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
{
tree->map.rb_node = NULL;
- spin_lock_init(&tree->lock);
+ rwlock_init(&tree->lock);
}
/**
return 0;
}
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
+{
+ int ret = 0;
+ struct extent_map *merge = NULL;
+ struct rb_node *rb;
+ struct extent_map *em;
+
+ write_lock(&tree->lock);
+ em = lookup_extent_mapping(tree, start, len);
+
+ WARN_ON(em->start != start || !em);
+
+ if (!em)
+ goto out;
+
+ clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+ if (em->start != 0) {
+ rb = rb_prev(&em->rb_node);
+ if (rb)
+ merge = rb_entry(rb, struct extent_map, rb_node);
+ if (rb && mergable_maps(merge, em)) {
+ em->start = merge->start;
+ em->len += merge->len;
+ em->block_len += merge->block_len;
+ em->block_start = merge->block_start;
+ merge->in_tree = 0;
+ rb_erase(&merge->rb_node, &tree->map);
+ free_extent_map(merge);
+ }
+ }
+
+ rb = rb_next(&em->rb_node);
+ if (rb)
+ merge = rb_entry(rb, struct extent_map, rb_node);
+ if (rb && mergable_maps(em, merge)) {
+ em->len += merge->len;
+ em->block_len += merge->len;
+ rb_erase(&merge->rb_node, &tree->map);
+ merge->in_tree = 0;
+ free_extent_map(merge);
+ }
+
+ free_extent_map(em);
+out:
+ write_unlock(&tree->lock);
+ return ret;
+
+}
+
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
ret = -EEXIST;
goto out;
}
- assert_spin_locked(&tree->lock);
rb = tree_insert(&tree->map, em->start, &em->rb_node);
if (rb) {
ret = -EEXIST;
struct rb_node *next = NULL;
u64 end = range_end(start, len);
- assert_spin_locked(&tree->lock);
rb_node = __tree_search(&tree->map, start, &prev, &next);
if (!rb_node && prev) {
em = rb_entry(prev, struct extent_map, rb_node);
return em;
}
+/**
+ * search_extent_mapping - find a nearby extent map
+ * @tree: tree to lookup in
+ * @start: byte offset to start the search
+ * @len: length of the lookup range
+ *
+ * Find and return the first extent_map struct in @tree that intersects the
+ * [start, len] range.
+ *
+ * If one can't be found, any nearby extent may be returned
+ */
+struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len)
+{
+ struct extent_map *em;
+ struct rb_node *rb_node;
+ struct rb_node *prev = NULL;
+ struct rb_node *next = NULL;
+
+ rb_node = __tree_search(&tree->map, start, &prev, &next);
+ if (!rb_node && prev) {
+ em = rb_entry(prev, struct extent_map, rb_node);
+ goto found;
+ }
+ if (!rb_node && next) {
+ em = rb_entry(next, struct extent_map, rb_node);
+ goto found;
+ }
+ if (!rb_node) {
+ em = NULL;
+ goto out;
+ }
+ if (IS_ERR(rb_node)) {
+ em = ERR_PTR(PTR_ERR(rb_node));
+ goto out;
+ }
+ em = rb_entry(rb_node, struct extent_map, rb_node);
+ goto found;
+
+ em = NULL;
+ goto out;
+
+found:
+ atomic_inc(&em->refs);
+out:
+ return em;
+}
+
/**
* remove_extent_mapping - removes an extent_map from the extent tree
* @tree: extent tree to remove from
int ret = 0;
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
- assert_spin_locked(&tree->lock);
rb_erase(&em->rb_node, &tree->map);
em->in_tree = 0;
return ret;
struct extent_map_tree {
struct rb_root map;
- spinlock_t lock;
+ rwlock_t lock;
};
static inline u64 extent_map_end(struct extent_map *em)
void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
void extent_map_exit(void);
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
+struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len);
#endif
int err = 0;
int i;
struct inode *inode = fdentry(file)->d_inode;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- u64 hint_byte;
u64 num_bytes;
u64 start_pos;
u64 end_of_last_block;
root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
end_of_last_block = start_pos + num_bytes - 1;
-
- lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
- trans = btrfs_join_transaction(root, 1);
- if (!trans) {
- err = -ENOMEM;
- goto out_unlock;
- }
- btrfs_set_trans_block_group(trans, inode);
- hint_byte = 0;
-
- set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-
- /* check for reserved extents on each page, we don't want
- * to reset the delalloc bit on things that already have
- * extents reserved.
- */
btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
* at this time.
*/
}
- err = btrfs_end_transaction(trans, root);
-out_unlock:
- unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
return err;
}
if (!split2)
split2 = alloc_extent_map(GFP_NOFS);
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
break;
}
flags = em->flags;
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
- spin_unlock(&em_tree->lock);
if (em->start <= start &&
(!testend || em->start + em->len >= start + len)) {
free_extent_map(em);
+ write_unlock(&em_tree->lock);
break;
}
if (start < em->start) {
start = em->start + em->len;
}
free_extent_map(em);
+ write_unlock(&em_tree->lock);
continue;
}
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
free_extent_map(split);
split = NULL;
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
/* once for us */
free_extent_map(em);
noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
u64 start, u64 end, u64 locked_end,
- u64 inline_limit, u64 *hint_byte)
+ u64 inline_limit, u64 *hint_byte, int drop_cache)
{
u64 extent_end = 0;
u64 search_start = start;
int ret;
inline_limit = 0;
- btrfs_drop_extent_cache(inode, start, end - 1, 0);
+ if (drop_cache)
+ btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
if (!path)
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
- u64 max_bytes, possible_bytes;
+ u64 max_bytes;
+ u64 bitmap_bytes;
+ u64 extent_bytes;
/*
* The goal is to keep the total amount of memory used per 1gb of space
max_bytes = MAX_CACHE_BYTES_PER_GIG *
(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
- possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
- (sizeof(struct btrfs_free_space) *
- block_group->extents_thresh);
+ /*
+ * we want to account for 1 more bitmap than what we have so we can make
+ * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
+ * we add more bitmaps.
+ */
+ bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;
- if (possible_bytes > max_bytes) {
- int extent_bytes = max_bytes -
- (block_group->total_bitmaps * PAGE_CACHE_SIZE);
+ if (bitmap_bytes >= max_bytes) {
+ block_group->extents_thresh = 0;
+ return;
+ }
- if (extent_bytes <= 0) {
- block_group->extents_thresh = 0;
- return;
- }
+ /*
+ * we want the extent entry threshold to always be at most 1/2 the maxw
+ * bytes we can have, or whatever is less than that.
+ */
+ extent_bytes = max_bytes - bitmap_bytes;
+ extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
- block_group->extents_thresh = extent_bytes /
- (sizeof(struct btrfs_free_space));
- }
+ block_group->extents_thresh =
+ div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
BUG_ON(block_group->total_bitmaps >= max_bitmaps);
info->offset = offset_to_bitmap(block_group, offset);
+ info->bytes = 0;
link_free_space(block_group, info);
block_group->total_bitmaps++;
ptr = (unsigned long)(ref + 1);
ret = 0;
} else if (ret < 0) {
+ if (ret == -EOVERFLOW)
+ ret = -EMLINK;
goto out;
} else {
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
ret = btrfs_insert_empty_item(trans, root, path, &key,
sizeof(struct btrfs_inode_item));
- if (ret == 0 && objectid > root->highest_inode)
- root->highest_inode = objectid;
return ret;
}
slot = path->slots[0] - 1;
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &found_key, slot);
- *objectid = found_key.objectid;
+ *objectid = max_t(u64, found_key.objectid,
+ BTRFS_FIRST_FREE_OBJECTID - 1);
} else {
- *objectid = BTRFS_FIRST_FREE_OBJECTID;
+ *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
}
ret = 0;
error:
return ret;
}
-/*
- * walks the btree of allocated inodes and find a hole.
- */
int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 dirid, u64 *objectid)
{
- struct btrfs_path *path;
- struct btrfs_key key;
int ret;
- int slot = 0;
- u64 last_ino = 0;
- int start_found;
- struct extent_buffer *l;
- struct btrfs_key search_key;
- u64 search_start = dirid;
-
mutex_lock(&root->objectid_mutex);
- if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID &&
- root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) {
- *objectid = ++root->last_inode_alloc;
- mutex_unlock(&root->objectid_mutex);
- return 0;
- }
- path = btrfs_alloc_path();
- BUG_ON(!path);
- search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
- search_key.objectid = search_start;
- search_key.type = 0;
- search_key.offset = 0;
-
- start_found = 0;
- ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
- if (ret < 0)
- goto error;
- while (1) {
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto error;
- if (!start_found) {
- *objectid = search_start;
- start_found = 1;
- goto found;
- }
- *objectid = last_ino > search_start ?
- last_ino : search_start;
- goto found;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid >= search_start) {
- if (start_found) {
- if (last_ino < search_start)
- last_ino = search_start;
- if (key.objectid > last_ino) {
- *objectid = last_ino;
- goto found;
- }
- } else if (key.objectid > search_start) {
- *objectid = search_start;
- goto found;
- }
- }
- if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
- break;
+ if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
+ ret = btrfs_find_highest_inode(root, &root->highest_objectid);
+ if (ret)
+ goto out;
+ }
- start_found = 1;
- last_ino = key.objectid + 1;
- path->slots[0]++;
+ if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
+ ret = -ENOSPC;
+ goto out;
}
- BUG_ON(1);
-found:
- btrfs_release_path(root, path);
- btrfs_free_path(path);
- BUG_ON(*objectid < search_start);
- mutex_unlock(&root->objectid_mutex);
- return 0;
-error:
- btrfs_release_path(root, path);
- btrfs_free_path(path);
+
+ *objectid = ++root->highest_objectid;
+ ret = 0;
+out:
mutex_unlock(&root->objectid_mutex);
return ret;
}
}
ret = btrfs_drop_extents(trans, root, inode, start,
- aligned_end, aligned_end, start, &hint_byte);
+ aligned_end, aligned_end, start,
+ &hint_byte, 1);
BUG_ON(ret);
if (isize > actual_end)
inline_len, compressed_size,
compressed_pages);
BUG_ON(ret);
- btrfs_drop_extent_cache(inode, start, aligned_end, 0);
+ btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
}
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
start, end, NULL, 1, 0,
- 0, 1, 1, 1);
+ 0, 1, 1, 1, 0);
ret = 0;
goto free_pages_out;
}
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- NULL, 1, 1, 0, 1, 1, 0);
+ NULL, 1, 1, 0, 1, 1, 0, 0);
ret = btrfs_submit_compressed_write(inode,
async_extent->start,
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
start, end, NULL, 1, 1,
- 1, 1, 1, 1);
+ 1, 1, 1, 1, 0);
*nr_written = *nr_written +
(end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
*page_started = 1;
BUG_ON(disk_num_bytes >
btrfs_super_total_bytes(&root->fs_info->super_copy));
+
+ read_lock(&BTRFS_I(inode)->extent_tree.lock);
+ em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
+ start, num_bytes);
+ if (em) {
+ alloc_hint = em->block_start;
+ free_extent_map(em);
+ }
+ read_unlock(&BTRFS_I(inode)->extent_tree.lock);
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
while (disk_num_bytes > 0) {
em = alloc_extent_map(GFP_NOFS);
em->start = start;
em->orig_start = em->start;
-
ram_size = ins.offset;
em->len = ins.offset;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
/* we're not doing compressed IO, don't unlock the first
* page (which the caller expects to stay locked), don't
* clear any dirty bits and don't set any writeback bits
+ *
+ * Do set the Private2 bit so we know this page was properly
+ * setup for writepage
*/
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
start, start + ram_size - 1,
locked_page, unlock, 1,
- 1, 0, 0, 0);
+ 1, 0, 0, 0, 1);
disk_num_bytes -= cur_alloc_size;
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
int limit = 10 * 1024 * 1042;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
- EXTENT_DELALLOC, 1, 0, GFP_NOFS);
+ EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
while (start < end) {
async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
async_cow->inode = inode;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
cur_offset, cur_offset + num_bytes - 1,
- locked_page, 1, 1, 1, 0, 0, 0);
+ locked_page, 1, 1, 1, 0, 0, 0, 1);
cur_offset = extent_end;
if (cur_offset > end)
break;
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
/* already ordered? We're done */
- if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_ORDERED, 0)) {
+ if (PagePrivate2(page))
goto out;
- }
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
struct inode *inode = page->mapping->host;
struct btrfs_writepage_fixup *fixup;
struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
- ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
- EXTENT_ORDERED, 0);
- if (ret)
+ /* this page is properly in the ordered list */
+ if (TestClearPagePrivate2(page))
return 0;
if (PageChecked(page))
BUG_ON(!path);
path->leave_spinning = 1;
+
+ /*
+ * we may be replacing one extent in the tree with another.
+ * The new extent is pinned in the extent map, and we don't want
+ * to drop it from the cache until it is completely in the btree.
+ *
+ * So, tell btrfs_drop_extents to leave this extent in the cache.
+ * the caller is expected to unpin it and allow it to be merged
+ * with the others.
+ */
ret = btrfs_drop_extents(trans, root, inode, file_pos,
file_pos + num_bytes, locked_end,
- file_pos, &hint);
+ file_pos, &hint, 0);
BUG_ON(ret);
ins.objectid = inode->i_ino;
btrfs_mark_buffer_dirty(leaf);
inode_add_bytes(inode, num_bytes);
- btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
ordered_extent->len,
compressed, 0, 0,
BTRFS_FILE_EXTENT_REG);
+ unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+ ordered_extent->file_offset,
+ ordered_extent->len);
BUG_ON(ret);
}
unlock_extent(io_tree, ordered_extent->file_offset,
static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate)
{
+ ClearPagePrivate2(page);
return btrfs_finish_ordered_io(page->mapping->host, start, end);
}
failrec->last_mirror = 0;
failrec->bio_flags = 0;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, failrec->len);
if (em->start > start || em->start + em->len < start) {
free_extent_map(em);
em = NULL;
}
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em || IS_ERR(em)) {
kfree(failrec);
return 0;
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
- test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) {
+ test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
GFP_NOFS);
return 0;
return ret;
}
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *dir, u64 objectid,
+ const char *name, int name_len)
+{
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_dir_item *di;
+ struct btrfs_key key;
+ u64 index;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+ name, name_len, -1);
+ BUG_ON(!di || IS_ERR(di));
+
+ leaf = path->nodes[0];
+ btrfs_dir_item_key_to_cpu(leaf, di, &key);
+ WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+ ret = btrfs_delete_one_dir_name(trans, root, path, di);
+ BUG_ON(ret);
+ btrfs_release_path(root, path);
+
+ ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+ objectid, root->root_key.objectid,
+ dir->i_ino, &index, name, name_len);
+ if (ret < 0) {
+ BUG_ON(ret != -ENOENT);
+ di = btrfs_search_dir_index_item(root, path, dir->i_ino,
+ name, name_len);
+ BUG_ON(!di || IS_ERR(di));
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ btrfs_release_path(root, path);
+ index = key.offset;
+ }
+
+ di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
+ index, name, name_len, -1);
+ BUG_ON(!di || IS_ERR(di));
+
+ leaf = path->nodes[0];
+ btrfs_dir_item_key_to_cpu(leaf, di, &key);
+ WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+ ret = btrfs_delete_one_dir_name(trans, root, path, di);
+ BUG_ON(ret);
+ btrfs_release_path(root, path);
+
+ btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ ret = btrfs_update_inode(trans, root, dir);
+ BUG_ON(ret);
+ dir->i_sb->s_dirt = 1;
+
+ btrfs_free_path(path);
+ return 0;
+}
+
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct btrfs_trans_handle *trans;
unsigned long nr = 0;
- /*
- * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir
- * the root of a subvolume or snapshot
- */
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
- inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
return -ENOTEMPTY;
- }
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, dir);
+ if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+ err = btrfs_unlink_subvol(trans, root, dir,
+ BTRFS_I(inode)->location.objectid,
+ dentry->d_name.name,
+ dentry->d_name.len);
+ goto out;
+ }
+
err = btrfs_orphan_add(trans, inode);
if (err)
- goto fail_trans;
+ goto out;
/* now the directory is empty */
err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
dentry->d_name.name, dentry->d_name.len);
if (!err)
btrfs_i_size_write(inode, 0);
-
-fail_trans:
+out:
nr = trans->blocks_used;
ret = btrfs_end_transaction_throttle(trans, root);
btrfs_btree_balance_dirty(root, nr);
cur_offset,
cur_offset + hole_size,
block_end,
- cur_offset, &hint_byte);
+ cur_offset, &hint_byte, 1);
if (err)
break;
err = btrfs_insert_file_extent(trans, root,
}
btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (inode->i_nlink > 0) {
+ BUG_ON(btrfs_root_refs(&root->root_item) != 0);
+ goto no_delete;
+ }
+
btrfs_i_size_write(inode, 0);
trans = btrfs_join_transaction(root, 1);
* is kind of like crossing a mount point.
*/
static int fixup_tree_root_location(struct btrfs_root *root,
- struct btrfs_key *location,
- struct btrfs_root **sub_root,
- struct dentry *dentry)
+ struct inode *dir,
+ struct dentry *dentry,
+ struct btrfs_key *location,
+ struct btrfs_root **sub_root)
{
- struct btrfs_root_item *ri;
+ struct btrfs_path *path;
+ struct btrfs_root *new_root;
+ struct btrfs_root_ref *ref;
+ struct extent_buffer *leaf;
+ int ret;
+ int err = 0;
- if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
- return 0;
- if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
- return 0;
+ path = btrfs_alloc_path();
+ if (!path) {
+ err = -ENOMEM;
+ goto out;
+ }
- *sub_root = btrfs_read_fs_root(root->fs_info, location,
- dentry->d_name.name,
- dentry->d_name.len);
- if (IS_ERR(*sub_root))
- return PTR_ERR(*sub_root);
+ err = -ENOENT;
+ ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
+ BTRFS_I(dir)->root->root_key.objectid,
+ location->objectid);
+ if (ret) {
+ if (ret < 0)
+ err = ret;
+ goto out;
+ }
- ri = &(*sub_root)->root_item;
- location->objectid = btrfs_root_dirid(ri);
- btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
- location->offset = 0;
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
+ if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino ||
+ btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
+ goto out;
- return 0;
+ ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
+ (unsigned long)(ref + 1),
+ dentry->d_name.len);
+ if (ret)
+ goto out;
+
+ btrfs_release_path(root->fs_info->tree_root, path);
+
+ new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
+ if (IS_ERR(new_root)) {
+ err = PTR_ERR(new_root);
+ goto out;
+ }
+
+ if (btrfs_root_refs(&new_root->root_item) == 0) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ *sub_root = new_root;
+ location->objectid = btrfs_root_dirid(&new_root->root_item);
+ location->type = BTRFS_INODE_ITEM_KEY;
+ location->offset = 0;
+ err = 0;
+out:
+ btrfs_free_path(path);
+ return err;
}
static void inode_tree_add(struct inode *inode)
struct btrfs_inode *entry;
struct rb_node **p;
struct rb_node *parent;
-
again:
p = &root->inode_tree.rb_node;
parent = NULL;
+ if (hlist_unhashed(&inode->i_hash))
+ return;
+
spin_lock(&root->inode_lock);
while (*p) {
parent = *p;
static void inode_tree_del(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ int empty = 0;
spin_lock(&root->inode_lock);
if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
+ empty = RB_EMPTY_ROOT(&root->inode_tree);
}
spin_unlock(&root->inode_lock);
+
+ if (empty && btrfs_root_refs(&root->root_item) == 0) {
+ synchronize_srcu(&root->fs_info->subvol_srcu);
+ spin_lock(&root->inode_lock);
+ empty = RB_EMPTY_ROOT(&root->inode_tree);
+ spin_unlock(&root->inode_lock);
+ if (empty)
+ btrfs_add_dead_root(root);
+ }
+}
+
+int btrfs_invalidate_inodes(struct btrfs_root *root)
+{
+ struct rb_node *node;
+ struct rb_node *prev;
+ struct btrfs_inode *entry;
+ struct inode *inode;
+ u64 objectid = 0;
+
+ WARN_ON(btrfs_root_refs(&root->root_item) != 0);
+
+ spin_lock(&root->inode_lock);
+again:
+ node = root->inode_tree.rb_node;
+ prev = NULL;
+ while (node) {
+ prev = node;
+ entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+ if (objectid < entry->vfs_inode.i_ino)
+ node = node->rb_left;
+ else if (objectid > entry->vfs_inode.i_ino)
+ node = node->rb_right;
+ else
+ break;
+ }
+ if (!node) {
+ while (prev) {
+ entry = rb_entry(prev, struct btrfs_inode, rb_node);
+ if (objectid <= entry->vfs_inode.i_ino) {
+ node = prev;
+ break;
+ }
+ prev = rb_next(prev);
+ }
+ }
+ while (node) {
+ entry = rb_entry(node, struct btrfs_inode, rb_node);
+ objectid = entry->vfs_inode.i_ino + 1;
+ inode = igrab(&entry->vfs_inode);
+ if (inode) {
+ spin_unlock(&root->inode_lock);
+ if (atomic_read(&inode->i_count) > 1)
+ d_prune_aliases(inode);
+ /*
+ * btrfs_drop_inode will remove it from
+ * the inode cache when its usage count
+ * hits zero.
+ */
+ iput(inode);
+ cond_resched();
+ spin_lock(&root->inode_lock);
+ goto again;
+ }
+
+ if (cond_resched_lock(&root->inode_lock))
+ goto again;
+
+ node = rb_next(node);
+ }
+ spin_unlock(&root->inode_lock);
+ return 0;
}
static noinline void init_btrfs_i(struct inode *inode)
return inode;
}
+static struct inode *new_simple_dir(struct super_block *s,
+ struct btrfs_key *key,
+ struct btrfs_root *root)
+{
+ struct inode *inode = new_inode(s);
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ init_btrfs_i(inode);
+
+ BTRFS_I(inode)->root = root;
+ memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
+ BTRFS_I(inode)->dummy_inode = 1;
+
+ inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+ return inode;
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
struct inode *inode;
- struct btrfs_inode *bi = BTRFS_I(dir);
- struct btrfs_root *root = bi->root;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
+ int index;
int ret;
+ dentry->d_op = &btrfs_dentry_operations;
+
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
if (ret < 0)
return ERR_PTR(ret);
- inode = NULL;
- if (location.objectid) {
- ret = fixup_tree_root_location(root, &location, &sub_root,
- dentry);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return ERR_PTR(-ENOENT);
+ if (location.objectid == 0)
+ return NULL;
+
+ if (location.type == BTRFS_INODE_ITEM_KEY) {
+ inode = btrfs_iget(dir->i_sb, &location, root);
+ return inode;
+ }
+
+ BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
+
+ index = srcu_read_lock(&root->fs_info->subvol_srcu);
+ ret = fixup_tree_root_location(root, dir, dentry,
+ &location, &sub_root);
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ inode = ERR_PTR(ret);
+ else
+ inode = new_simple_dir(dir->i_sb, &location, sub_root);
+ } else {
inode = btrfs_iget(dir->i_sb, &location, sub_root);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
}
+ srcu_read_unlock(&root->fs_info->subvol_srcu, index);
+
return inode;
}
+static int btrfs_dentry_delete(struct dentry *dentry)
+{
+ struct btrfs_root *root;
+
+ if (!dentry->d_inode)
+ return 0;
+
+ root = BTRFS_I(dentry->d_inode)->root;
+ if (btrfs_root_refs(&root->root_item) == 0)
+ return 1;
+ return 0;
+}
+
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
struct inode *inode;
- if (dentry->d_name.len > BTRFS_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
-
inode = btrfs_lookup_dentry(dir, dentry);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (ret != 0)
goto fail;
- if (objectid > root->highest_inode)
- root->highest_inode = objectid;
-
inode->i_uid = current_fsuid();
if (dir && (dir->i_mode & S_ISGID)) {
struct inode *parent_inode, struct inode *inode,
const char *name, int name_len, int add_backref, u64 index)
{
- int ret;
+ int ret = 0;
struct btrfs_key key;
struct btrfs_root *root = BTRFS_I(parent_inode)->root;
- key.objectid = inode->i_ino;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
- key.offset = 0;
+ if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
+ } else {
+ key.objectid = inode->i_ino;
+ btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+ key.offset = 0;
+ }
+
+ if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
+ key.objectid, root->root_key.objectid,
+ parent_inode->i_ino,
+ index, name, name_len);
+ } else if (add_backref) {
+ ret = btrfs_insert_inode_ref(trans, root,
+ name, name_len, inode->i_ino,
+ parent_inode->i_ino, index);
+ }
- ret = btrfs_insert_dir_item(trans, root, name, name_len,
- parent_inode->i_ino,
- &key, btrfs_inode_type(inode),
- index);
if (ret == 0) {
- if (add_backref) {
- ret = btrfs_insert_inode_ref(trans, root,
- name, name_len,
- inode->i_ino,
- parent_inode->i_ino,
- index);
- }
+ ret = btrfs_insert_dir_item(trans, root, name, name_len,
+ parent_inode->i_ino, &key,
+ btrfs_inode_type(inode), index);
+ BUG_ON(ret);
+
btrfs_i_size_write(parent_inode, parent_inode->i_size +
name_len * 2);
parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
err = btrfs_add_nondir(trans, dentry, inode, 1, index);
- if (err)
- drop_inode = 1;
-
- btrfs_update_inode_block_group(trans, dir);
- err = btrfs_update_inode(trans, root, inode);
-
- if (err)
+ if (err) {
drop_inode = 1;
+ } else {
+ btrfs_update_inode_block_group(trans, dir);
+ err = btrfs_update_inode(trans, root, inode);
+ BUG_ON(err);
+ btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
+ }
nr = trans->blocks_used;
-
- btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
btrfs_end_transaction_throttle(trans, root);
fail:
if (drop_inode) {
int compressed;
again:
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em)
em->bdev = root->fs_info->fs_devices->latest_bdev;
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (em) {
if (em->start > start || em->start + em->len <= start)
map = kmap(page);
read_extent_buffer(leaf, map + pg_offset, ptr,
copy_size);
+ if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
+ memset(map + pg_offset + copy_size, 0,
+ PAGE_CACHE_SIZE - pg_offset -
+ copy_size);
+ }
kunmap(page);
}
flush_dcache_page(page);
}
err = 0;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
err = 0;
}
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
out:
if (path)
btrfs_free_path(path);
u64 page_start = page_offset(page);
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+
+ /*
+ * we have the page locked, so new writeback can't start,
+ * and the dirty bit won't be cleared while we are here.
+ *
+ * Wait for IO on this page so that we can safely clear
+ * the PagePrivate2 bit and do ordered accounting
+ */
wait_on_page_writeback(page);
+
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (offset) {
btrfs_releasepage(page, GFP_NOFS);
return;
}
-
lock_extent(tree, page_start, page_end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(page->mapping->host,
page_offset(page));
*/
clear_extent_bit(tree, page_start, page_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_LOCKED, 1, 0, GFP_NOFS);
- btrfs_finish_ordered_io(page->mapping->host,
- page_start, page_end);
+ EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+ /*
+ * whoever cleared the private bit is responsible
+ * for the finish_ordered_io
+ */
+ if (TestClearPagePrivate2(page)) {
+ btrfs_finish_ordered_io(page->mapping->host,
+ page_start, page_end);
+ }
btrfs_put_ordered_extent(ordered);
lock_extent(tree, page_start, page_end, GFP_NOFS);
}
clear_extent_bit(tree, page_start, page_end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_ORDERED,
- 1, 1, GFP_NOFS);
+ EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+ 1, 1, NULL, GFP_NOFS);
__btrfs_releasepage(page, GFP_NOFS);
ClearPageChecked(page);
}
ClearPageChecked(page);
set_page_dirty(page);
+ SetPageUptodate(page);
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
+ if (!ret)
+ return VM_FAULT_LOCKED;
unlock_page(page);
out:
return ret;
* create a new subvolume directory/inode (helper for the ioctl).
*/
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, struct dentry *dentry,
+ struct btrfs_root *new_root,
u64 new_dirid, u64 alloc_hint)
{
struct inode *inode;
- int error;
+ int err;
u64 index = 0;
inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
inode->i_nlink = 1;
btrfs_i_size_write(inode, 0);
- error = btrfs_update_inode(trans, new_root, inode);
- if (error)
- return error;
+ err = btrfs_update_inode(trans, new_root, inode);
+ BUG_ON(err);
- d_instantiate(dentry, inode);
+ iput(inode);
return 0;
}
kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
+void btrfs_drop_inode(struct inode *inode)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
+ generic_delete_inode(inode);
+ else
+ generic_drop_inode(inode);
+}
+
static void init_once(void *foo)
{
struct btrfs_inode *ei = (struct btrfs_inode *) foo;
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(old_dir)->root;
+ struct btrfs_root *dest = BTRFS_I(new_dir)->root;
struct inode *new_inode = new_dentry->d_inode;
struct inode *old_inode = old_dentry->d_inode;
struct timespec ctime = CURRENT_TIME;
u64 index = 0;
+ u64 root_objectid;
int ret;
- /* we're not allowed to rename between subvolumes */
- if (BTRFS_I(old_inode)->root->root_key.objectid !=
- BTRFS_I(new_dir)->root->root_key.objectid)
+ if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ return -EPERM;
+
+ /* we only allow rename subvolume link between subvolumes */
+ if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
return -EXDEV;
- if (S_ISDIR(old_inode->i_mode) && new_inode &&
- new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
+ if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
+ (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID))
return -ENOTEMPTY;
- }
- /* to rename a snapshot or subvolume, we need to juggle the
- * backrefs. This isn't coded yet
- */
- if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
- return -EXDEV;
+ if (S_ISDIR(old_inode->i_mode) && new_inode &&
+ new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
+ return -ENOTEMPTY;
ret = btrfs_check_metadata_free_space(root);
if (ret)
- goto out_unlock;
+ return ret;
/*
* we're using rename to replace one file with another.
old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(old_inode->i_mapping);
+ /* close the racy window with snapshot create/destroy ioctl */
+ if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+ down_read(&root->fs_info->subvol_sem);
+
trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, new_dir);
+
+ if (dest != root)
+ btrfs_record_root_in_trans(trans, dest);
+ ret = btrfs_set_inode_index(new_dir, &index);
+ if (ret)
+ goto out_fail;
+
+ if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ /* force full log commit if subvolume involved. */
+ root->fs_info->last_trans_log_full_commit = trans->transid;
+ } else {
+ ret = btrfs_insert_inode_ref(trans, dest,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len,
+ old_inode->i_ino,
+ new_dir->i_ino, index);
+ if (ret)
+ goto out_fail;
+ /*
+ * this is an ugly little race, but the rename is required
+ * to make sure that if we crash, the inode is either at the
+ * old name or the new one. pinning the log transaction lets
+ * us make sure we don't allow a log commit to come in after
+ * we unlink the name but before we add the new name back in.
+ */
+ btrfs_pin_log_trans(root);
+ }
/*
* make sure the inode gets flushed if it is replacing
* something.
btrfs_add_ordered_operation(trans, root, old_inode);
}
- /*
- * this is an ugly little race, but the rename is required to make
- * sure that if we crash, the inode is either at the old name
- * or the new one. pinning the log transaction lets us make sure
- * we don't allow a log commit to come in after we unlink the
- * name but before we add the new name back in.
- */
- btrfs_pin_log_trans(root);
-
- btrfs_set_trans_block_group(trans, new_dir);
-
- btrfs_inc_nlink(old_dentry->d_inode);
old_dir->i_ctime = old_dir->i_mtime = ctime;
new_dir->i_ctime = new_dir->i_mtime = ctime;
old_inode->i_ctime = ctime;
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
- ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
- if (ret)
- goto out_fail;
+ if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
+ ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+ } else {
+ btrfs_inc_nlink(old_dentry->d_inode);
+ ret = btrfs_unlink_inode(trans, root, old_dir,
+ old_dentry->d_inode,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+ }
+ BUG_ON(ret);
if (new_inode) {
new_inode->i_ctime = CURRENT_TIME;
- ret = btrfs_unlink_inode(trans, root, new_dir,
- new_dentry->d_inode,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
- if (ret)
- goto out_fail;
+ if (unlikely(new_inode->i_ino ==
+ BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+ root_objectid = BTRFS_I(new_inode)->location.objectid;
+ ret = btrfs_unlink_subvol(trans, dest, new_dir,
+ root_objectid,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+ BUG_ON(new_inode->i_nlink == 0);
+ } else {
+ ret = btrfs_unlink_inode(trans, dest, new_dir,
+ new_dentry->d_inode,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+ }
+ BUG_ON(ret);
if (new_inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, new_dentry->d_inode);
- if (ret)
- goto out_fail;
+ BUG_ON(ret);
}
-
}
- ret = btrfs_set_inode_index(new_dir, &index);
- if (ret)
- goto out_fail;
- ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode,
- old_inode, new_dentry->d_name.name,
- new_dentry->d_name.len, 1, index);
- if (ret)
- goto out_fail;
+ ret = btrfs_add_link(trans, new_dir, old_inode,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len, 0, index);
+ BUG_ON(ret);
- btrfs_log_new_name(trans, old_inode, old_dir,
- new_dentry->d_parent);
+ if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_log_new_name(trans, old_inode, old_dir,
+ new_dentry->d_parent);
+ btrfs_end_log_trans(root);
+ }
out_fail:
-
- /* this btrfs_end_log_trans just allows the current
- * log-sub transaction to complete
- */
- btrfs_end_log_trans(root);
btrfs_end_transaction_throttle(trans, root);
-out_unlock:
+
+ if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+ up_read(&root->fs_info->subvol_sem);
return ret;
}
0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
BUG_ON(ret);
+ btrfs_drop_extent_cache(inode, cur_offset,
+ cur_offset + ins.offset -1, 0);
num_bytes -= ins.offset;
cur_offset += ins.offset;
alloc_hint = ins.objectid + ins.offset;
.lookup = btrfs_lookup,
.permission = btrfs_permission,
};
+
static struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
.set_page_dirty = btrfs_set_page_dirty,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations btrfs_symlink_aops = {
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
};
+
+struct dentry_operations btrfs_dentry_operations = {
+ .d_delete = btrfs_dentry_delete,
+};
struct btrfs_root_item root_item;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
- struct btrfs_root *new_root = root;
- struct inode *dir;
+ struct btrfs_root *new_root;
+ struct inode *dir = dentry->d_parent->d_inode;
int ret;
int err;
u64 objectid;
ret = btrfs_check_metadata_free_space(root);
if (ret)
- goto fail_commit;
+ return ret;
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
if (ret)
goto fail;
+ key.offset = (u64)-1;
+ new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
+ BUG_ON(IS_ERR(new_root));
+
+ btrfs_record_root_in_trans(trans, new_root);
+
+ ret = btrfs_create_subvol_root(trans, new_root, new_dirid,
+ BTRFS_I(dir)->block_group);
/*
* insert the directory item
*/
- key.offset = (u64)-1;
- dir = dentry->d_parent->d_inode;
ret = btrfs_set_inode_index(dir, &index);
BUG_ON(ret);
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
- /* add the backref first */
ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
- objectid, BTRFS_ROOT_BACKREF_KEY,
- root->root_key.objectid,
+ objectid, root->root_key.objectid,
dir->i_ino, index, name, namelen);
BUG_ON(ret);
- /* now add the forward ref */
- ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
- root->root_key.objectid, BTRFS_ROOT_REF_KEY,
- objectid,
- dir->i_ino, index, name, namelen);
-
- BUG_ON(ret);
-
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- goto fail_commit;
-
- new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
- BUG_ON(!new_root);
-
- trans = btrfs_start_transaction(new_root, 1);
- BUG_ON(!trans);
-
- ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid,
- BTRFS_I(dir)->block_group);
- if (ret)
- goto fail;
-
+ d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
nr = trans->blocks_used;
- err = btrfs_commit_transaction(trans, new_root);
+ err = btrfs_commit_transaction(trans, root);
if (err && !ret)
ret = err;
-fail_commit:
- btrfs_btree_balance_dirty(root, nr);
return ret;
}
* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
* inside this filesystem so it's quite a bit simpler.
*/
-static noinline int btrfs_mksubvol(struct path *parent, char *name,
- int mode, int namelen,
+static noinline int btrfs_mksubvol(struct path *parent,
+ char *name, int namelen,
struct btrfs_root *snap_src)
{
+ struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
int error;
- mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
dentry = lookup_one_len(name, parent->dentry, namelen);
error = PTR_ERR(dentry);
if (dentry->d_inode)
goto out_dput;
- if (!IS_POSIXACL(parent->dentry->d_inode))
- mode &= ~current_umask();
-
error = mnt_want_write(parent->mnt);
if (error)
goto out_dput;
- error = btrfs_may_create(parent->dentry->d_inode, dentry);
+ error = btrfs_may_create(dir, dentry);
if (error)
goto out_drop_write;
- /*
- * Actually perform the low-level subvolume creation after all
- * this VFS fuzz.
- *
- * Eventually we want to pass in an inode under which we create this
- * subvolume, but for now all are under the filesystem root.
- *
- * Also we should pass on the mode eventually to allow creating new
- * subvolume with specific mode bits.
- */
+ down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
+
+ if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
+ goto out_up_read;
+
if (snap_src) {
- struct dentry *dir = dentry->d_parent;
- struct dentry *test = dir->d_parent;
- struct btrfs_path *path = btrfs_alloc_path();
- int ret;
- u64 test_oid;
- u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid;
-
- test_oid = snap_src->root_key.objectid;
-
- ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
- path, parent_oid, test_oid);
- if (ret == 0)
- goto create;
- btrfs_release_path(snap_src->fs_info->tree_root, path);
-
- /* we need to make sure we aren't creating a directory loop
- * by taking a snapshot of something that has our current
- * subvol in its directory tree. So, this loops through
- * the dentries and checks the forward refs for each subvolume
- * to see if is references the subvolume where we are
- * placing this new snapshot.
- */
- while (1) {
- if (!test ||
- dir == snap_src->fs_info->sb->s_root ||
- test == snap_src->fs_info->sb->s_root ||
- test->d_inode->i_sb != snap_src->fs_info->sb) {
- break;
- }
- if (S_ISLNK(test->d_inode->i_mode)) {
- printk(KERN_INFO "Btrfs symlink in snapshot "
- "path, failed\n");
- error = -EMLINK;
- btrfs_free_path(path);
- goto out_drop_write;
- }
- test_oid =
- BTRFS_I(test->d_inode)->root->root_key.objectid;
- ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
- path, test_oid, parent_oid);
- if (ret == 0) {
- printk(KERN_INFO "Btrfs snapshot creation "
- "failed, looping\n");
- error = -EMLINK;
- btrfs_free_path(path);
- goto out_drop_write;
- }
- btrfs_release_path(snap_src->fs_info->tree_root, path);
- test = test->d_parent;
- }
-create:
- btrfs_free_path(path);
- error = create_snapshot(snap_src, dentry, name, namelen);
+ error = create_snapshot(snap_src, dentry,
+ name, namelen);
} else {
- error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root,
- dentry, name, namelen);
+ error = create_subvol(BTRFS_I(dir)->root, dentry,
+ name, namelen);
}
- if (error)
- goto out_drop_write;
-
- fsnotify_mkdir(parent->dentry->d_inode, dentry);
+ if (!error)
+ fsnotify_mkdir(dir, dentry);
+out_up_read:
+ up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
out_drop_write:
mnt_drop_write(parent->mnt);
out_dput:
dput(dentry);
out_unlock:
- mutex_unlock(&parent->dentry->d_inode->i_mutex);
+ mutex_unlock(&dir->i_mutex);
return error;
}
-
static int btrfs_defrag_file(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
clear_page_dirty_for_io(page);
btrfs_set_extent_delalloc(inode, page_start, page_end);
-
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
set_page_dirty(page);
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
unlock_page(page);
page_cache_release(page);
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
return 0;
}
-static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
+static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
+ void __user *arg)
{
u64 new_size;
u64 old_size;
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct btrfs_ioctl_vol_args *vol_args;
- struct btrfs_dir_item *di;
- struct btrfs_path *path;
struct file *src_file;
- u64 root_dirid;
int namelen;
int ret = 0;
goto out;
}
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
- di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
- path, root_dirid,
- vol_args->name, namelen, 0);
- btrfs_free_path(path);
-
- if (di && !IS_ERR(di)) {
- ret = -EEXIST;
- goto out;
- }
-
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- }
-
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, vol_args->name,
- file->f_path.dentry->d_inode->i_mode,
- namelen, NULL);
+ ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+ NULL);
} else {
struct inode *src_inode;
src_file = fget(vol_args->fd);
fput(src_file);
goto out;
}
- ret = btrfs_mksubvol(&file->f_path, vol_args->name,
- file->f_path.dentry->d_inode->i_mode,
- namelen, BTRFS_I(src_inode)->root);
+ ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+ BTRFS_I(src_inode)->root);
fput(src_file);
}
-
out:
kfree(vol_args);
return ret;
}
+/*
+ * helper to check if the subvolume references other subvolumes
+ */
+static noinline int may_destroy_subvol(struct btrfs_root *root)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = root->root_key.objectid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
+ &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret == 0);
+
+ ret = 0;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid == root->root_key.objectid &&
+ key.type == BTRFS_ROOT_REF_KEY)
+ ret = -ENOTEMPTY;
+ }
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static noinline int btrfs_ioctl_snap_destroy(struct file *file,
+ void __user *arg)
+{
+ struct dentry *parent = fdentry(file);
+ struct dentry *dentry;
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_root *dest = NULL;
+ struct btrfs_ioctl_vol_args *vol_args;
+ struct btrfs_trans_handle *trans;
+ int namelen;
+ int ret;
+ int err = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+
+ vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ namelen = strlen(vol_args->name);
+ if (strchr(vol_args->name, '/') ||
+ strncmp(vol_args->name, "..", namelen) == 0) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mnt_want_write(file->f_path.mnt);
+ if (err)
+ goto out;
+
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ dentry = lookup_one_len(vol_args->name, parent, namelen);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_unlock_dir;
+ }
+
+ if (!dentry->d_inode) {
+ err = -ENOENT;
+ goto out_dput;
+ }
+
+ inode = dentry->d_inode;
+ if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ err = -EINVAL;
+ goto out_dput;
+ }
+
+ dest = BTRFS_I(inode)->root;
+
+ mutex_lock(&inode->i_mutex);
+ err = d_invalidate(dentry);
+ if (err)
+ goto out_unlock;
+
+ down_write(&root->fs_info->subvol_sem);
+
+ err = may_destroy_subvol(dest);
+ if (err)
+ goto out_up_write;
+
+ trans = btrfs_start_transaction(root, 1);
+ ret = btrfs_unlink_subvol(trans, root, dir,
+ dest->root_key.objectid,
+ dentry->d_name.name,
+ dentry->d_name.len);
+ BUG_ON(ret);
+
+ btrfs_record_root_in_trans(trans, dest);
+
+ memset(&dest->root_item.drop_progress, 0,
+ sizeof(dest->root_item.drop_progress));
+ dest->root_item.drop_level = 0;
+ btrfs_set_root_refs(&dest->root_item, 0);
+
+ ret = btrfs_insert_orphan_item(trans,
+ root->fs_info->tree_root,
+ dest->root_key.objectid);
+ BUG_ON(ret);
+
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ inode->i_flags |= S_DEAD;
+out_up_write:
+ up_write(&root->fs_info->subvol_sem);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
+ if (!err) {
+ btrfs_invalidate_inodes(dest);
+ d_delete(dentry);
+ }
+out_dput:
+ dput(dentry);
+out_unlock_dir:
+ mutex_unlock(&dir->i_mutex);
+ mnt_drop_write(file->f_path.mnt);
+out:
+ kfree(vol_args);
+ return err;
+}
+
static int btrfs_ioctl_defrag(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
return ret;
}
-static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- u64 off, u64 olen, u64 destoff)
+static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+ u64 off, u64 olen, u64 destoff)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
/* punch hole in destination first */
btrfs_drop_extents(trans, root, inode, off, off + len,
- off + len, 0, &hint_byte);
+ off + len, 0, &hint_byte, 1);
/* clone data */
key.objectid = src->i_ino;
datao += off - key.offset;
datal -= off - key.offset;
}
- if (key.offset + datao + datal + key.offset >
- off + len)
+ if (key.offset + datao + datal > off + len)
datal = off + len - key.offset - datao;
/* disko == 0 means it's a hole */
if (!disko)
return btrfs_ioctl_snap_create(file, argp, 0);
case BTRFS_IOC_SUBVOL_CREATE:
return btrfs_ioctl_snap_create(file, argp, 1);
+ case BTRFS_IOC_SNAP_DESTROY:
+ return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_DEFRAG:
return btrfs_ioctl_defrag(file);
case BTRFS_IOC_RESIZE:
#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
struct btrfs_ioctl_vol_args)
-
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+ struct btrfs_ioctl_vol_args)
#endif
*
* len is the length of the extent
*
- * This also sets the EXTENT_ORDERED bit on the range in the inode.
- *
* The tree is given a single reference on the ordered extent that was
* inserted.
*/
entry->start = start;
entry->len = len;
entry->disk_len = disk_len;
+ entry->bytes_left = len;
entry->inode = inode;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
&entry->rb_node);
BUG_ON(node);
- set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
- entry_end(entry) - 1, GFP_NOFS);
-
spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
list_add_tail(&entry->root_extent_list,
&BTRFS_I(inode)->root->fs_info->ordered_extents);
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
int ret;
tree = &BTRFS_I(inode)->ordered_tree;
mutex_lock(&tree->mutex);
- clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
- GFP_NOFS);
node = tree_search(tree, file_offset);
if (!node) {
ret = 1;
goto out;
}
- ret = test_range_bit(io_tree, entry->file_offset,
- entry->file_offset + entry->len - 1,
- EXTENT_ORDERED, 0);
- if (ret == 0)
+ if (io_size > entry->bytes_left) {
+ printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+ (unsigned long long)entry->bytes_left,
+ (unsigned long long)io_size);
+ }
+ entry->bytes_left -= io_size;
+ if (entry->bytes_left == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+ else
+ ret = 1;
out:
mutex_unlock(&tree->mutex);
return ret == 0;
u64 orig_end;
u64 wait_end;
struct btrfs_ordered_extent *ordered;
+ int found;
if (start + len < start) {
orig_end = INT_LIMIT(loff_t);
orig_end >> PAGE_CACHE_SHIFT);
end = orig_end;
+ found = 0;
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, end);
if (!ordered)
btrfs_put_ordered_extent(ordered);
break;
}
+ found++;
btrfs_start_ordered_extent(inode, ordered, 1);
end = ordered->file_offset;
btrfs_put_ordered_extent(ordered);
break;
end--;
}
- if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
- EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
+ if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
+ EXTENT_DELALLOC, 0, NULL)) {
schedule_timeout(1);
goto again;
}
*/
if (test_range_bit(io_tree, disk_i_size,
ordered->file_offset + ordered->len - 1,
- EXTENT_DELALLOC, 0)) {
+ EXTENT_DELALLOC, 0, NULL)) {
goto out;
}
/*
*/
if (i_size_test > entry_end(ordered) &&
!test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
- EXTENT_DELALLOC, 0)) {
+ EXTENT_DELALLOC, 0, NULL)) {
new_i_size = min_t(u64, i_size_test, i_size_read(inode));
}
BTRFS_I(inode)->disk_i_size = new_i_size;
/* extent length on disk */
u64 disk_len;
+ /* number of bytes that still need writing */
+ u64 bytes_left;
+
/* flags (described above) */
unsigned long flags;
btrfs_free_path(path);
return ret;
}
+
+int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret;
+
+ key.objectid = BTRFS_ORPHAN_OBJECTID;
+ key.type = BTRFS_ORPHAN_ITEM_KEY;
+ key.offset = offset;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+
+ btrfs_free_path(path);
+ return ret;
+}
int nr;
};
+#define MAX_EXTENTS 128
+
+struct file_extent_cluster {
+ u64 start;
+ u64 end;
+ u64 boundary[MAX_EXTENTS];
+ unsigned int nr;
+};
+
struct reloc_control {
/* block group to relocate */
struct btrfs_block_group_cache *block_group;
struct reloc_control *rc)
{
if (test_range_bit(&rc->processed_blocks, bytenr,
- bytenr + blocksize - 1, EXTENT_DIRTY, 1))
+ bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
return 1;
return 0;
}
}
static noinline_for_stack
-int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
+int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
+ u64 block_start)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
+ int ret = 0;
+
+ em = alloc_extent_map(GFP_NOFS);
+ if (!em)
+ return -ENOMEM;
+
+ em->start = start;
+ em->len = end + 1 - start;
+ em->block_len = em->len;
+ em->block_start = block_start;
+ em->bdev = root->fs_info->fs_devices->latest_bdev;
+ set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+ lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ while (1) {
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+ if (ret != -EEXIST) {
+ free_extent_map(em);
+ break;
+ }
+ btrfs_drop_extent_cache(inode, start, end, 0);
+ }
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ return ret;
+}
+
+static int relocate_file_extent_cluster(struct inode *inode,
+ struct file_extent_cluster *cluster)
{
u64 page_start;
u64 page_end;
- unsigned long i;
- unsigned long first_index;
+ u64 offset = BTRFS_I(inode)->index_cnt;
+ unsigned long index;
unsigned long last_index;
- unsigned int total_read = 0;
- unsigned int total_dirty = 0;
+ unsigned int dirty_page = 0;
struct page *page;
struct file_ra_state *ra;
- struct btrfs_ordered_extent *ordered;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ int nr = 0;
int ret = 0;
+ if (!cluster->nr)
+ return 0;
+
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
return -ENOMEM;
+ index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
+ last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
+
mutex_lock(&inode->i_mutex);
- first_index = start >> PAGE_CACHE_SHIFT;
- last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
- /* make sure the dirty trick played by the caller work */
- while (1) {
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- first_index, last_index);
- if (ret != -EBUSY)
- break;
- schedule_timeout(HZ/10);
- }
+ i_size_write(inode, cluster->end + 1 - offset);
+ ret = setup_extent_mapping(inode, cluster->start - offset,
+ cluster->end - offset, cluster->start);
if (ret)
goto out_unlock;
file_ra_state_init(ra, inode->i_mapping);
- for (i = first_index ; i <= last_index; i++) {
- if (total_read % ra->ra_pages == 0) {
- btrfs_force_ra(inode->i_mapping, ra, NULL, i,
- min(last_index, ra->ra_pages + i - 1));
- }
- total_read++;
-again:
- if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
- BUG_ON(1);
- page = grab_cache_page(inode->i_mapping, i);
+ WARN_ON(cluster->start != cluster->boundary[0]);
+ while (index <= last_index) {
+ page = find_lock_page(inode->i_mapping, index);
if (!page) {
- ret = -ENOMEM;
- goto out_unlock;
+ page_cache_sync_readahead(inode->i_mapping,
+ ra, NULL, index,
+ last_index + 1 - index);
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+ }
+
+ if (PageReadahead(page)) {
+ page_cache_async_readahead(inode->i_mapping,
+ ra, NULL, page, index,
+ last_index + 1 - index);
}
+
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
goto out_unlock;
}
}
- wait_on_page_writeback(page);
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
page_end = page_start + PAGE_CACHE_SIZE - 1;
- lock_extent(io_tree, page_start, page_end, GFP_NOFS);
-
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
- if (ordered) {
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
- unlock_page(page);
- page_cache_release(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- goto again;
- }
+
+ lock_extent(&BTRFS_I(inode)->io_tree,
+ page_start, page_end, GFP_NOFS);
+
set_page_extent_mapped(page);
- if (i == first_index)
- set_extent_bits(io_tree, page_start, page_end,
+ if (nr < cluster->nr &&
+ page_start + offset == cluster->boundary[nr]) {
+ set_extent_bits(&BTRFS_I(inode)->io_tree,
+ page_start, page_end,
EXTENT_BOUNDARY, GFP_NOFS);
+ nr++;
+ }
btrfs_set_extent_delalloc(inode, page_start, page_end);
set_page_dirty(page);
- total_dirty++;
+ dirty_page++;
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+ unlock_extent(&BTRFS_I(inode)->io_tree,
+ page_start, page_end, GFP_NOFS);
unlock_page(page);
page_cache_release(page);
+
+ index++;
+ if (nr < cluster->nr &&
+ page_end + 1 + offset == cluster->boundary[nr]) {
+ balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+ dirty_page);
+ dirty_page = 0;
+ }
+ }
+ if (dirty_page) {
+ balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+ dirty_page);
}
+ WARN_ON(nr != cluster->nr);
out_unlock:
mutex_unlock(&inode->i_mutex);
kfree(ra);
- balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
return ret;
}
static noinline_for_stack
-int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
+int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
+ struct file_extent_cluster *cluster)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_map *em;
- u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
- u64 end = start + extent_key->offset - 1;
-
- em = alloc_extent_map(GFP_NOFS);
- em->start = start;
- em->len = extent_key->offset;
- em->block_len = extent_key->offset;
- em->block_start = extent_key->objectid;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ int ret;
- /* setup extent map to cheat btrfs_readpage */
- lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
- while (1) {
- int ret;
- spin_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, start, end, 0);
+ if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
+ ret = relocate_file_extent_cluster(inode, cluster);
+ if (ret)
+ return ret;
+ cluster->nr = 0;
}
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
- return relocate_inode_pages(inode, start, extent_key->offset);
+ if (!cluster->nr)
+ cluster->start = extent_key->objectid;
+ else
+ BUG_ON(cluster->nr >= MAX_EXTENTS);
+ cluster->end = extent_key->objectid + extent_key->offset - 1;
+ cluster->boundary[cluster->nr] = extent_key->objectid;
+ cluster->nr++;
+
+ if (cluster->nr >= MAX_EXTENTS) {
+ ret = relocate_file_extent_cluster(inode, cluster);
+ if (ret)
+ return ret;
+ cluster->nr = 0;
+ }
+ return 0;
}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
return 0;
}
+
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
{
struct rb_root blocks = RB_ROOT;
struct btrfs_key key;
+ struct file_extent_cluster *cluster;
struct btrfs_trans_handle *trans = NULL;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
int ret;
int err = 0;
+ cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
+ if (!cluster)
+ return -ENOMEM;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+ rc->extents_found = 0;
+ rc->extents_skipped = 0;
+
rc->search_start = rc->block_group->key.objectid;
clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
GFP_NOFS);
}
nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, rc->extent_root);
+ btrfs_end_transaction(trans, rc->extent_root);
trans = NULL;
btrfs_btree_balance_dirty(rc->extent_root, nr);
if (rc->stage == MOVE_DATA_EXTENTS &&
(flags & BTRFS_EXTENT_FLAG_DATA)) {
rc->found_file_extent = 1;
- ret = relocate_data_extent(rc->data_inode, &key);
+ ret = relocate_data_extent(rc->data_inode,
+ &key, cluster);
if (ret < 0) {
err = ret;
break;
btrfs_btree_balance_dirty(rc->extent_root, nr);
}
+ if (!err) {
+ ret = relocate_file_extent_cluster(rc->data_inode, cluster);
+ if (ret < 0)
+ err = ret;
+ }
+
+ kfree(cluster);
+
rc->create_reloc_root = 0;
smp_mb();
}
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 size)
+ struct btrfs_root *root, u64 objectid)
{
struct btrfs_path *path;
struct btrfs_inode_item *item;
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
btrfs_set_inode_generation(leaf, item, 1);
- btrfs_set_inode_size(leaf, item, size);
+ btrfs_set_inode_size(leaf, item, 0);
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
btrfs_mark_buffer_dirty(leaf);
if (err)
goto out;
- err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
- BUG_ON(err);
-
- err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
- group->key.offset, 0, group->key.offset,
- 0, 0, 0);
+ err = __insert_orphan_inode(trans, root, objectid);
BUG_ON(err);
key.objectid = objectid;
btrfs_wait_ordered_extents(fs_info->tree_root, 0);
while (1) {
- mutex_lock(&fs_info->cleaner_mutex);
- btrfs_clean_old_snapshots(fs_info->tree_root);
- mutex_unlock(&fs_info->cleaner_mutex);
-
rc->extents_found = 0;
rc->extents_skipped = 0;
+ mutex_lock(&fs_info->cleaner_mutex);
+
+ btrfs_clean_old_snapshots(fs_info->tree_root);
ret = relocate_block_group(rc);
+
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
err = ret;
break;
}
}
- filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
- rc->block_group->key.objectid,
- rc->block_group->key.objectid +
- rc->block_group->key.offset - 1);
+ filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
+ rc->block_group->key.objectid,
+ rc->block_group->key.objectid +
+ rc->block_group->key.offset - 1);
WARN_ON(rc->block_group->pinned > 0);
WARN_ON(rc->block_group->reserved > 0);
return err;
}
+static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
+{
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ trans = btrfs_start_transaction(root->fs_info->tree_root, 1);
+
+ memset(&root->root_item.drop_progress, 0,
+ sizeof(root->root_item.drop_progress));
+ root->root_item.drop_level = 0;
+ btrfs_set_root_refs(&root->root_item, 0);
+ ret = btrfs_update_root(trans, root->fs_info->tree_root,
+ &root->root_key, &root->root_item);
+ BUG_ON(ret);
+
+ ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
+ BUG_ON(ret);
+ return 0;
+}
+
/*
* recover relocation interrupted by system crash.
*
fs_root = read_fs_root(root->fs_info,
reloc_root->root_key.offset);
if (IS_ERR(fs_root)) {
- err = PTR_ERR(fs_root);
- goto out;
+ ret = PTR_ERR(fs_root);
+ if (ret != -ENOENT) {
+ err = ret;
+ goto out;
+ }
+ mark_garbage_root(reloc_root);
}
}
goto out;
BUG_ON(ret == 0);
+ if (path->slots[0] == 0) {
+ ret = 1;
+ goto out;
+ }
l = path->nodes[0];
- BUG_ON(path->slots[0] == 0);
slot = path->slots[0] - 1;
btrfs_item_key_to_cpu(l, &found_key, slot);
- if (found_key.objectid != objectid) {
+ if (found_key.objectid != objectid ||
+ found_key.type != BTRFS_ROOT_ITEM_KEY) {
ret = 1;
goto out;
}
- read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
- sizeof(*item));
- memcpy(key, &found_key, sizeof(found_key));
+ if (item)
+ read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
+ sizeof(*item));
+ if (key)
+ memcpy(key, &found_key, sizeof(found_key));
ret = 0;
out:
btrfs_free_path(path);
return ret;
}
+int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int err = 0;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = BTRFS_ORPHAN_OBJECTID;
+ key.type = BTRFS_ORPHAN_ITEM_KEY;
+ key.offset = 0;
+
+ while (1) {
+ ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+ if (ret < 0) {
+ err = ret;
+ break;
+ }
+
+ leaf = path->nodes[0];
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(tree_root, path);
+ if (ret < 0)
+ err = ret;
+ if (ret != 0)
+ break;
+ leaf = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ btrfs_release_path(tree_root, path);
+
+ if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
+ key.type != BTRFS_ORPHAN_ITEM_KEY)
+ break;
+
+ ret = btrfs_find_dead_roots(tree_root, key.offset);
+ if (ret) {
+ err = ret;
+ break;
+ }
+
+ key.offset++;
+ }
+
+ btrfs_free_path(path);
+ return err;
+}
+
/* drop the root item for 'key' from 'root' */
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key)
return ret;
}
-#if 0 /* this will get used when snapshot deletion is implemented */
int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
- u64 root_id, u8 type, u64 ref_id)
+ u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
+ const char *name, int name_len)
+
{
+ struct btrfs_path *path;
+ struct btrfs_root_ref *ref;
+ struct extent_buffer *leaf;
struct btrfs_key key;
+ unsigned long ptr;
+ int err = 0;
int ret;
- struct btrfs_path *path;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
key.objectid = root_id;
- key.type = type;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = ref_id;
-
+again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- BUG_ON(ret);
-
- ret = btrfs_del_item(trans, tree_root, path);
- BUG_ON(ret);
+ BUG_ON(ret < 0);
+ if (ret == 0) {
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_root_ref);
+
+ WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
+ WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
+ ptr = (unsigned long)(ref + 1);
+ WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
+ *sequence = btrfs_root_ref_sequence(leaf, ref);
+
+ ret = btrfs_del_item(trans, tree_root, path);
+ BUG_ON(ret);
+ } else
+ err = -ENOENT;
+
+ if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+ btrfs_release_path(tree_root, path);
+ key.objectid = ref_id;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = root_id;
+ goto again;
+ }
btrfs_free_path(path);
- return ret;
+ return err;
}
-#endif
int btrfs_find_root_ref(struct btrfs_root *tree_root,
struct btrfs_path *path,
return ret;
}
-
/*
* add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
* or BTRFS_ROOT_BACKREF_KEY.
*/
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
- u64 root_id, u8 type, u64 ref_id,
- u64 dirid, u64 sequence,
+ u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
const char *name, int name_len)
{
struct btrfs_key key;
struct extent_buffer *leaf;
unsigned long ptr;
-
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
key.objectid = root_id;
- key.type = type;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = ref_id;
-
+again:
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
sizeof(*ref) + name_len);
BUG_ON(ret);
write_extent_buffer(leaf, name, ptr, name_len);
btrfs_mark_buffer_dirty(leaf);
+ if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+ btrfs_release_path(tree_root, path);
+ key.objectid = ref_id;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = root_id;
+ goto again;
+ }
+
btrfs_free_path(path);
- return ret;
+ return 0;
}
}
static const struct super_operations btrfs_super_ops = {
+ .drop_inode = btrfs_drop_inode,
.delete_inode = btrfs_delete_inode,
.put_super = btrfs_put_super,
.sync_fs = btrfs_sync_fs,
{
if (root->ref_cows && root->last_trans < trans->transid) {
WARN_ON(root == root->fs_info->extent_root);
- WARN_ON(root->root_item.refs == 0);
WARN_ON(root->commit_root != root->node);
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
key.objectid = objectid;
- key.offset = 0;
+ /* record when the snapshot was created in key.offset */
+ key.offset = trans->transid;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
old = btrfs_lock_root_node(root);
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
- /* add the backref first */
ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
pending->root_key.objectid,
- BTRFS_ROOT_BACKREF_KEY,
parent_root->root_key.objectid,
parent_inode->i_ino, index, pending->name,
namelen);
BUG_ON(ret);
- /* now add the forward ref */
- ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
- parent_root->root_key.objectid,
- BTRFS_ROOT_REF_KEY,
- pending->root_key.objectid,
- parent_inode->i_ino, index, pending->name,
- namelen);
-
inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
d_instantiate(pending->dentry, inode);
fail:
unsigned long timeout = 1;
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL;
- struct extent_io_tree *pinned_copy;
DEFINE_WAIT(wait);
int ret;
int should_grow = 0;
return 0;
}
- pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
- if (!pinned_copy)
- return -ENOMEM;
-
- extent_io_tree_init(pinned_copy,
- root->fs_info->btree_inode->i_mapping, GFP_NOFS);
-
trans->transaction->in_commit = 1;
trans->transaction->blocked = 1;
if (cur_trans->list.prev != &root->fs_info->trans_list) {
ret = commit_cowonly_roots(trans, root);
BUG_ON(ret);
+ btrfs_prepare_extent_commit(trans, root);
+
cur_trans = root->fs_info->running_transaction;
spin_lock(&root->fs_info->new_trans_lock);
root->fs_info->running_transaction = NULL;
memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
sizeof(root->fs_info->super_copy));
- btrfs_copy_pinned(root, pinned_copy);
-
trans->transaction->blocked = 0;
wake_up(&root->fs_info->transaction_wait);
*/
mutex_unlock(&root->fs_info->tree_log_mutex);
- btrfs_finish_extent_commit(trans, root, pinned_copy);
- kfree(pinned_copy);
+ btrfs_finish_extent_commit(trans, root);
/* do the directory inserts of any pending snapshot creations */
finish_pending_snapshots(trans, root->fs_info);
while (!list_empty(&list)) {
root = list_entry(list.next, struct btrfs_root, root_list);
- list_del_init(&root->root_list);
- btrfs_drop_snapshot(root, 0);
+ list_del(&root->root_list);
+
+ if (btrfs_header_backref_rev(root->node) <
+ BTRFS_MIXED_BACKREF_REV)
+ btrfs_drop_snapshot(root, 0);
+ else
+ btrfs_drop_snapshot(root, 1);
}
return 0;
}
struct walk_control *wc, u64 gen)
{
if (wc->pin)
- btrfs_update_pinned_extents(log->fs_info->extent_root,
- eb->start, eb->len, 1);
+ btrfs_pin_extent(log->fs_info->extent_root,
+ eb->start, eb->len, 0);
if (btrfs_buffer_uptodate(eb, gen)) {
if (wc->write)
saved_nbytes = inode_get_bytes(inode);
/* drop any overlapping extents */
ret = btrfs_drop_extents(trans, root, inode,
- start, extent_end, extent_end, start, &alloc_hint);
+ start, extent_end, extent_end, start, &alloc_hint, 1);
BUG_ON(ret);
if (found_type == BTRFS_FILE_EXTENT_REG ||
if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
break;
- if (parent == sb->s_root)
+ if (IS_ROOT(parent))
break;
parent = parent->d_parent;
goto end_no_trans;
}
+ if (root != BTRFS_I(inode)->root ||
+ btrfs_root_refs(&root->root_item) == 0) {
+ ret = 1;
+ goto end_no_trans;
+ }
+
ret = check_parent_dirs_for_sync(trans, inode, parent,
sb, last_committed);
if (ret)
break;
inode = parent->d_inode;
+ if (root != BTRFS_I(inode)->root)
+ break;
+
if (BTRFS_I(inode)->generation >
root->fs_info->last_trans_committed) {
ret = btrfs_log_inode(trans, root, inode, inode_only);
BUG_ON(ret);
}
- if (parent == sb->s_root)
+ if (IS_ROOT(parent))
break;
parent = parent->d_parent;
struct btrfs_key tmp_key;
struct btrfs_root *log;
struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
- u64 highest_inode;
struct walk_control wc = {
.process_func = process_one_buffer,
.stage = 0,
path);
BUG_ON(ret);
}
- ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode);
- if (ret == 0) {
- wc.replay_dest->highest_inode = highest_inode;
- wc.replay_dest->last_inode_alloc = highest_inode;
- }
key.offset = found_key.offset - 1;
wc.replay_dest->log_root = NULL;
* is now congested. Back off and let other work structs
* run instead
*/
- if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
+ if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
fs_info->fs_devices->open_devices > 1) {
struct io_context *ioc;
* called very infrequently and that a given device has a small number
* of extents
*/
-static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 num_bytes, u64 *start,
- u64 *max_avail)
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device, u64 num_bytes,
+ u64 *start, u64 *max_avail)
{
struct btrfs_key key;
struct btrfs_root *root = device->dev_root;
extent_root = root->fs_info->extent_root;
em_tree = &root->fs_info->mapping_tree.map_tree;
+ ret = btrfs_can_relocate(extent_root, chunk_offset);
+ if (ret)
+ return -ENOSPC;
+
/* step one, relocate all the extents inside this chunk */
ret = btrfs_relocate_block_group(extent_root, chunk_offset);
BUG_ON(ret);
* step two, delete the device extents and the
* chunk tree entries
*/
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, chunk_offset, 1);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
BUG_ON(em->start > chunk_offset ||
em->start + em->len < chunk_offset);
ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
BUG_ON(ret);
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
remove_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
kfree(map);
em->bdev = NULL;
struct btrfs_key found_key;
u64 chunk_tree = chunk_root->root_key.objectid;
u64 chunk_type;
+ bool retried = false;
+ int failed = 0;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+again:
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
found_key.objectid,
found_key.offset);
- BUG_ON(ret);
+ if (ret == -ENOSPC)
+ failed++;
+ else if (ret)
+ BUG();
}
if (found_key.offset == 0)
key.offset = found_key.offset - 1;
}
ret = 0;
+ if (failed && !retried) {
+ failed = 0;
+ retried = true;
+ goto again;
+ } else if (failed && retried) {
+ WARN_ON(1);
+ ret = -ENOSPC;
+ }
error:
btrfs_free_path(path);
return ret;
continue;
ret = btrfs_shrink_device(device, old_size - size_to_free);
+ if (ret == -ENOSPC)
+ break;
BUG_ON(ret);
trans = btrfs_start_transaction(dev_root, 1);
chunk = btrfs_item_ptr(path->nodes[0],
path->slots[0],
struct btrfs_chunk);
- key.offset = found_key.offset;
/* chunk zero is special */
- if (key.offset == 0)
+ if (found_key.offset == 0)
break;
btrfs_release_path(chunk_root, path);
chunk_root->root_key.objectid,
found_key.objectid,
found_key.offset);
- BUG_ON(ret);
+ BUG_ON(ret && ret != -ENOSPC);
+ key.offset = found_key.offset - 1;
}
ret = 0;
error:
u64 chunk_offset;
int ret;
int slot;
+ int failed = 0;
+ bool retried = false;
struct extent_buffer *l;
struct btrfs_key key;
struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
u64 old_total = btrfs_super_total_bytes(super_copy);
+ u64 old_size = device->total_bytes;
u64 diff = device->total_bytes - new_size;
if (new_size >= device->total_bytes)
if (!path)
return -ENOMEM;
- trans = btrfs_start_transaction(root, 1);
- if (!trans) {
- ret = -ENOMEM;
- goto done;
- }
-
path->reada = 2;
lock_chunks(root);
if (device->writeable)
device->fs_devices->total_rw_bytes -= diff;
unlock_chunks(root);
- btrfs_end_transaction(trans, root);
+again:
key.objectid = device->devid;
key.offset = (u64)-1;
key.type = BTRFS_DEV_EXTENT_KEY;
goto done;
if (ret) {
ret = 0;
+ btrfs_release_path(root, path);
break;
}
slot = path->slots[0];
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
- if (key.objectid != device->devid)
+ if (key.objectid != device->devid) {
+ btrfs_release_path(root, path);
break;
+ }
dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
length = btrfs_dev_extent_length(l, dev_extent);
- if (key.offset + length <= new_size)
+ if (key.offset + length <= new_size) {
+ btrfs_release_path(root, path);
break;
+ }
chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
chunk_offset);
- if (ret)
+ if (ret && ret != -ENOSPC)
goto done;
+ if (ret == -ENOSPC)
+ failed++;
+ key.offset -= 1;
+ }
+
+ if (failed && !retried) {
+ failed = 0;
+ retried = true;
+ goto again;
+ } else if (failed && retried) {
+ ret = -ENOSPC;
+ lock_chunks(root);
+
+ device->total_bytes = old_size;
+ if (device->writeable)
+ device->fs_devices->total_rw_bytes += diff;
+ unlock_chunks(root);
+ goto done;
}
/* Shrinking succeeded, else we would be at "done". */
em->block_len = em->len;
em_tree = &extent_root->fs_info->mapping_tree.map_tree;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
BUG_ON(ret);
free_extent_map(em);
int readonly = 0;
int i;
- spin_lock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
- spin_unlock(&map_tree->map_tree.lock);
+ read_unlock(&map_tree->map_tree.lock);
if (!em)
return 1;
struct extent_map *em;
while (1) {
- spin_lock(&tree->map_tree.lock);
+ write_lock(&tree->map_tree.lock);
em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
if (em)
remove_extent_mapping(&tree->map_tree, em);
- spin_unlock(&tree->map_tree.lock);
+ write_unlock(&tree->map_tree.lock);
if (!em)
break;
kfree(em->bdev);
struct extent_map_tree *em_tree = &map_tree->map_tree;
int ret;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, len);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
atomic_set(&multi->error, 0);
}
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, *length);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em && unplug_page)
return 0;
u64 stripe_nr;
int i, j, nr = 0;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, chunk_start, 1);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
BUG_ON(!em || em->start != chunk_start);
map = (struct map_lookup *)em->bdev;
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
- spin_lock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
- spin_unlock(&map_tree->map_tree.lock);
+ read_unlock(&map_tree->map_tree.lock);
/* already mapped? */
if (em && em->start <= logical && em->start + em->len > logical) {
map->stripes[i].dev->in_fs_metadata = 1;
}
- spin_lock(&map_tree->map_tree.lock);
+ write_lock(&map_tree->map_tree.lock);
ret = add_extent_mapping(&map_tree->map_tree, em);
- spin_unlock(&map_tree->map_tree.lock);
+ write_unlock(&map_tree->map_tree.lock);
BUG_ON(ret);
free_extent_map(em);
void btrfs_unlock_volumes(void);
void btrfs_lock_volumes(void);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device, u64 num_bytes,
+ u64 *start, u64 *max_avail);
#endif
struct address_space *mapping = inode->i_mapping;
struct page *page;
void *fsdata;
- unsigned long limit;
int err;
- err = -EFBIG;
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && size > (loff_t)limit) {
- send_sig(SIGXFSZ, current, 0);
- goto out;
- }
- if (size > inode->i_sb->s_maxbytes)
+ err = inode_newsize_ok(inode, size);
+ if (err)
goto out;
err = pagecache_write_begin(NULL, mapping, size, 0,
{
struct char_device_struct *cd;
struct cdev *cdev;
- char *s;
int err = -ENOMEM;
cd = __register_chrdev_region(major, baseminor, count, name);
cdev->owner = fops->owner;
cdev->ops = fops;
kobject_set_name(&cdev->kobj, "%s", name);
- for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/'))
- *s = '!';
err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
if (err)
cifs_sb->mountdata = NULL;
}
#endif
- if (cifs_sb->local_nls)
- unload_nls(cifs_sb->local_nls);
+ unload_nls(cifs_sb->local_nls);
kfree(cifs_sb);
}
return rc;
static int cifs_vmtruncate(struct inode *inode, loff_t offset)
{
- struct address_space *mapping = inode->i_mapping;
- unsigned long limit;
+ loff_t oldsize;
+ int err;
spin_lock(&inode->i_lock);
- if (inode->i_size < offset)
- goto do_expand;
- /*
- * truncation of in-use swapfiles is disallowed - it would cause
- * subsequent swapout to scribble on the now-freed blocks.
- */
- if (IS_SWAPFILE(inode)) {
- spin_unlock(&inode->i_lock);
- goto out_busy;
- }
- i_size_write(inode, offset);
- spin_unlock(&inode->i_lock);
- /*
- * unmap_mapping_range is called twice, first simply for efficiency
- * so that truncate_inode_pages does fewer single-page unmaps. However
- * after this first call, and before truncate_inode_pages finishes,
- * it is possible for private pages to be COWed, which remain after
- * truncate_inode_pages finishes, hence the second unmap_mapping_range
- * call must be made for correctness.
- */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- goto out_truncate;
-
-do_expand:
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit) {
+ err = inode_newsize_ok(inode, offset);
+ if (err) {
spin_unlock(&inode->i_lock);
- goto out_sig;
- }
- if (offset > inode->i_sb->s_maxbytes) {
- spin_unlock(&inode->i_lock);
- goto out_big;
+ goto out;
}
+
+ oldsize = inode->i_size;
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
-out_truncate:
+ truncate_pagecache(inode, oldsize, offset);
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
- return 0;
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out_big:
- return -EFBIG;
-out_busy:
- return -ETXTBSY;
+out:
+ return err;
}
static int
#define _CODA_INT_
struct dentry;
+struct file;
extern struct file_system_type coda_fs_type;
extern unsigned long coda_timeout;
char __user * type, unsigned long flags,
void __user * data)
{
- unsigned long type_page;
+ char *kernel_type;
unsigned long data_page;
- unsigned long dev_page;
+ char *kernel_dev;
char *dir_page;
int retval;
- retval = copy_mount_options (type, &type_page);
+ retval = copy_mount_string(type, &kernel_type);
if (retval < 0)
goto out;
if (IS_ERR(dir_page))
goto out1;
- retval = copy_mount_options (dev_name, &dev_page);
+ retval = copy_mount_string(dev_name, &kernel_dev);
if (retval < 0)
goto out2;
- retval = copy_mount_options (data, &data_page);
+ retval = copy_mount_options(data, &data_page);
if (retval < 0)
goto out3;
retval = -EINVAL;
- if (type_page && data_page) {
- if (!strcmp((char *)type_page, SMBFS_NAME)) {
+ if (kernel_type && data_page) {
+ if (!strcmp(kernel_type, SMBFS_NAME)) {
do_smb_super_data_conv((void *)data_page);
- } else if (!strcmp((char *)type_page, NCPFS_NAME)) {
+ } else if (!strcmp(kernel_type, NCPFS_NAME)) {
do_ncp_super_data_conv((void *)data_page);
- } else if (!strcmp((char *)type_page, NFS4_NAME)) {
+ } else if (!strcmp(kernel_type, NFS4_NAME)) {
if (do_nfs4_super_data_conv((void *) data_page))
goto out4;
}
}
- retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
+ retval = do_mount(kernel_dev, dir_page, kernel_type,
flags, (void*)data_page);
out4:
free_page(data_page);
out3:
- free_page(dev_page);
+ kfree(kernel_dev);
out2:
putname(dir_page);
out1:
- free_page(type_page);
+ kfree(kernel_type);
out:
return retval;
}
}
int drop_caches_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ proc_dointvec_minmax(table, write, buffer, length, ppos);
if (write) {
if (sysctl_drop_caches & 1)
drop_pagecache();
#include <linux/kmod.h>
#include <linux/fsnotify.h>
#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
int core_uses_pid;
char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
int suid_dumpable = 0;
/* The maximal length of core_pattern is also specified in sysctl.c */
return retval;
}
-int set_binfmt(struct linux_binfmt *new)
+void set_binfmt(struct linux_binfmt *new)
{
- struct linux_binfmt *old = current->binfmt;
+ struct mm_struct *mm = current->mm;
- if (new) {
- if (!try_module_get(new->module))
- return -1;
- }
- current->binfmt = new;
- if (old)
- module_put(old->module);
- return 0;
+ if (mm->binfmt)
+ module_put(mm->binfmt->module);
+
+ mm->binfmt = new;
+ if (new)
+ __module_get(new->module);
}
EXPORT_SYMBOL(set_binfmt);
return (ret >= 2) ? 2 : ret;
}
+static void wait_for_dump_helpers(struct file *file)
+{
+ struct pipe_inode_info *pipe;
+
+ pipe = file->f_path.dentry->d_inode->i_pipe;
+
+ pipe_lock(pipe);
+ pipe->readers++;
+ pipe->writers--;
+
+ while ((pipe->readers > 1) && (!signal_pending(current))) {
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ pipe_wait(pipe);
+ }
+
+ pipe->readers--;
+ pipe->writers++;
+ pipe_unlock(pipe);
+
+}
+
+
void do_coredump(long signr, int exit_code, struct pt_regs *regs)
{
struct core_state core_state;
unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
char **helper_argv = NULL;
int helper_argc = 0;
- char *delimit;
+ int dump_count = 0;
+ static atomic_t core_dump_count = ATOMIC_INIT(0);
audit_core_dumps(signr);
- binfmt = current->binfmt;
+ binfmt = mm->binfmt;
if (!binfmt || !binfmt->core_dump)
goto fail;
lock_kernel();
ispipe = format_corename(corename, signr);
unlock_kernel();
- /*
- * Don't bother to check the RLIMIT_CORE value if core_pattern points
- * to a pipe. Since we're not writing directly to the filesystem
- * RLIMIT_CORE doesn't really apply, as no actual core file will be
- * created unless the pipe reader choses to write out the core file
- * at which point file size limits and permissions will be imposed
- * as it does with any other process
- */
+
if ((!ispipe) && (core_limit < binfmt->min_coredump))
goto fail_unlock;
if (ispipe) {
+ if (core_limit == 0) {
+ /*
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * core_limit of 0 here as a speacial value. Any
+ * non-zero limit gets set to RLIM_INFINITY below, but
+ * a limit of 0 skips the dump. This is a consistent
+ * way to catch recursive crashes. We can still crash
+ * if the core_pattern binary sets RLIM_CORE = !0
+ * but it runs as root, and can do lots of stupid things
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ printk(KERN_WARNING
+ "Process %d(%s) has RLIMIT_CORE set to 0\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_unlock;
+ }
+
+ dump_count = atomic_inc_return(&core_dump_count);
+ if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+ printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_dropcount;
+ }
+
helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
if (!helper_argv) {
printk(KERN_WARNING "%s failed to allocate memory\n",
__func__);
- goto fail_unlock;
- }
- /* Terminate the string before the first option */
- delimit = strchr(corename, ' ');
- if (delimit)
- *delimit = '\0';
- delimit = strrchr(helper_argv[0], '/');
- if (delimit)
- delimit++;
- else
- delimit = helper_argv[0];
- if (!strcmp(delimit, current->comm)) {
- printk(KERN_NOTICE "Recursive core dump detected, "
- "aborting\n");
- goto fail_unlock;
+ goto fail_dropcount;
}
core_limit = RLIM_INFINITY;
/* SIGPIPE can happen, but it's just never processed */
- if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
+ if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
&file)) {
printk(KERN_INFO "Core dump to %s pipe failed\n",
corename);
- goto fail_unlock;
+ goto fail_dropcount;
}
} else
file = filp_open(corename,
O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
0600);
if (IS_ERR(file))
- goto fail_unlock;
+ goto fail_dropcount;
inode = file->f_path.dentry->d_inode;
if (inode->i_nlink > 1)
goto close_fail; /* multiple links - don't dump */
if (retval)
current->signal->group_exit_code |= 0x80;
close_fail:
+ if (ispipe && core_pipe_limit)
+ wait_for_dump_helpers(file);
filp_close(file, NULL);
+fail_dropcount:
+ if (dump_count)
+ atomic_dec(&core_dump_count);
fail_unlock:
if (helper_argv)
argv_free(helper_argv);
}
lock_super(sb);
- lock_kernel();
sbi = sb->s_fs_info;
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
out:
if (or)
osd_end_request(or);
- unlock_kernel();
unlock_super(sb);
kfree(fscb);
return ret;
int num_pend;
struct exofs_sb_info *sbi = sb->s_fs_info;
- lock_kernel();
-
if (sb->s_dirt)
exofs_write_super(sb);
osduld_put_device(sbi->s_dev);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
-
- unlock_kernel();
}
/*
.writepages = ext2_writepages,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
const struct address_space_operations ext2_aops_xip = {
.direct_IO = ext2_direct_IO,
.writepages = ext2_writepages,
.migratepage = buffer_migrate_page,
+ .error_remove_page = generic_error_remove_page,
};
/*
.direct_IO = ext3_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext3_writeback_aops = {
.direct_IO = ext3_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext3_journalled_aops = {
.invalidatepage = ext3_invalidatepage,
.releasepage = ext3_releasepage,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
void ext3_set_aops(struct inode *inode)
.direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext4_writeback_aops = {
.direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext4_journalled_aops = {
.invalidatepage = ext4_invalidatepage,
.releasepage = ext4_releasepage,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations ext4_da_aops = {
.direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
void ext4_set_aops(struct inode *inode)
iput(sbi->fat_inode);
- if (sbi->nls_disk) {
- unload_nls(sbi->nls_disk);
- sbi->nls_disk = NULL;
- sbi->options.codepage = fat_default_codepage;
- }
- if (sbi->nls_io) {
- unload_nls(sbi->nls_io);
- sbi->nls_io = NULL;
- }
- if (sbi->options.iocharset != fat_default_iocharset) {
+ unload_nls(sbi->nls_disk);
+ unload_nls(sbi->nls_io);
+
+ if (sbi->options.iocharset != fat_default_iocharset)
kfree(sbi->options.iocharset);
- sbi->options.iocharset = fat_default_iocharset;
- }
sb->s_fs_info = NULL;
kfree(sbi);
return pid;
}
+static int f_setown_ex(struct file *filp, unsigned long arg)
+{
+ struct f_owner_ex * __user owner_p = (void * __user)arg;
+ struct f_owner_ex owner;
+ struct pid *pid;
+ int type;
+ int ret;
+
+ ret = copy_from_user(&owner, owner_p, sizeof(owner));
+ if (ret)
+ return ret;
+
+ switch (owner.type) {
+ case F_OWNER_TID:
+ type = PIDTYPE_MAX;
+ break;
+
+ case F_OWNER_PID:
+ type = PIDTYPE_PID;
+ break;
+
+ case F_OWNER_GID:
+ type = PIDTYPE_PGID;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+ pid = find_vpid(owner.pid);
+ if (owner.pid && !pid)
+ ret = -ESRCH;
+ else
+ ret = __f_setown(filp, pid, type, 1);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int f_getown_ex(struct file *filp, unsigned long arg)
+{
+ struct f_owner_ex * __user owner_p = (void * __user)arg;
+ struct f_owner_ex owner;
+ int ret = 0;
+
+ read_lock(&filp->f_owner.lock);
+ owner.pid = pid_vnr(filp->f_owner.pid);
+ switch (filp->f_owner.pid_type) {
+ case PIDTYPE_MAX:
+ owner.type = F_OWNER_TID;
+ break;
+
+ case PIDTYPE_PID:
+ owner.type = F_OWNER_PID;
+ break;
+
+ case PIDTYPE_PGID:
+ owner.type = F_OWNER_GID;
+ break;
+
+ default:
+ WARN_ON(1);
+ ret = -EINVAL;
+ break;
+ }
+ read_unlock(&filp->f_owner.lock);
+
+ if (!ret)
+ ret = copy_to_user(owner_p, &owner, sizeof(owner));
+ return ret;
+}
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
case F_SETOWN:
err = f_setown(filp, arg, 1);
break;
+ case F_GETOWN_EX:
+ err = f_getown_ex(filp, arg);
+ break;
+ case F_SETOWN_EX:
+ err = f_setown_ex(filp, arg);
+ break;
case F_GETSIG:
err = filp->f_owner.signum;
break;
static void send_sigio_to_task(struct task_struct *p,
struct fown_struct *fown,
- int fd,
- int reason)
+ int fd, int reason, int group)
{
/*
* F_SETSIG can change ->signum lockless in parallel, make
else
si.si_band = band_table[reason - POLL_IN];
si.si_fd = fd;
- if (!group_send_sig_info(signum, &si, p))
+ if (!do_send_sig_info(signum, &si, p, group))
break;
/* fall-through: fall back on the old plain SIGIO signal */
case 0:
- group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
+ do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
}
}
struct task_struct *p;
enum pid_type type;
struct pid *pid;
+ int group = 1;
read_lock(&fown->lock);
+
type = fown->pid_type;
+ if (type == PIDTYPE_MAX) {
+ group = 0;
+ type = PIDTYPE_PID;
+ }
+
pid = fown->pid;
if (!pid)
goto out_unlock_fown;
read_lock(&tasklist_lock);
do_each_pid_task(pid, type, p) {
- send_sigio_to_task(p, fown, fd, band);
+ send_sigio_to_task(p, fown, fd, band, group);
} while_each_pid_task(pid, type, p);
read_unlock(&tasklist_lock);
out_unlock_fown:
}
static void send_sigurg_to_task(struct task_struct *p,
- struct fown_struct *fown)
+ struct fown_struct *fown, int group)
{
if (sigio_perm(p, fown, SIGURG))
- group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
+ do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
}
int send_sigurg(struct fown_struct *fown)
struct task_struct *p;
enum pid_type type;
struct pid *pid;
+ int group = 1;
int ret = 0;
read_lock(&fown->lock);
+
type = fown->pid_type;
+ if (type == PIDTYPE_MAX) {
+ group = 0;
+ type = PIDTYPE_PID;
+ }
+
pid = fown->pid;
if (!pid)
goto out_unlock_fown;
read_lock(&tasklist_lock);
do_each_pid_task(pid, type, p) {
- send_sigurg_to_task(p, fown);
+ send_sigurg_to_task(p, fown, group);
} while_each_pid_task(pid, type, p);
read_unlock(&tasklist_lock);
out_unlock_fown:
* Handle nr_files sysctl
*/
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+int proc_nr_files(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
- return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(table, write, buffer, lenp, ppos);
}
#else
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+int proc_nr_files(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
return 0;
if (attr->ia_valid & ATTR_SIZE) {
- unsigned long limit;
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
- send_sig(SIGXFSZ, current, 0);
- return -EFBIG;
- }
+ err = inode_newsize_ok(inode, attr->ia_size);
+ if (err)
+ return err;
is_truncate = true;
}
* FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
*/
if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
- if (outarg.attr.size < oldsize)
- fuse_truncate(inode->i_mapping, outarg.attr.size);
+ truncate_pagecache(inode, oldsize, outarg.attr.size);
invalidate_inode_pages2(inode->i_mapping);
}
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid);
-void fuse_truncate(struct address_space *mapping, loff_t offset);
-
/**
* Initialize the client device
*/
return 0;
}
-void fuse_truncate(struct address_space *mapping, loff_t offset)
-{
- /* See vmtruncate() */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-}
-
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid)
{
spin_unlock(&fc->lock);
if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
- if (attr->size < oldsize)
- fuse_truncate(inode->i_mapping, attr->size);
+ truncate_pagecache(inode, oldsize, attr->size);
invalidate_inode_pages2(inode->i_mapping);
}
}
.direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations gfs2_ordered_aops = {
.direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
static const struct address_space_operations gfs2_jdata_aops = {
.invalidatepage = gfs2_invalidatepage,
.releasepage = gfs2_releasepage,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
void gfs2_set_aops(struct inode *inode)
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/namei.h>
-#include <linux/utsname.h>
#include <linux/mm.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
brelse(HFS_SB(sb)->mdb_bh);
brelse(HFS_SB(sb)->alt_mdb_bh);
- if (HFS_SB(sb)->nls_io)
- unload_nls(HFS_SB(sb)->nls_io);
- if (HFS_SB(sb)->nls_disk)
- unload_nls(HFS_SB(sb)->nls_disk);
+ unload_nls(HFS_SB(sb)->nls_io);
+ unload_nls(HFS_SB(sb)->nls_disk);
free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
kfree(HFS_SB(sb));
iput(HFSPLUS_SB(sb).alloc_file);
iput(HFSPLUS_SB(sb).hidden_dir);
brelse(HFSPLUS_SB(sb).s_vhbh);
- if (HFSPLUS_SB(sb).nls)
- unload_nls(HFSPLUS_SB(sb).nls);
+ unload_nls(HFSPLUS_SB(sb).nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
cleanup:
hfsplus_put_super(sb);
- if (nls)
- unload_nls(nls);
+ unload_nls(nls);
return err;
}
static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
{
- struct super_block *sb = inode->i_sb;
-
- if (!hlist_unhashed(&inode->i_hash)) {
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- list_move(&inode->i_list, &inode_unused);
- inodes_stat.nr_unused++;
- if (!sb || (sb->s_flags & MS_ACTIVE)) {
- spin_unlock(&inode_lock);
- return;
- }
- inode->i_state |= I_WILL_FREE;
- spin_unlock(&inode_lock);
- /*
- * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
- * in our backing_dev_info.
- */
- write_inode_now(inode, 1);
- spin_lock(&inode_lock);
- inode->i_state &= ~I_WILL_FREE;
- inodes_stat.nr_unused--;
- hlist_del_init(&inode->i_hash);
+ if (generic_detach_inode(inode)) {
+ truncate_hugepages(inode, 0);
+ clear_inode(inode);
+ destroy_inode(inode);
}
- list_del_init(&inode->i_list);
- list_del_init(&inode->i_sb_list);
- inode->i_state |= I_FREEING;
- inodes_stat.nr_inodes--;
- spin_unlock(&inode_lock);
- truncate_hugepages(inode, 0);
- clear_inode(inode);
- destroy_inode(inode);
}
static void hugetlbfs_drop_inode(struct inode *inode)
static struct vfsmount *hugetlbfs_vfsmount;
-static int can_do_hugetlb_shm(int creat_flags)
+static int can_do_hugetlb_shm(void)
{
- if (creat_flags != HUGETLB_SHMFS_INODE)
- return 0;
- if (capable(CAP_IPC_LOCK))
- return 1;
- if (in_group_p(sysctl_hugetlb_shm_group))
- return 1;
- return 0;
+ return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
}
struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
if (!hugetlbfs_vfsmount)
return ERR_PTR(-ENOENT);
- if (!can_do_hugetlb_shm(creat_flags)) {
+ if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
*user = current_user();
if (user_shm_lock(size, *user)) {
WARN_ONCE(1,
}
EXPORT_SYMBOL(generic_delete_inode);
-static void generic_forget_inode(struct inode *inode)
+/**
+ * generic_detach_inode - remove inode from inode lists
+ * @inode: inode to remove
+ *
+ * Remove inode from inode lists, write it if it's dirty. This is just an
+ * internal VFS helper exported for hugetlbfs. Do not use!
+ *
+ * Returns 1 if inode should be completely destroyed.
+ */
+int generic_detach_inode(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
- return;
+ return 0;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(generic_detach_inode);
+
+static void generic_forget_inode(struct inode *inode)
+{
+ if (!generic_detach_inode(inode))
+ return;
if (inode->i_data.nrpages)
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
struct inode *inode = dentry->d_inode;
struct timespec now;
- if (mnt_want_write(mnt))
- return;
if (inode->i_flags & S_NOATIME)
- goto out;
+ return;
if (IS_NOATIME(inode))
- goto out;
+ return;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
- goto out;
+ return;
if (mnt->mnt_flags & MNT_NOATIME)
- goto out;
+ return;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- goto out;
+ return;
now = current_fs_time(inode->i_sb);
if (!relatime_need_update(mnt, inode, now))
- goto out;
+ return;
if (timespec_equal(&inode->i_atime, &now))
- goto out;
+ return;
+
+ if (mnt_want_write(mnt))
+ return;
inode->i_atime = now;
mark_inode_dirty_sync(inode);
-out:
mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);
{
struct inode *inode = file->f_path.dentry->d_inode;
struct timespec now;
- int sync_it = 0;
- int err;
+ enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
+ /* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return;
- err = mnt_want_write_file(file);
- if (err)
- return;
-
now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_mtime, &now)) {
- inode->i_mtime = now;
- sync_it = 1;
- }
+ if (!timespec_equal(&inode->i_mtime, &now))
+ sync_it = S_MTIME;
- if (!timespec_equal(&inode->i_ctime, &now)) {
- inode->i_ctime = now;
- sync_it = 1;
- }
+ if (!timespec_equal(&inode->i_ctime, &now))
+ sync_it |= S_CTIME;
- if (IS_I_VERSION(inode)) {
- inode_inc_iversion(inode);
- sync_it = 1;
- }
+ if (IS_I_VERSION(inode))
+ sync_it |= S_VERSION;
+
+ if (!sync_it)
+ return;
- if (sync_it)
- mark_inode_dirty_sync(inode);
+ /* Finally allowed to write? Takes lock. */
+ if (mnt_want_write_file(file))
+ return;
+
+ /* Only change inode inside the lock region */
+ if (sync_it & S_VERSION)
+ inode_inc_iversion(inode);
+ if (sync_it & S_CTIME)
+ inode->i_ctime = now;
+ if (sync_it & S_MTIME)
+ inode->i_mtime = now;
+ mark_inode_dirty_sync(inode);
mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);
else if (S_ISSOCK(mode))
inode->i_fop = &bad_sock_fops;
else
- printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
- mode);
+ printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
+ " inode %s:%lu\n", mode, inode->i_sb->s_id,
+ inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);
* namespace.c
*/
extern int copy_mount_options(const void __user *, unsigned long *);
+extern int copy_mount_string(const void __user *, char **);
extern void free_vfsmnt(struct vfsmount *);
extern struct vfsmount *alloc_vfsmnt(const char *);
static int fiemap_check_ranges(struct super_block *sb,
u64 start, u64 len, u64 *new_len)
{
+ u64 maxbytes = (u64) sb->s_maxbytes;
+
*new_len = len;
if (len == 0)
return -EINVAL;
- if (start > sb->s_maxbytes)
+ if (start > maxbytes)
return -EFBIG;
/*
* Shrink request scope to what the fs can actually handle.
*/
- if ((len > sb->s_maxbytes) ||
- (sb->s_maxbytes - len) < start)
- *new_len = sb->s_maxbytes - start;
+ if (len > maxbytes || (maxbytes - len) < start)
+ *new_len = maxbytes - start;
return 0;
}
#ifdef CONFIG_JOLIET
lock_kernel();
- if (sbi->s_nls_iocharset) {
- unload_nls(sbi->s_nls_iocharset);
- sbi->s_nls_iocharset = NULL;
- }
+ unload_nls(sbi->s_nls_iocharset);
unlock_kernel();
#endif
printk(KERN_WARNING "%s: get root inode failed\n", __func__);
out_no_inode:
#ifdef CONFIG_JOLIET
- if (sbi->s_nls_iocharset)
- unload_nls(sbi->s_nls_iocharset);
+ unload_nls(sbi->s_nls_iocharset);
#endif
goto out_freesbi;
out_no_read:
rc = jfs_umount(sb);
if (rc)
jfs_err("jfs_umount failed with return code %d", rc);
- if (sbi->nls_tab)
- unload_nls(sbi->nls_tab);
- sbi->nls_tab = NULL;
+
+ unload_nls(sbi->nls_tab);
truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
iput(sbi->direct_inode);
- sbi->direct_inode = NULL;
kfree(sbi);
if (nls_map != (void *) -1) {
/* Discard old (if remount) */
- if (sbi->nls_tab)
- unload_nls(sbi->nls_tab);
+ unload_nls(sbi->nls_tab);
sbi->nls_tab = nls_map;
}
return 1;
const void *from, size_t available)
{
loff_t pos = *ppos;
+ size_t ret;
+
if (pos < 0)
return -EINVAL;
- if (pos >= available)
+ if (pos >= available || !count)
return 0;
if (count > available - pos)
count = available - pos;
- if (copy_to_user(to, from + pos, count))
+ ret = copy_to_user(to, from + pos, count);
+ if (ret == count)
return -EFAULT;
+ count -= ret;
*ppos = pos + count;
return count;
}
if (copy_from_user(attr->set_buf, buf, size))
goto out;
- ret = len; /* claim we got the whole input */
attr->set_buf[size] = '\0';
val = simple_strtol(attr->set_buf, NULL, 0);
- attr->set(attr->data, val);
+ ret = attr->set(attr->data, val);
+ if (ret == 0)
+ ret = len; /* on success, claim we got the whole input */
out:
mutex_unlock(&attr->mutex);
return ret;
#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/utsname.h>
#include <linux/nfs.h>
#include <linux/sunrpc/xdr.h>
#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/utsname.h>
#include <linux/nfs.h>
#include <linux/sunrpc/xdr.h>
{
struct vfsmount *mnt;
- if (!type || !memchr(type, 0, PAGE_SIZE))
+ if (!type)
return -EINVAL;
/* we need capabilities... */
return 0;
}
+int copy_mount_string(const void __user *data, char **where)
+{
+ char *tmp;
+
+ if (!data) {
+ *where = NULL;
+ return 0;
+ }
+
+ tmp = strndup_user(data, PAGE_SIZE);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
+ *where = tmp;
+ return 0;
+}
+
/*
* Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
* be given to the mount() call (ie: read-only, no-dev, no-suid etc).
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
return -EINVAL;
- if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
- return -EINVAL;
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
{
- int retval;
+ int ret;
+ char *kernel_type;
+ char *kernel_dir;
+ char *kernel_dev;
unsigned long data_page;
- unsigned long type_page;
- unsigned long dev_page;
- char *dir_page;
- retval = copy_mount_options(type, &type_page);
- if (retval < 0)
- return retval;
+ ret = copy_mount_string(type, &kernel_type);
+ if (ret < 0)
+ goto out_type;
- dir_page = getname(dir_name);
- retval = PTR_ERR(dir_page);
- if (IS_ERR(dir_page))
- goto out1;
+ kernel_dir = getname(dir_name);
+ if (IS_ERR(kernel_dir)) {
+ ret = PTR_ERR(kernel_dir);
+ goto out_dir;
+ }
- retval = copy_mount_options(dev_name, &dev_page);
- if (retval < 0)
- goto out2;
+ ret = copy_mount_string(dev_name, &kernel_dev);
+ if (ret < 0)
+ goto out_dev;
- retval = copy_mount_options(data, &data_page);
- if (retval < 0)
- goto out3;
+ ret = copy_mount_options(data, &data_page);
+ if (ret < 0)
+ goto out_data;
- retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
- flags, (void *)data_page);
- free_page(data_page);
+ ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
+ (void *) data_page);
-out3:
- free_page(dev_page);
-out2:
- putname(dir_page);
-out1:
- free_page(type_page);
- return retval;
+ free_page(data_page);
+out_data:
+ kfree(kernel_dev);
+out_dev:
+ putname(kernel_dir);
+out_dir:
+ kfree(kernel_type);
+out_type:
+ return ret;
}
/*
#ifdef CONFIG_NCPFS_NLS
/* unload the NLS charsets */
- if (server->nls_vol)
- {
- unload_nls(server->nls_vol);
- server->nls_vol = NULL;
- }
- if (server->nls_io)
- {
- unload_nls(server->nls_io);
- server->nls_io = NULL;
- }
+ unload_nls(server->nls_vol);
+ unload_nls(server->nls_io);
#endif /* CONFIG_NCPFS_NLS */
if (server->info_filp)
oldset_io = server->nls_io;
server->nls_io = iocharset;
- if (oldset_cp)
- unload_nls(oldset_cp);
- if (oldset_io)
- unload_nls(oldset_io);
+ unload_nls(oldset_cp);
+ unload_nls(oldset_io);
return 0;
}
.hostname = clp->cl_hostname,
.address = (struct sockaddr *)&clp->cl_addr,
.addrlen = clp->cl_addrlen,
- .protocol = server->flags & NFS_MOUNT_TCP ?
- IPPROTO_TCP : IPPROTO_UDP,
.nfs_version = clp->rpc_ops->version,
.noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
1 : 0,
if (server->flags & NFS_MOUNT_NONLM)
return 0;
+ switch (clp->cl_proto) {
+ default:
+ nlm_init.protocol = IPPROTO_TCP;
+ break;
+ case XPRT_TRANSPORT_UDP:
+ nlm_init.protocol = IPPROTO_UDP;
+ }
+
host = nlmclnt_init(&nlm_init);
if (IS_ERR(host))
return PTR_ERR(host);
dprintk("--> nfs_init_server()\n");
#ifdef CONFIG_NFS_V3
- if (data->flags & NFS_MOUNT_VER3)
+ if (data->version == 3)
cl_init.rpc_ops = &nfs_v3_clientops;
#endif
target->acdirmin = source->acdirmin;
target->acdirmax = source->acdirmax;
target->caps = source->caps;
+ target->options = source->options;
}
/*
.direct_IO = nfs_direct_IO,
.migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page,
+ .error_remove_page = generic_error_remove_page,
};
/*
/*
* Get the cache cookie for an NFS superblock. We have to handle
* uniquification here because the cache doesn't do it for us.
+ *
+ * The default uniquifier is just an empty string, but it may be overridden
+ * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
+ * superblock across an automount point of some nature.
*/
-void nfs_fscache_get_super_cookie(struct super_block *sb,
- struct nfs_parsed_mount_data *data)
+void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq,
+ struct nfs_clone_mount *mntdata)
{
struct nfs_fscache_key *key, *xkey;
struct nfs_server *nfss = NFS_SB(sb);
struct rb_node **p, *parent;
- const char *uniq = data->fscache_uniq ?: "";
int diff, ulen;
- ulen = strlen(uniq);
+ if (uniq) {
+ ulen = strlen(uniq);
+ } else if (mntdata) {
+ struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
+ if (mnt_s->fscache_key) {
+ uniq = mnt_s->fscache_key->key.uniquifier;
+ ulen = mnt_s->fscache_key->key.uniq_len;
+ }
+ }
+
+ if (!uniq) {
+ uniq = "";
+ ulen = 1;
+ }
+
key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL);
if (!key)
return;
extern void nfs_fscache_release_client_cookie(struct nfs_client *);
extern void nfs_fscache_get_super_cookie(struct super_block *,
- struct nfs_parsed_mount_data *);
+ const char *,
+ struct nfs_clone_mount *);
extern void nfs_fscache_release_super_cookie(struct super_block *);
extern void nfs_fscache_init_inode_cookie(struct inode *);
static inline void nfs_fscache_get_super_cookie(
struct super_block *sb,
- struct nfs_parsed_mount_data *data)
+ const char *uniq,
+ struct nfs_clone_mount *mntdata)
{
}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
*/
static int nfs_vmtruncate(struct inode * inode, loff_t offset)
{
- if (i_size_read(inode) < offset) {
- unsigned long limit;
-
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit)
- goto out_sig;
- if (offset > inode->i_sb->s_maxbytes)
- goto out_big;
- spin_lock(&inode->i_lock);
- i_size_write(inode, offset);
- spin_unlock(&inode->i_lock);
- } else {
- struct address_space *mapping = inode->i_mapping;
+ loff_t oldsize;
+ int err;
- /*
- * truncation of in-use swapfiles is disallowed - it would
- * cause subsequent swapout to scribble on the now-freed
- * blocks.
- */
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
- spin_lock(&inode->i_lock);
- i_size_write(inode, offset);
- spin_unlock(&inode->i_lock);
+ err = inode_newsize_ok(inode, offset);
+ if (err)
+ goto out;
- /*
- * unmap_mapping_range is called twice, first simply for
- * efficiency so that truncate_inode_pages does fewer
- * single-page unmaps. However after this first call, and
- * before truncate_inode_pages finishes, it is possible for
- * private pages to be COWed, which remain after
- * truncate_inode_pages finishes, hence the second
- * unmap_mapping_range call must be made for correctness.
- */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- }
- return 0;
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out_big:
- return -EFBIG;
+ spin_lock(&inode->i_lock);
+ oldsize = inode->i_size;
+ i_size_write(inode, offset);
+ spin_unlock(&inode->i_lock);
+
+ truncate_pagecache(inode, oldsize, offset);
+out:
+ return err;
}
/**
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
*/
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
*/
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
unlock_kernel();
}
+static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(int flags)
+{
+ struct nfs_parsed_mount_data *data;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data) {
+ data->flags = flags;
+ data->rsize = NFS_MAX_FILE_IO_SIZE;
+ data->wsize = NFS_MAX_FILE_IO_SIZE;
+ data->acregmin = NFS_DEF_ACREGMIN;
+ data->acregmax = NFS_DEF_ACREGMAX;
+ data->acdirmin = NFS_DEF_ACDIRMIN;
+ data->acdirmax = NFS_DEF_ACDIRMAX;
+ data->nfs_server.port = NFS_UNSPEC_PORT;
+ data->auth_flavors[0] = RPC_AUTH_UNIX;
+ data->auth_flavor_len = 1;
+ data->minorversion = 0;
+ }
+ return data;
+}
+
/*
* Sanity-check a server address provided by the mount command.
*
int status;
if (args->mount_server.version == 0) {
- if (args->flags & NFS_MOUNT_VER3)
- args->mount_server.version = NFS_MNT3_VERSION;
- else
- args->mount_server.version = NFS_MNT_VERSION;
+ switch (args->version) {
+ default:
+ args->mount_server.version = NFS_MNT3_VERSION;
+ break;
+ case 2:
+ args->mount_server.version = NFS_MNT_VERSION;
+ }
}
request.version = args->mount_server.version;
if (data == NULL)
goto out_no_data;
- args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
- args->rsize = NFS_MAX_FILE_IO_SIZE;
- args->wsize = NFS_MAX_FILE_IO_SIZE;
- args->acregmin = NFS_DEF_ACREGMIN;
- args->acregmax = NFS_DEF_ACREGMAX;
- args->acdirmin = NFS_DEF_ACDIRMIN;
- args->acdirmax = NFS_DEF_ACDIRMAX;
- args->mount_server.port = NFS_UNSPEC_PORT;
- args->nfs_server.port = NFS_UNSPEC_PORT;
- args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- args->auth_flavors[0] = RPC_AUTH_UNIX;
- args->auth_flavor_len = 1;
- args->minorversion = 0;
-
switch (data->version) {
case 1:
data->namlen = 0;
}
#ifndef CONFIG_NFS_V3
- if (args->flags & NFS_MOUNT_VER3)
+ if (args->version == 3)
goto out_v3_not_compiled;
#endif /* !CONFIG_NFS_V3 */
if (data->bsize)
sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
- if (server->flags & NFS_MOUNT_VER3) {
+ if (server->nfs_client->rpc_ops->version == 3) {
/* The VFS shouldn't apply the umask to mode bits. We will do
* so ourselves when necessary.
*/
sb->s_blocksize = old_sb->s_blocksize;
sb->s_maxbytes = old_sb->s_maxbytes;
- if (server->flags & NFS_MOUNT_VER3) {
+ if (server->nfs_client->rpc_ops->version == 3) {
/* The VFS shouldn't apply the umask to mode bits. We will do
* so ourselves when necessary.
*/
};
int error = -ENOMEM;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = nfs_alloc_parsed_mount_data(NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
if (data == NULL || mntfh == NULL)
goto out_free_fh;
if (!s->s_root) {
/* initial superblock/root creation */
nfs_fill_super(s, data);
- nfs_fscache_get_super_cookie(s, data);
+ nfs_fscache_get_super_cookie(
+ s, data ? data->fscache_uniq : NULL, NULL);
}
mntroot = nfs_get_root(s, mntfh);
if (!s->s_root) {
/* initial superblock/root creation */
nfs_clone_super(s, data->sb);
+ nfs_fscache_get_super_cookie(s, NULL, data);
}
mntroot = nfs_get_root(s, data->fh);
if (data == NULL)
goto out_no_data;
- args->rsize = NFS_MAX_FILE_IO_SIZE;
- args->wsize = NFS_MAX_FILE_IO_SIZE;
- args->acregmin = NFS_DEF_ACREGMIN;
- args->acregmax = NFS_DEF_ACREGMAX;
- args->acdirmin = NFS_DEF_ACDIRMIN;
- args->acdirmax = NFS_DEF_ACDIRMAX;
- args->nfs_server.port = NFS_UNSPEC_PORT;
- args->auth_flavors[0] = RPC_AUTH_UNIX;
- args->auth_flavor_len = 1;
args->version = 4;
- args->minorversion = 0;
-
switch (data->version) {
case 1:
if (data->host_addrlen > sizeof(args->nfs_server.address))
if (!s->s_root) {
/* initial superblock/root creation */
nfs4_fill_super(s);
- nfs_fscache_get_super_cookie(s, data);
+ nfs_fscache_get_super_cookie(
+ s, data ? data->fscache_uniq : NULL, NULL);
}
mntroot = nfs4_get_root(s, mntfh);
struct nfs_parsed_mount_data *data;
int error = -ENOMEM;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = nfs_alloc_parsed_mount_data(0);
if (data == NULL)
goto out_free_data;
if (!s->s_root) {
/* initial superblock/root creation */
nfs4_clone_super(s, data->sb);
+ nfs_fscache_get_super_cookie(s, NULL, data);
}
mntroot = nfs4_get_root(s, data->fh);
if (!s->s_root) {
/* initial superblock/root creation */
nfs4_fill_super(s);
+ nfs_fscache_get_super_cookie(s, NULL, data);
}
mntroot = nfs4_get_root(s, &mntfh);
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
void unload_nls(struct nls_table *nls)
{
- module_put(nls->owner);
+ if (nls)
+ module_put(nls->owner);
}
static const wchar_t charset2uni[256] = {
.migratepage = buffer_migrate_page, /* Move a page cache page from
one physical page to an
other. */
+ .error_remove_page = generic_error_remove_page,
};
/**
.migratepage = buffer_migrate_page, /* Move a page cache page from
one physical page to an
other. */
+ .error_remove_page = generic_error_remove_page,
};
#ifdef NTFS_RW
v, old_nls->charset);
nls_map = old_nls;
} else /* nls_map */ {
- if (old_nls)
- unload_nls(old_nls);
+ unload_nls(old_nls);
}
} else if (!strcmp(p, "utf8")) {
bool val = false;
ntfs_free(vol->upcase);
vol->upcase = NULL;
}
- if (vol->nls_map) {
- unload_nls(vol->nls_map);
- vol->nls_map = NULL;
- }
+
+ unload_nls(vol->nls_map);
+
sb->s_fs_info = NULL;
kfree(vol);
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/sysctl.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/statfs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
-#include <linux/utsname.h>
#include <linux/namei.h>
#define MLOG_MASK_PREFIX ML_NAMEI
"Committed_AS: %8lu kB\n"
"VmallocTotal: %8lu kB\n"
"VmallocUsed: %8lu kB\n"
- "VmallocChunk: %8lu kB\n",
+ "VmallocChunk: %8lu kB\n"
+#ifdef CONFIG_MEMORY_FAILURE
+ "HardwareCorrupted: %8lu kB\n"
+#endif
+ ,
K(i.totalram),
K(i.freeram),
K(i.bufferram),
(unsigned long)VMALLOC_TOTAL >> 10,
vmi.used >> 10,
vmi.largest_chunk >> 10
+#ifdef CONFIG_MEMORY_FAILURE
+ ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
+#endif
);
hugetlb_report_meminfo(m);
/* careful: calling conventions are nasty here */
res = count;
- error = table->proc_handler(table, write, filp, buf, &res, ppos);
+ error = table->proc_handler(table, write, buf, &res, ppos);
if (!error)
error = res;
out:
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/time.h>
+#include <linux/kernel_stat.h>
#include <asm/cputime.h>
static int uptime_proc_show(struct seq_file *m, void *v)
{
struct timespec uptime;
struct timespec idle;
- cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
+ int i;
+ cputime_t idletime = cputime_zero;
+
+ for_each_possible_cpu(i)
+ idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
do_posix_clock_monotonic_gettime(&uptime);
monotonic_to_bootbased(&uptime);
/* make various checks */
order = get_order(newsize);
if (unlikely(order >= MAX_ORDER))
- goto too_big;
+ return -EFBIG;
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && newsize > limit)
- goto fsize_exceeded;
-
- if (newsize > inode->i_sb->s_maxbytes)
- goto too_big;
+ ret = inode_newsize_ok(inode, newsize);
+ if (ret)
+ return ret;
i_size_write(inode, newsize);
return 0;
- fsize_exceeded:
- send_sig(SIGXFSZ, current, 0);
- too_big:
- return -EFBIG;
-
- add_error:
+add_error:
while (loop < npages)
__free_page(pages + loop++);
return ret;
max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
pos = *ppos;
- retval = -EINVAL;
- if (unlikely(pos < 0))
- goto fput_out;
if (unlikely(pos + count > max)) {
retval = -EOVERFLOW;
if (pos >= max)
pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK;
root = romfs_iget(sb, pos);
- if (!root)
+ if (IS_ERR(root))
goto error;
sb->s_root = d_alloc_root(root);
*/
int seq_path(struct seq_file *m, struct path *path, char *esc)
{
- if (m->count < m->size) {
- char *s = m->buf + m->count;
- char *p = d_path(path, s, m->size - m->count);
+ char *buf;
+ size_t size = seq_get_buf(m, &buf);
+ int res = -1;
+
+ if (size) {
+ char *p = d_path(path, buf, size);
if (!IS_ERR(p)) {
- s = mangle_path(s, p, esc);
- if (s) {
- p = m->buf + m->count;
- m->count = s - m->buf;
- return s - p;
- }
+ char *end = mangle_path(buf, p, esc);
+ if (end)
+ res = end - buf;
}
}
- m->count = m->size;
- return -1;
+ seq_commit(m, res);
+
+ return res;
}
EXPORT_SYMBOL(seq_path);
int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
char *esc)
{
- int err = -ENAMETOOLONG;
- if (m->count < m->size) {
- char *s = m->buf + m->count;
+ char *buf;
+ size_t size = seq_get_buf(m, &buf);
+ int res = -ENAMETOOLONG;
+
+ if (size) {
char *p;
spin_lock(&dcache_lock);
- p = __d_path(path, root, s, m->size - m->count);
+ p = __d_path(path, root, buf, size);
spin_unlock(&dcache_lock);
- err = PTR_ERR(p);
+ res = PTR_ERR(p);
if (!IS_ERR(p)) {
- s = mangle_path(s, p, esc);
- if (s) {
- p = m->buf + m->count;
- m->count = s - m->buf;
- return 0;
- }
+ char *end = mangle_path(buf, p, esc);
+ if (end)
+ res = end - buf;
+ else
+ res = -ENAMETOOLONG;
}
}
- m->count = m->size;
- return err;
+ seq_commit(m, res);
+
+ return res < 0 ? res : 0;
}
/*
*/
int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
{
- if (m->count < m->size) {
- char *s = m->buf + m->count;
- char *p = dentry_path(dentry, s, m->size - m->count);
+ char *buf;
+ size_t size = seq_get_buf(m, &buf);
+ int res = -1;
+
+ if (size) {
+ char *p = dentry_path(dentry, buf, size);
if (!IS_ERR(p)) {
- s = mangle_path(s, p, esc);
- if (s) {
- p = m->buf + m->count;
- m->count = s - m->buf;
- return s - p;
- }
+ char *end = mangle_path(buf, p, esc);
+ if (end)
+ res = end - buf;
}
}
- m->count = m->size;
- return -1;
+ seq_commit(m, res);
+
+ return res;
}
int seq_bitmap(struct seq_file *m, const unsigned long *bits,
static void
smb_unload_nls(struct smb_sb_info *server)
{
- if (server->remote_nls) {
- unload_nls(server->remote_nls);
- server->remote_nls = NULL;
- }
- if (server->local_nls) {
- unload_nls(server->local_nls);
- server->local_nls = NULL;
- }
+ unload_nls(server->remote_nls);
+ unload_nls(server->local_nls);
}
static void
}
EXPORT_SYMBOL(get_super);
+
+/**
+ * get_active_super - get an active reference to the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given. Returns the superblock with an active
+ * reference and s_umount held exclusively or %NULL if none was found.
+ */
+struct super_block *get_active_super(struct block_device *bdev)
+{
+ struct super_block *sb;
+
+ if (!bdev)
+ return NULL;
+
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (sb->s_bdev != bdev)
+ continue;
+
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+ down_write(&sb->s_umount);
+ if (sb->s_root) {
+ spin_lock(&sb_lock);
+ if (sb->s_count > S_BIAS) {
+ atomic_inc(&sb->s_active);
+ sb->s_count--;
+ spin_unlock(&sb_lock);
+ return sb;
+ }
+ spin_unlock(&sb_lock);
+ }
+ up_write(&sb->s_umount);
+ put_super(sb);
+ yield();
+ spin_lock(&sb_lock);
+ }
+ spin_unlock(&sb_lock);
+ return NULL;
+}
struct super_block * user_get_super(dev_t dev)
{
{
int retval;
int remount_rw;
-
+
+ if (sb->s_frozen != SB_UNFROZEN)
+ return -EBUSY;
+
#ifdef CONFIG_BLOCK
if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
return -EACCES;
#endif
+
if (flags & MS_RDONLY)
acct_auto_close(sb);
shrink_dcache_sb(sb);
* will protect the lockfs code from trying to start a snapshot
* while we are mounting
*/
- down(&bdev->bd_mount_sem);
+ mutex_lock(&bdev->bd_fsfreeze_mutex);
+ if (bdev->bd_fsfreeze_count > 0) {
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ error = -EBUSY;
+ goto error_bdev;
+ }
s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
- up(&bdev->bd_mount_sem);
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
if (IS_ERR(s))
goto error_s;
if (error)
goto out_sb;
+ /*
+ * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
+ * but s_maxbytes was an unsigned long long for many releases. Throw
+ * this warning for a little while to try and catch filesystems that
+ * violate this rule. This warning should be either removed or
+ * converted to a BUG() in 2.6.34.
+ */
+ WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
+ "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
+
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
up_write(&mnt->mnt_sb->s_umount);
.direct_IO = xfs_vm_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
};
xfs_stats_clear_proc_handler(
ctl_table *ctl,
int write,
- struct file *filp,
void __user *buffer,
size_t *lenp,
loff_t *ppos)
int c, ret, *valp = ctl->data;
__uint32_t vn_active;
- ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
if (!ret && write && *valp) {
printk("XFS Clearing xfsstats\n");
--- /dev/null
+#ifndef ACPI_BUTTON_H
+#define ACPI_BUTTON_H
+
+#include <linux/notifier.h>
+
+#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE)
+extern int acpi_lid_notifier_register(struct notifier_block *nb);
+extern int acpi_lid_notifier_unregister(struct notifier_block *nb);
+extern int acpi_lid_open(void);
+#else
+static inline int acpi_lid_notifier_register(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline int acpi_lid_notifier_unregister(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline int acpi_lid_open(void)
+{
+ return 1;
+}
+#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */
+
+#endif /* ACPI_BUTTON_H */
#define F_SETSIG 10 /* for sockets. */
#define F_GETSIG 11 /* for sockets. */
#endif
+#ifndef F_SETOWN_EX
+#define F_SETOWN_EX 12
+#define F_GETOWN_EX 13
+#endif
+
+#define F_OWNER_TID 0
+#define F_OWNER_PID 1
+#define F_OWNER_GID 2
+
+struct f_owner_ex {
+ int type;
+ pid_t pid;
+};
/* for F_[GET|SET]FL */
#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
#define MADV_REMOVE 9 /* remove these pages & resources */
#define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */
+#define MADV_HWPOISON 100 /* poison a page for testing */
#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
#ifdef __ARCH_SI_TRAPNO
int _trapno; /* TRAP # which caused the signal */
#endif
+ short _addr_lsb; /* LSB of the reported address */
} _sigfault;
/* SIGPOLL */
#ifdef __ARCH_SI_TRAPNO
#define si_trapno _sifields._sigfault._trapno
#endif
+#define si_addr_lsb _sifields._sigfault._addr_lsb
#define si_band _sifields._sigpoll._band
#define si_fd _sifields._sigpoll._fd
#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */
#define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */
#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */
-#define NSIGBUS 3
+/* hardware memory error consumed on a machine check: action required */
+#define BUS_MCEERR_AR (__SI_FAULT|4)
+/* hardware memory error detected in process but not consumed: action optional*/
+#define BUS_MCEERR_AO (__SI_FAULT|5)
+#define NSIGBUS 5
/*
* SIGTRAP si_codes
#ifndef parent_node
#define parent_node(node) ((void)(node),0)
#endif
-#ifndef node_to_cpumask
-#define node_to_cpumask(node) ((void)node, cpu_online_map)
-#endif
#ifndef cpumask_of_node
#define cpumask_of_node(node) ((void)node, cpu_online_mask)
#endif
#endif /* CONFIG_NUMA */
-/*
- * returns pointer to cpumask for specified node
- * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
- */
-#ifndef node_to_cpumask_ptr
-
-#define node_to_cpumask_ptr(v, node) \
- cpumask_t _##v = node_to_cpumask(node); \
- const cpumask_t *v = &_##v
-
-#define node_to_cpumask_ptr_next(v, node) \
- _##v = node_to_cpumask(node)
-#endif
-
#endif /* _ASM_GENERIC_TOPOLOGY_H */
{0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0x2e32, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+ {0x8086, 0x2e42, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
{0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
#define DRM_I915_GEM_GET_APERTURE 0x23
#define DRM_I915_GEM_MMAP_GTT 0x24
#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
+#define DRM_I915_GEM_MADVISE 0x26
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id)
+#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
__u32 pipe;
};
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
+
+struct drm_i915_gem_madvise {
+ /** Handle of the buffer to change the backing store advice */
+ __u32 handle;
+
+ /* Advice: either the buffer will be needed again in the near future,
+ * or wont be and could be discarded under memory pressure.
+ */
+ __u32 madv;
+
+ /** Whether the backing store still exists. */
+ __u32 retained;
+};
+
#endif /* _I915_DRM_H_ */
* array.
* @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
* dependency chain
- * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
+ * @ASYNC_TX_FENCE: specify that the next operation in the dependency
+ * chain uses this operation's result as an input
*/
enum async_tx_flags {
ASYNC_TX_XOR_ZERO_DST = (1 << 0),
ASYNC_TX_XOR_DROP_DST = (1 << 1),
- ASYNC_TX_ACK = (1 << 3),
- ASYNC_TX_DEP_ACK = (1 << 4),
+ ASYNC_TX_ACK = (1 << 2),
+ ASYNC_TX_FENCE = (1 << 3),
+};
+
+/**
+ * struct async_submit_ctl - async_tx submission/completion modifiers
+ * @flags: submission modifiers
+ * @depend_tx: parent dependency of the current operation being submitted
+ * @cb_fn: callback routine to run at operation completion
+ * @cb_param: parameter for the callback routine
+ * @scribble: caller provided space for dma/page address conversions
+ */
+struct async_submit_ctl {
+ enum async_tx_flags flags;
+ struct dma_async_tx_descriptor *depend_tx;
+ dma_async_tx_callback cb_fn;
+ void *cb_param;
+ void *scribble;
};
#ifdef CONFIG_DMA_ENGINE
#define async_tx_issue_pending_all dma_issue_pending_all
+
+/**
+ * async_tx_issue_pending - send pending descriptor to the hardware channel
+ * @tx: descriptor handle to retrieve hardware context
+ *
+ * Note: any dependent operations will have already been issued by
+ * async_tx_channel_switch, or (in the case of no channel switch) will
+ * be already pending on this channel.
+ */
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+ if (likely(tx)) {
+ struct dma_chan *chan = tx->chan;
+ struct dma_device *dma = chan->device;
+
+ dma->device_issue_pending(chan);
+ }
+}
#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
#include <asm/async_tx.h>
#else
#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
__async_tx_find_channel(dep, type)
struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
- enum dma_transaction_type tx_type);
+__async_tx_find_channel(struct async_submit_ctl *submit,
+ enum dma_transaction_type tx_type);
#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
#else
static inline void async_tx_issue_pending_all(void)
do { } while (0);
}
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+ do { } while (0);
+}
+
static inline struct dma_chan *
-async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
- enum dma_transaction_type tx_type, struct page **dst, int dst_count,
- struct page **src, int src_count, size_t len)
+async_tx_find_channel(struct async_submit_ctl *submit,
+ enum dma_transaction_type tx_type, struct page **dst,
+ int dst_count, struct page **src, int src_count,
+ size_t len)
{
return NULL;
}
* @cb_fn_param: parameter to pass to the callback routine
*/
static inline void
-async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param)
+async_tx_sync_epilog(struct async_submit_ctl *submit)
{
- if (cb_fn)
- cb_fn(cb_fn_param);
+ if (submit->cb_fn)
+ submit->cb_fn(submit->cb_param);
}
-void
-async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
- enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+typedef union {
+ unsigned long addr;
+ struct page *page;
+ dma_addr_t dma;
+} addr_conv_t;
+
+static inline void
+init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
+ struct dma_async_tx_descriptor *tx,
+ dma_async_tx_callback cb_fn, void *cb_param,
+ addr_conv_t *scribble)
+{
+ args->flags = flags;
+ args->depend_tx = tx;
+ args->cb_fn = cb_fn;
+ args->cb_param = cb_param;
+ args->scribble = scribble;
+}
+
+void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
+ struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
- int src_cnt, size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+ int src_cnt, size_t len, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
- unsigned int offset, int src_cnt, size_t len,
- u32 *result, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+ int src_cnt, size_t len, enum sum_check_flags *result,
+ struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
- unsigned int src_offset, size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+ unsigned int src_offset, size_t len,
+ struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset,
- size_t len, enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+ size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
+ size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
+ size_t len, enum sum_check_flags *pqres, struct page *spare,
+ struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
+ struct page **ptrs, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
- struct dma_async_tx_descriptor *depend_tx,
- dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_raid6_datap_recov(int src_num, size_t bytes, int faila,
+ struct page **ptrs, struct async_submit_ctl *submit);
void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
#endif /* _ASYNC_TX_H_ */
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
-extern int set_binfmt(struct linux_binfmt *new);
+extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);
#endif /* __KERNEL__ */
CGRP_WAIT_ON_RMDIR,
};
+/* which pidlist file are we talking about? */
+enum cgroup_filetype {
+ CGROUP_FILE_PROCS,
+ CGROUP_FILE_TASKS,
+};
+
+/*
+ * A pidlist is a list of pids that virtually represents the contents of one
+ * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
+ * a pair (one each for procs, tasks) for each pid namespace that's relevant
+ * to the cgroup.
+ */
+struct cgroup_pidlist {
+ /*
+ * used to find which pidlist is wanted. doesn't change as long as
+ * this particular list stays in the list.
+ */
+ struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
+ /* array of xids */
+ pid_t *list;
+ /* how many elements the above list has */
+ int length;
+ /* how many files are using the current array */
+ int use_count;
+ /* each of these stored in a list by its cgroup */
+ struct list_head links;
+ /* pointer to the cgroup we belong to, for list removal purposes */
+ struct cgroup *owner;
+ /* protects the other fields */
+ struct rw_semaphore mutex;
+};
+
struct cgroup {
unsigned long flags; /* "unsigned long" so bitops work */
*/
struct list_head release_list;
- /* pids_mutex protects pids_list and cached pid arrays. */
- struct rw_semaphore pids_mutex;
-
- /* Linked list of struct cgroup_pids */
- struct list_head pids_list;
+ /*
+ * list of pidlists, up to two for each namespace (one for procs, one
+ * for tasks); created on demand.
+ */
+ struct list_head pidlists;
+ struct mutex pidlist_mutex;
/* For RCU-protected deletion */
struct rcu_head rcu_head;
* during subsystem registration (at boot time).
*/
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+ /* For RCU-protected deletion */
+ struct rcu_head rcu_head;
};
/*
struct cgroup *cgrp);
int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
- int (*can_attach)(struct cgroup_subsys *ss,
- struct cgroup *cgrp, struct task_struct *tsk);
+ int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct task_struct *tsk, bool threadgroup);
void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *tsk);
+ struct cgroup *old_cgrp, struct task_struct *tsk,
+ bool threadgroup);
void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
int (*populate)(struct cgroup_subsys *ss,
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*
- * Please read Documentation/filesystems/configfs.txt before using the
- * configfs interface, ESPECIALLY the parts about reference counts and
+ * Please read Documentation/filesystems/configfs/configfs.txt before using
+ * the configfs interface, ESPECIALLY the parts about reference counts and
* item destructors.
*/
/*
* Cpumasks provide a bitmap suitable for representing the
- * set of CPU's in a system, one bit position per CPU number.
- *
- * The new cpumask_ ops take a "struct cpumask *"; the old ones
- * use cpumask_t.
- *
- * See detailed comments in the file linux/bitmap.h describing the
- * data type on which these cpumasks are based.
- *
- * For details of cpumask_scnprintf() and cpumask_parse_user(),
- * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c.
- * For details of cpulist_scnprintf() and cpulist_parse(), see
- * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c.
- * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c
- * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c.
- * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
- * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
- *
- * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
- * Note: The alternate operations with the suffix "_nr" are used
- * to limit the range of the loop to nr_cpu_ids instead of
- * NR_CPUS when NR_CPUS > 64 for performance reasons.
- * If NR_CPUS is <= 64 then most assembler bitmask
- * operators execute faster with a constant range, so
- * the operator will continue to use NR_CPUS.
- *
- * Another consideration is that nr_cpu_ids is initialized
- * to NR_CPUS and isn't lowered until the possible cpus are
- * discovered (including any disabled cpus). So early uses
- * will span the entire range of NR_CPUS.
- * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
- *
- * The obsolescent cpumask operations are:
- *
- * void cpu_set(cpu, mask) turn on bit 'cpu' in mask
- * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask
- * void cpus_setall(mask) set all bits
- * void cpus_clear(mask) clear all bits
- * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask
- * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask
- *
- * int cpus_and(dst, src1, src2) dst = src1 & src2 [intersection]
- * void cpus_or(dst, src1, src2) dst = src1 | src2 [union]
- * void cpus_xor(dst, src1, src2) dst = src1 ^ src2
- * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2
- * void cpus_complement(dst, src) dst = ~src
- *
- * int cpus_equal(mask1, mask2) Does mask1 == mask2?
- * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect?
- * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2?
- * int cpus_empty(mask) Is mask empty (no bits sets)?
- * int cpus_full(mask) Is mask full (all bits sets)?
- * int cpus_weight(mask) Hamming weigh - number of set bits
- * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS
- *
- * void cpus_shift_right(dst, src, n) Shift right
- * void cpus_shift_left(dst, src, n) Shift left
- *
- * int first_cpu(mask) Number lowest set bit, or NR_CPUS
- * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
- * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids
- *
- * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
- * (can be used as an lvalue)
- * CPU_MASK_ALL Initializer - all bits set
- * CPU_MASK_NONE Initializer - no bits set
- * unsigned long *cpus_addr(mask) Array of unsigned long's in mask
- *
- * CPUMASK_ALLOC kmalloc's a structure that is a composite of many cpumask_t
- * variables, and CPUMASK_PTR provides pointers to each field.
- *
- * The structure should be defined something like this:
- * struct my_cpumasks {
- * cpumask_t mask1;
- * cpumask_t mask2;
- * };
- *
- * Usage is then:
- * CPUMASK_ALLOC(my_cpumasks);
- * CPUMASK_PTR(mask1, my_cpumasks);
- * CPUMASK_PTR(mask2, my_cpumasks);
- *
- * --- DO NOT reference cpumask_t pointers until this check ---
- * if (my_cpumasks == NULL)
- * "kmalloc failed"...
- *
- * References are now pointers to the cpumask_t variables (*mask1, ...)
- *
- *if NR_CPUS > BITS_PER_LONG
- * CPUMASK_ALLOC(m) Declares and allocates struct m *m =
- * kmalloc(sizeof(*m), GFP_KERNEL)
- * CPUMASK_FREE(m) Macro for kfree(m)
- *else
- * CPUMASK_ALLOC(m) Declares struct m _m, *m = &_m
- * CPUMASK_FREE(m) Nop
- *endif
- * CPUMASK_PTR(v, m) Declares cpumask_t *v = &(m->v)
- * ------------------------------------------------------------------------
- *
- * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
- * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask
- * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
- * int cpulist_parse(buf, map) Parse ascii string as cpulist
- * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit)
- * void cpus_remap(dst, src, old, new) *dst = map(old, new)(src)
- * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap
- * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz
- *
- * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS
- * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids
- *
- * int num_online_cpus() Number of online CPUs
- * int num_possible_cpus() Number of all possible CPUs
- * int num_present_cpus() Number of present CPUs
- *
- * int cpu_online(cpu) Is some cpu online?
- * int cpu_possible(cpu) Is some cpu possible?
- * int cpu_present(cpu) Is some cpu present (can schedule)?
- *
- * int any_online_cpu(mask) First online cpu in mask
- *
- * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map
- * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map
- * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map
- *
- * Subtlety:
- * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway)
- * to generate slightly worse code. Note for example the additional
- * 40 lines of assembly code compiling the "for each possible cpu"
- * loops buried in the disk_stat_read() macros calls when compiling
- * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple
- * one-line #define for cpu_isset(), instead of wrapping an inline
- * inside a macro, the way we do the other calls.
+ * set of CPU's in a system, one bit position per CPU number. In general,
+ * only nr_cpu_ids (<= NR_CPUS) bits are valid.
*/
-
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/bitmap.h>
typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
-extern cpumask_t _unused_cpumask_arg_;
-
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
-static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
-{
- set_bit(cpu, dstp->bits);
-}
-
-#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
-static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
-{
- clear_bit(cpu, dstp->bits);
-}
-
-#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
-static inline void __cpus_setall(cpumask_t *dstp, int nbits)
-{
- bitmap_fill(dstp->bits, nbits);
-}
-
-#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
-static inline void __cpus_clear(cpumask_t *dstp, int nbits)
-{
- bitmap_zero(dstp->bits, nbits);
-}
-
-/* No static inline type checking - see Subtlety (1) above. */
-#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
-
-#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
-static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
-{
- return test_and_set_bit(cpu, addr->bits);
-}
-
-#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
-static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
-static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
-static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_andnot(dst, src1, src2) \
- __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
-static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS)
-static inline void __cpus_complement(cpumask_t *dstp,
- const cpumask_t *srcp, int nbits)
-{
- bitmap_complement(dstp->bits, srcp->bits, nbits);
-}
-
-#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_equal(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_equal(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_intersects(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_intersects(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
-static inline int __cpus_subset(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
-{
- return bitmap_subset(src1p->bits, src2p->bits, nbits);
-}
-
-#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
-static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
-{
- return bitmap_empty(srcp->bits, nbits);
-}
-
-#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS)
-static inline int __cpus_full(const cpumask_t *srcp, int nbits)
-{
- return bitmap_full(srcp->bits, nbits);
-}
-
-#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
-static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
-{
- return bitmap_weight(srcp->bits, nbits);
-}
-
-#define cpus_shift_right(dst, src, n) \
- __cpus_shift_right(&(dst), &(src), (n), NR_CPUS)
-static inline void __cpus_shift_right(cpumask_t *dstp,
- const cpumask_t *srcp, int n, int nbits)
-{
- bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
-}
-
-#define cpus_shift_left(dst, src, n) \
- __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
-static inline void __cpus_shift_left(cpumask_t *dstp,
- const cpumask_t *srcp, int n, int nbits)
-{
- bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
-}
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
/**
- * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
- * @bitmap: the bitmap
- *
- * There are a few places where cpumask_var_t isn't appropriate and
- * static cpumasks must be used (eg. very early boot), yet we don't
- * expose the definition of 'struct cpumask'.
- *
- * This does the conversion, and can be used as a constant initializer.
- */
-#define to_cpumask(bitmap) \
- ((struct cpumask *)(1 ? (bitmap) \
- : (void *)sizeof(__check_is_bitmap(bitmap))))
-
-static inline int __check_is_bitmap(const unsigned long *bitmap)
-{
- return 1;
-}
-
-/*
- * Special-case data structure for "single bit set only" constant CPU masks.
+ * cpumask_bits - get the bits in a cpumask
+ * @maskp: the struct cpumask *
*
- * We pre-generate all the 64 (or 32) possible bit positions, with enough
- * padding to the left and the right, and return the constant pointer
- * appropriately offset.
- */
-extern const unsigned long
- cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
-
-static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
-{
- const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
- p -= cpu / BITS_PER_LONG;
- return to_cpumask(p);
-}
-
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-/*
- * In cases where we take the address of the cpumask immediately,
- * gcc optimizes it out (it's a constant) and there's no huge stack
- * variable created:
+ * You should only assume nr_cpu_ids bits of this mask are valid. This is
+ * a macro so it's const-correct.
*/
-#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu))
-
-
-#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
-
-#if NR_CPUS <= BITS_PER_LONG
-
-#define CPU_MASK_ALL \
-(cpumask_t) { { \
- [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-} }
-
-#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL)
-
-#else
-
-#define CPU_MASK_ALL \
-(cpumask_t) { { \
- [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
- [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-} }
-
-/* cpu_mask_all is in init/main.c */
-extern cpumask_t cpu_mask_all;
-#define CPU_MASK_ALL_PTR (&cpu_mask_all)
-
-#endif
-
-#define CPU_MASK_NONE \
-(cpumask_t) { { \
- [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
-} }
-
-#define CPU_MASK_CPU0 \
-(cpumask_t) { { \
- [0] = 1UL \
-} }
-
-#define cpus_addr(src) ((src).bits)
-
-#if NR_CPUS > BITS_PER_LONG
-#define CPUMASK_ALLOC(m) struct m *m = kmalloc(sizeof(*m), GFP_KERNEL)
-#define CPUMASK_FREE(m) kfree(m)
-#else
-#define CPUMASK_ALLOC(m) struct m _m, *m = &_m
-#define CPUMASK_FREE(m)
-#endif
-#define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v)
-
-#define cpu_remap(oldbit, old, new) \
- __cpu_remap((oldbit), &(old), &(new), NR_CPUS)
-static inline int __cpu_remap(int oldbit,
- const cpumask_t *oldp, const cpumask_t *newp, int nbits)
-{
- return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits);
-}
-
-#define cpus_remap(dst, src, old, new) \
- __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS)
-static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp,
- const cpumask_t *oldp, const cpumask_t *newp, int nbits)
-{
- bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits);
-}
-
-#define cpus_onto(dst, orig, relmap) \
- __cpus_onto(&(dst), &(orig), &(relmap), NR_CPUS)
-static inline void __cpus_onto(cpumask_t *dstp, const cpumask_t *origp,
- const cpumask_t *relmapp, int nbits)
-{
- bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits);
-}
-
-#define cpus_fold(dst, orig, sz) \
- __cpus_fold(&(dst), &(orig), sz, NR_CPUS)
-static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
- int sz, int nbits)
-{
- bitmap_fold(dstp->bits, origp->bits, sz, nbits);
-}
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+#define cpumask_bits(maskp) ((maskp)->bits)
#if NR_CPUS == 1
-
#define nr_cpu_ids 1
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#define first_cpu(src) ({ (void)(src); 0; })
-#define next_cpu(n, src) ({ (void)(src); 1; })
-#define any_online_cpu(mask) 0
-#define for_each_cpu_mask(cpu, mask) \
- for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
-#else /* NR_CPUS > 1 */
-
+#else
extern int nr_cpu_ids;
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-int __first_cpu(const cpumask_t *srcp);
-int __next_cpu(int n, const cpumask_t *srcp);
-int __any_online_cpu(const cpumask_t *mask);
-
-#define first_cpu(src) __first_cpu(&(src))
-#define next_cpu(n, src) __next_cpu((n), &(src))
-#define any_online_cpu(mask) __any_online_cpu(&(mask))
-#define for_each_cpu_mask(cpu, mask) \
- for ((cpu) = -1; \
- (cpu) = next_cpu((cpu), (mask)), \
- (cpu) < NR_CPUS; )
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
#endif
-#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-#if NR_CPUS <= 64
-
-#define next_cpu_nr(n, src) next_cpu(n, src)
-#define cpus_weight_nr(cpumask) cpus_weight(cpumask)
-#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask)
-
-#else /* NR_CPUS > 64 */
-
-int __next_cpu_nr(int n, const cpumask_t *srcp);
-#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src))
-#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids)
-#define for_each_cpu_mask_nr(cpu, mask) \
- for ((cpu) = -1; \
- (cpu) = next_cpu_nr((cpu), (mask)), \
- (cpu) < nr_cpu_ids; )
-
-#endif /* NR_CPUS > 64 */
-#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also,
+ * not all bits may be allocated. */
+#define nr_cpumask_bits nr_cpu_ids
+#else
+#define nr_cpumask_bits NR_CPUS
+#endif
/*
* The following particular system cpumasks and operations manage
extern const struct cpumask *const cpu_possible_mask;
extern const struct cpumask *const cpu_online_mask;
extern const struct cpumask *const cpu_present_mask;
-extern const struct cpumask *const cpu_active_mask;
-
-/* These strip const, as traditionally they weren't const. */
-#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask)
-#define cpu_online_map (*(cpumask_t *)cpu_online_mask)
-#define cpu_present_map (*(cpumask_t *)cpu_present_mask)
-#define cpu_active_map (*(cpumask_t *)cpu_active_mask)
-
-#if NR_CPUS > 1
-#define num_online_cpus() cpumask_weight(cpu_online_mask)
-#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
-#define num_present_cpus() cpumask_weight(cpu_present_mask)
-#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask)
-#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask)
-#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask)
-#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask)
-#else
-#define num_online_cpus() 1
-#define num_possible_cpus() 1
-#define num_present_cpus() 1
-#define cpu_online(cpu) ((cpu) == 0)
-#define cpu_possible(cpu) ((cpu) == 0)
-#define cpu_present(cpu) ((cpu) == 0)
-#define cpu_active(cpu) ((cpu) == 0)
-#endif
-
-#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
-
-/* These are the new versions of the cpumask operators: passed by pointer.
- * The older versions will be implemented in terms of these, then deleted. */
-#define cpumask_bits(maskp) ((maskp)->bits)
-
-#if NR_CPUS <= BITS_PER_LONG
-#define CPU_BITS_ALL \
-{ \
- [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-}
-
-#else /* NR_CPUS > BITS_PER_LONG */
-
-#define CPU_BITS_ALL \
-{ \
- [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
- [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
-}
-#endif /* NR_CPUS > BITS_PER_LONG */
+extern const struct cpumask *const cpu_active_mask;
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also,
- * not all bits may be allocated. */
-#define nr_cpumask_bits nr_cpu_ids
+#if NR_CPUS > 1
+#define num_online_cpus() cpumask_weight(cpu_online_mask)
+#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
+#define num_present_cpus() cpumask_weight(cpu_present_mask)
+#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask)
+#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask)
+#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask)
+#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask)
#else
-#define nr_cpumask_bits NR_CPUS
+#define num_online_cpus() 1
+#define num_possible_cpus() 1
+#define num_present_cpus() 1
+#define cpu_online(cpu) ((cpu) == 0)
+#define cpu_possible(cpu) ((cpu) == 0)
+#define cpu_present(cpu) ((cpu) == 0)
+#define cpu_active(cpu) ((cpu) == 0)
#endif
/* verify cpu argument to cpumask_* operators */
void init_cpu_present(const struct cpumask *src);
void init_cpu_possible(const struct cpumask *src);
void init_cpu_online(const struct cpumask *src);
+
+/**
+ * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
+ * @bitmap: the bitmap
+ *
+ * There are a few places where cpumask_var_t isn't appropriate and
+ * static cpumasks must be used (eg. very early boot), yet we don't
+ * expose the definition of 'struct cpumask'.
+ *
+ * This does the conversion, and can be used as a constant initializer.
+ */
+#define to_cpumask(bitmap) \
+ ((struct cpumask *)(1 ? (bitmap) \
+ : (void *)sizeof(__check_is_bitmap(bitmap))))
+
+static inline int __check_is_bitmap(const unsigned long *bitmap)
+{
+ return 1;
+}
+
+/*
+ * Special-case data structure for "single bit set only" constant CPU masks.
+ *
+ * We pre-generate all the 64 (or 32) possible bit positions, with enough
+ * padding to the left and the right, and return the constant pointer
+ * appropriately offset.
+ */
+extern const unsigned long
+ cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
+
+static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
+{
+ const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
+ p -= cpu / BITS_PER_LONG;
+ return to_cpumask(p);
+}
+
+#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
+
+#if NR_CPUS <= BITS_PER_LONG
+#define CPU_BITS_ALL \
+{ \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+}
+
+#else /* NR_CPUS > BITS_PER_LONG */
+
+#define CPU_BITS_ALL \
+{ \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+}
+#endif /* NR_CPUS > BITS_PER_LONG */
+
+/*
+ *
+ * From here down, all obsolete. Use cpumask_ variants!
+ *
+ */
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
+/* These strip const, as traditionally they weren't const. */
+#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask)
+#define cpu_online_map (*(cpumask_t *)cpu_online_mask)
+#define cpu_present_map (*(cpumask_t *)cpu_present_mask)
+#define cpu_active_map (*(cpumask_t *)cpu_active_mask)
+
+#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu))
+
+#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
+
+#if NR_CPUS <= BITS_PER_LONG
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#else
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#endif
+
+#define CPU_MASK_NONE \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
+} }
+
+#define CPU_MASK_CPU0 \
+(cpumask_t) { { \
+ [0] = 1UL \
+} }
+
+#if NR_CPUS == 1
+#define first_cpu(src) ({ (void)(src); 0; })
+#define next_cpu(n, src) ({ (void)(src); 1; })
+#define any_online_cpu(mask) 0
+#define for_each_cpu_mask(cpu, mask) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#else /* NR_CPUS > 1 */
+int __first_cpu(const cpumask_t *srcp);
+int __next_cpu(int n, const cpumask_t *srcp);
+int __any_online_cpu(const cpumask_t *mask);
+
+#define first_cpu(src) __first_cpu(&(src))
+#define next_cpu(n, src) __next_cpu((n), &(src))
+#define any_online_cpu(mask) __any_online_cpu(&(mask))
+#define for_each_cpu_mask(cpu, mask) \
+ for ((cpu) = -1; \
+ (cpu) = next_cpu((cpu), (mask)), \
+ (cpu) < NR_CPUS; )
+#endif /* SMP */
+
+#if NR_CPUS <= 64
+
+#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask)
+
+#else /* NR_CPUS > 64 */
+
+int __next_cpu_nr(int n, const cpumask_t *srcp);
+#define for_each_cpu_mask_nr(cpu, mask) \
+ for ((cpu) = -1; \
+ (cpu) = __next_cpu_nr((cpu), &(mask)), \
+ (cpu) < nr_cpu_ids; )
+
+#endif /* NR_CPUS > 64 */
+
+#define cpus_addr(src) ((src).bits)
+
+#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
+static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
+{
+ set_bit(cpu, dstp->bits);
+}
+
+#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
+static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
+{
+ clear_bit(cpu, dstp->bits);
+}
+
+#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
+static inline void __cpus_setall(cpumask_t *dstp, int nbits)
+{
+ bitmap_fill(dstp->bits, nbits);
+}
+
+#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
+static inline void __cpus_clear(cpumask_t *dstp, int nbits)
+{
+ bitmap_zero(dstp->bits, nbits);
+}
+
+/* No static inline type checking - see Subtlety (1) above. */
+#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
+
+#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
+static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
+{
+ return test_and_set_bit(cpu, addr->bits);
+}
+
+#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
+static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_andnot(dst, src1, src2) \
+ __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
+static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_equal(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_equal(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_intersects(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_subset(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
+static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_empty(srcp->bits, nbits);
+}
+
+#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
+static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_weight(srcp->bits, nbits);
+}
+
+#define cpus_shift_left(dst, src, n) \
+ __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_left(cpumask_t *dstp,
+ const cpumask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
+
#endif /* __LINUX_CPUMASK_H */
extern void __validate_process_creds(struct task_struct *,
const char *, unsigned);
-static inline bool creds_are_invalid(const struct cred *cred)
-{
- if (cred->magic != CRED_MAGIC)
- return true;
- if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
- return true;
-#ifdef CONFIG_SECURITY_SELINUX
- if (selinux_is_enabled()) {
- if ((unsigned long) cred->security < PAGE_SIZE)
- return true;
- if ((*(u32 *)cred->security & 0xffffff00) ==
- (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
- return true;
- }
-#endif
- return false;
-}
+extern bool creds_are_invalid(const struct cred *cred);
static inline void __validate_creds(const struct cred *cred,
const char *file, unsigned line)
*/
#ifndef DCA_H
#define DCA_H
+
+#include <linux/pci.h>
+
/* DCA Provider API */
/* DCA Notifier Interface */
int id;
};
+struct dca_domain {
+ struct list_head node;
+ struct list_head dca_providers;
+ struct pci_bus *pci_rc;
+};
+
struct dca_ops {
int (*add_requester) (struct dca_provider *, struct device *);
int (*remove_requester) (struct dca_provider *, struct device *);
struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
void free_dca_provider(struct dca_provider *dca);
int register_dca_provider(struct dca_provider *dca, struct device *dev);
-void unregister_dca_provider(struct dca_provider *dca);
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
static inline void *dca_priv(struct dca_provider *dca)
{
* 2 as published by the Free Software Foundation.
*
* debugfs is for people to use instead of /proc or /sys.
- * See Documentation/DocBook/kernel-api for more details.
+ * See Documentation/DocBook/filesystems for more details.
*/
#ifndef _DEBUGFS_H_
/**
* enum dma_transaction_type - DMA transaction types/indexes
+ *
+ * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is
+ * automatically set as dma devices are registered.
*/
enum dma_transaction_type {
DMA_MEMCPY,
DMA_XOR,
- DMA_PQ_XOR,
- DMA_DUAL_XOR,
- DMA_PQ_UPDATE,
- DMA_ZERO_SUM,
- DMA_PQ_ZERO_SUM,
+ DMA_PQ,
+ DMA_XOR_VAL,
+ DMA_PQ_VAL,
DMA_MEMSET,
- DMA_MEMCPY_CRC32C,
DMA_INTERRUPT,
DMA_PRIVATE,
+ DMA_ASYNC_TX,
DMA_SLAVE,
};
/**
* enum dma_ctrl_flags - DMA flags to augment operation preparation,
- * control completion, and communicate status.
+ * control completion, and communicate status.
* @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
- * this transaction
+ * this transaction
* @DMA_CTRL_ACK - the descriptor cannot be reused until the client
- * acknowledges receipt, i.e. has has a chance to establish any
- * dependency chains
+ * acknowledges receipt, i.e. has has a chance to establish any dependency
+ * chains
* @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
* @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
* @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
* (if not set, do the source dma-unmapping as page)
* @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
* (if not set, do the destination dma-unmapping as page)
+ * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
+ * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
+ * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
+ * sources that were the result of a previous operation, in the case of a PQ
+ * operation it continues the calculation with new sources
+ * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
+ * on the result of this operation
*/
enum dma_ctrl_flags {
DMA_PREP_INTERRUPT = (1 << 0),
DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
+ DMA_PREP_PQ_DISABLE_P = (1 << 6),
+ DMA_PREP_PQ_DISABLE_Q = (1 << 7),
+ DMA_PREP_CONTINUE = (1 << 8),
+ DMA_PREP_FENCE = (1 << 9),
};
+/**
+ * enum sum_check_bits - bit position of pq_check_flags
+ */
+enum sum_check_bits {
+ SUM_CHECK_P = 0,
+ SUM_CHECK_Q = 1,
+};
+
+/**
+ * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations
+ * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
+ * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
+ */
+enum sum_check_flags {
+ SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
+ SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
+};
+
+
/**
* dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
* See linux/cpumask.h
* @flags: flags to augment operation preparation, control completion, and
* communicate status
* @phys: physical address of the descriptor
- * @tx_list: driver common field for operations that require multiple
- * descriptors
* @chan: target channel for this operation
* @tx_submit: set the prepared descriptor(s) to be executed by the engine
* @callback: routine to call after this operation is complete
dma_cookie_t cookie;
enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
dma_addr_t phys;
- struct list_head tx_list;
struct dma_chan *chan;
dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
dma_async_tx_callback callback;
* @global_node: list_head for global dma_device_list
* @cap_mask: one or more dma_capability flags
* @max_xor: maximum number of xor sources, 0 if no capability
+ * @max_pq: maximum number of PQ sources and PQ-continue capability
+ * @copy_align: alignment shift for memcpy operations
+ * @xor_align: alignment shift for xor operations
+ * @pq_align: alignment shift for pq operations
+ * @fill_align: alignment shift for memset operations
* @dev_id: unique device ID
* @dev: struct device reference for dma mapping api
* @device_alloc_chan_resources: allocate resources and return the
* @device_free_chan_resources: release DMA channel's resources
* @device_prep_dma_memcpy: prepares a memcpy operation
* @device_prep_dma_xor: prepares a xor operation
- * @device_prep_dma_zero_sum: prepares a zero_sum operation
+ * @device_prep_dma_xor_val: prepares a xor validation operation
+ * @device_prep_dma_pq: prepares a pq operation
+ * @device_prep_dma_pq_val: prepares a pqzero_sum operation
* @device_prep_dma_memset: prepares a memset operation
* @device_prep_dma_interrupt: prepares an end of chain interrupt operation
* @device_prep_slave_sg: prepares a slave dma operation
struct list_head channels;
struct list_head global_node;
dma_cap_mask_t cap_mask;
- int max_xor;
+ unsigned short max_xor;
+ unsigned short max_pq;
+ u8 copy_align;
+ u8 xor_align;
+ u8 pq_align;
+ u8 fill_align;
+ #define DMA_HAS_PQ_CONTINUE (1 << 15)
int dev_id;
struct device *dev;
struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
unsigned int src_cnt, size_t len, unsigned long flags);
- struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
+ struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
- size_t len, u32 *result, unsigned long flags);
+ size_t len, enum sum_check_flags *result, unsigned long flags);
+ struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
+ struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags);
+ struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
+ struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
unsigned long flags);
void (*device_issue_pending)(struct dma_chan *chan);
};
+static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
+{
+ size_t mask;
+
+ if (!align)
+ return true;
+ mask = (1 << align) - 1;
+ if (mask & (off1 | off2 | len))
+ return false;
+ return true;
+}
+
+static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
+ size_t off2, size_t len)
+{
+ return dmaengine_check_align(dev->copy_align, off1, off2, len);
+}
+
+static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
+ size_t off2, size_t len)
+{
+ return dmaengine_check_align(dev->xor_align, off1, off2, len);
+}
+
+static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
+ size_t off2, size_t len)
+{
+ return dmaengine_check_align(dev->pq_align, off1, off2, len);
+}
+
+static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
+ size_t off2, size_t len)
+{
+ return dmaengine_check_align(dev->fill_align, off1, off2, len);
+}
+
+static inline void
+dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
+{
+ dma->max_pq = maxpq;
+ if (has_pq_continue)
+ dma->max_pq |= DMA_HAS_PQ_CONTINUE;
+}
+
+static inline bool dmaf_continue(enum dma_ctrl_flags flags)
+{
+ return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
+}
+
+static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
+{
+ enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
+
+ return (flags & mask) == mask;
+}
+
+static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
+{
+ return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
+}
+
+static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+{
+ return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
+}
+
+/* dma_maxpq - reduce maxpq in the face of continued operations
+ * @dma - dma device with PQ capability
+ * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
+ *
+ * When an engine does not support native continuation we need 3 extra
+ * source slots to reuse P and Q with the following coefficients:
+ * 1/ {00} * P : remove P from Q', but use it as a source for P'
+ * 2/ {01} * Q : use Q to continue Q' calculation
+ * 3/ {00} * Q : subtract Q from P' to cancel (2)
+ *
+ * In the case where P is disabled we only need 1 extra source:
+ * 1/ {01} * Q : use Q to continue Q' calculation
+ */
+static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
+{
+ if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
+ return dma_dev_to_maxpq(dma);
+ else if (dmaf_p_disabled_continue(flags))
+ return dma_dev_to_maxpq(dma) - 1;
+ else if (dmaf_continue(flags))
+ return dma_dev_to_maxpq(dma) - 3;
+ BUG();
+}
+
/* --- public DMA engine API --- */
#ifdef CONFIG_DMA_ENGINE
#ifdef CONFIG_ASYNC_TX_DMA
#define async_dmaengine_get() dmaengine_get()
#define async_dmaengine_put() dmaengine_put()
+#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
+#else
#define async_dma_find_channel(type) dma_find_channel(type)
+#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
#else
static inline void async_dmaengine_get(void)
{
{
return NULL;
}
-#endif
+#endif /* CONFIG_ASYNC_TX_DMA */
dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
void *dest, void *src, size_t len);
int (*launder_page) (struct page *);
int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
unsigned long);
+ int (*error_remove_page)(struct address_space *, struct page *);
};
/*
struct super_block * bd_super;
int bd_openers;
struct mutex bd_mutex; /* open/close mutex */
- struct semaphore bd_mount_sem;
struct list_head bd_inodes;
void * bd_holder;
int bd_holders;
unsigned long s_blocksize;
unsigned char s_blocksize_bits;
unsigned char s_dirt;
- unsigned long long s_maxbytes; /* Max file size */
+ loff_t s_maxbytes; /* Max file size */
struct file_system_type *s_type;
const struct super_operations *s_op;
const struct dquot_operations *dq_op;
extern int inode_needs_sync(struct inode *inode);
extern void generic_delete_inode(struct inode *inode);
extern void generic_drop_inode(struct inode *inode);
+extern int generic_detach_inode(struct inode *inode);
extern struct inode *ilookup5_nowait(struct super_block *sb,
unsigned long hashval, int (*test)(struct inode *, void *),
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_active_super(struct block_device *bdev);
extern struct super_block *user_get_super(dev_t);
extern void drop_super(struct super_block *sb);
#define buffer_migrate_page NULL
#endif
-extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_change_ok(const struct inode *, struct iattr *);
+extern int inode_newsize_ok(const struct inode *, loff_t offset);
extern int __must_check inode_setattr(struct inode *, struct iattr *);
extern void file_update_time(struct file *file);
size_t len, loff_t *ppos);
struct ctl_table;
-int proc_nr_files(struct ctl_table *table, int write, struct file *filp,
+int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);
extern int ftrace_enabled;
extern int
ftrace_enable_sysctl(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
extern int stack_tracer_enabled;
int
stack_trace_sysctl(struct ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
#endif
#include <linux/compiler.h>
#include <linux/types.h>
-struct inode;
-struct mm_struct;
-struct task_struct;
-union ktime;
-
/* Second argument to futex syscall */
#define FUTEX_BITSET_MATCH_ANY 0xffffffff
#ifdef __KERNEL__
+struct inode;
+struct mm_struct;
+struct task_struct;
+union ktime;
+
long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
}
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
-int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
-int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
-int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
+int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
--- /dev/null
+/*
+ * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _ADP5588_H
+#define _ADP5588_H
+
+#define DEV_ID 0x00 /* Device ID */
+#define CFG 0x01 /* Configuration Register1 */
+#define INT_STAT 0x02 /* Interrupt Status Register */
+#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */
+#define Key_EVENTA 0x04 /* Key Event Register A */
+#define Key_EVENTB 0x05 /* Key Event Register B */
+#define Key_EVENTC 0x06 /* Key Event Register C */
+#define Key_EVENTD 0x07 /* Key Event Register D */
+#define Key_EVENTE 0x08 /* Key Event Register E */
+#define Key_EVENTF 0x09 /* Key Event Register F */
+#define Key_EVENTG 0x0A /* Key Event Register G */
+#define Key_EVENTH 0x0B /* Key Event Register H */
+#define Key_EVENTI 0x0C /* Key Event Register I */
+#define Key_EVENTJ 0x0D /* Key Event Register J */
+#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */
+#define UNLOCK1 0x0F /* Unlock Key1 */
+#define UNLOCK2 0x10 /* Unlock Key2 */
+#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */
+#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */
+#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */
+#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */
+#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */
+#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */
+#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */
+#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */
+#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */
+#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */
+#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */
+#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */
+#define GPI_EM1 0x20 /* GPI Event Mode 1 */
+#define GPI_EM2 0x21 /* GPI Event Mode 2 */
+#define GPI_EM3 0x22 /* GPI Event Mode 3 */
+#define GPIO_DIR1 0x23 /* GPIO Data Direction */
+#define GPIO_DIR2 0x24 /* GPIO Data Direction */
+#define GPIO_DIR3 0x25 /* GPIO Data Direction */
+#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */
+#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */
+#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */
+#define Debounce_DIS1 0x29 /* Debounce Disable */
+#define Debounce_DIS2 0x2A /* Debounce Disable */
+#define Debounce_DIS3 0x2B /* Debounce Disable */
+#define GPIO_PULL1 0x2C /* GPIO Pull Disable */
+#define GPIO_PULL2 0x2D /* GPIO Pull Disable */
+#define GPIO_PULL3 0x2E /* GPIO Pull Disable */
+#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */
+#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */
+#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */
+#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */
+#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */
+#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */
+#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */
+#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */
+#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */
+#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */
+#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */
+#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */
+#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */
+#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */
+#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */
+
+#define ADP5588_DEVICE_ID_MASK 0xF
+
+/* Put one of these structures in i2c_board_info platform_data */
+
+#define ADP5588_KEYMAPSIZE 80
+
+struct adp5588_kpad_platform_data {
+ int rows; /* Number of rows */
+ int cols; /* Number of columns */
+ const unsigned short *keymap; /* Pointer to keymap */
+ unsigned short keymapsize; /* Keymap size */
+ unsigned repeat:1; /* Enable key repeat */
+ unsigned en_keylock:1; /* Enable Key Lock feature */
+ unsigned short unlock_key1; /* Unlock Key 1 */
+ unsigned short unlock_key2; /* Unlock Key 2 */
+};
+
+#endif
--- /dev/null
+/*
+ * mcs5000_ts.h
+ *
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MCS5000_TS_H
+#define __LINUX_MCS5000_TS_H
+
+/* platform data for the MELFAS MCS-5000 touchscreen driver */
+struct mcs5000_ts_platform_data {
+ void (*cfg_pin)(void);
+ int x_size;
+ int y_size;
+};
+
+#endif /* __LINUX_MCS5000_TS_H */
* the Free Software Foundation.
*/
+#include <linux/types.h>
/*
* Standard commands.
#define I8042_CMD_MUX_PFX 0x0090
#define I8042_CMD_MUX_SEND 0x1090
+struct serio;
+
+#if defined(CONFIG_SERIO_I8042) || defined(CONFIG_SERIO_I8042_MODULE)
+
+void i8042_lock_chip(void);
+void i8042_unlock_chip(void);
int i8042_command(unsigned char *param, int command);
+bool i8042_check_port_owner(const struct serio *);
+
+#else
+
+void i8042_lock_chip(void)
+{
+}
+
+void i8042_unlock_chip(void)
+{
+}
+
+int i8042_command(unsigned char *param, int command)
+{
+ return -ENOSYS;
+}
+
+bool i8042_check_port_owner(const struct serio *serio)
+{
+ return false;
+}
+
+#endif
#endif
struct mutex mutex;
unsigned int users;
- int going_away;
+ bool going_away;
struct device dev;
* struct irqaction - per interrupt action descriptor
* @handler: interrupt handler function
* @flags: flags (see IRQF_* above)
- * @mask: no comment as it is useless and about to be removed
* @name: name of the device
* @dev_id: cookie to identify the device
* @next: pointer to the next irqaction for shared interrupts
struct irqaction {
irq_handler_t handler;
unsigned long flags;
- cpumask_t mask;
const char *name;
void *dev_id;
struct irqaction *next;
void ps2_init(struct ps2dev *ps2dev, struct serio *serio);
int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout);
void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout);
+void ps2_begin_command(struct ps2dev *ps2dev);
+void ps2_end_command(struct ps2dev *ps2dev);
int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command);
int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command);
int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data);
#ifdef __ASSEMBLY__
+#ifndef LINKER_SCRIPT
#define ALIGN __ALIGN
#define ALIGN_STR __ALIGN_STR
ALIGN; \
name:
#endif
+#endif /* LINKER_SCRIPT */
#ifndef WEAK
#define WEAK(name) \
extern bool mem_cgroup_oom_called(struct task_struct *task);
void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask, int nid,
+ int zid);
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct mem_cgroup;
{
}
+static inline
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask, int nid, int zid)
+{
+ return 0;
+}
+
#endif /* CONFIG_CGROUP_MEM_CONT */
#endif /* _LINUX_MEMCONTROL_H */
#define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_MAJOR 0x0004
#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */
+#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */
#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */
#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */
-#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS)
+#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
/*
* Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
unmap_mapping_range(mapping, holebegin, holelen, 0);
}
-extern int vmtruncate(struct inode * inode, loff_t offset);
-extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
+extern int vmtruncate(struct inode *inode, loff_t offset);
+extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
+
+int truncate_inode_page(struct address_space *mapping, struct page *page);
+int generic_error_remove_page(struct address_space *mapping, struct page *page);
+
+int invalidate_inode_page(struct page *page);
#ifdef CONFIG_MMU
extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
#endif /* __HAVE_ARCH_GATE_AREA */
-int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
+int drop_caches_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
unsigned long lru_pages);
extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
size_t size);
extern void refund_locked_memory(struct mm_struct *mm, size_t size);
+
+extern void memory_failure(unsigned long pfn, int trapno);
+extern int __memory_failure(unsigned long pfn, int trapno, int ref);
+extern int sysctl_memory_failure_early_kill;
+extern int sysctl_memory_failure_recovery;
+extern atomic_long_t mce_bad_pages;
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
+ struct linux_binfmt *binfmt;
+
cpumask_t cpu_vm_mask;
/* Architecture-specific MM context */
unsigned long flags; /* Must use atomic bitops to access the bits */
struct core_state *core_state; /* coredumping support */
-
- /* aio bits */
+#ifdef CONFIG_AIO
spinlock_t ioctx_lock;
struct hlist_head ioctx_list;
-
+#endif
#ifdef CONFIG_MM_OWNER
/*
* "owner" points to a task that is regarded as the canonical
/* These two functions are used to setup the per zone pages min values */
struct ctl_table;
-struct file;
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
+int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
- struct file *, void __user *, size_t *, loff_t *);
+ void __user *, size_t *, loff_t *);
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
- struct file *, void __user *, size_t *, loff_t *);
+ void __user *, size_t *, loff_t *);
extern int numa_zonelist_order_handler(struct ctl_table *, int,
- struct file *, void __user *, size_t *, loff_t *);
+ void __user *, size_t *, loff_t *);
extern char numa_zonelist_order[];
#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */
*/
#define MODULE_LICENSE(_license) MODULE_INFO(license, _license)
-/* Author, ideally of form NAME[, NAME]*[ and NAME] */
+/*
+ * Author(s), use "Name <email>" or just "Name", for multiple
+ * authors use multiple MODULE_AUTHOR() statements/lines.
+ */
#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
/* What your module does. */
#endif
#ifdef CONFIG_KALLSYMS
- /* We keep the symbol and string tables for kallsyms. */
- Elf_Sym *symtab;
- unsigned int num_symtab;
- char *strtab;
+ /*
+ * We keep the symbol and string tables for kallsyms.
+ * The core_* fields below are temporary, loader-only (they
+ * could really be discarded after module init).
+ */
+ Elf_Sym *symtab, *core_symtab;
+ unsigned int num_symtab, core_num_syms;
+ char *strtab, *core_strtab;
/* Section attributes */
struct module_sect_attrs *sect_attrs;
extern void netlink_kernel_release(struct sock *sk);
extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
+extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group);
extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
* PG_buddy is set to indicate that the page is free and in the buddy system
* (see mm/page_alloc.c).
*
+ * PG_hwpoison indicates that a page got corrupted in hardware and contains
+ * data with incorrect ECC bits that triggered a machine check. Accessing is
+ * not safe since it may cause another machine check. Don't touch!
*/
/*
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PG_uncached, /* Page has been mapped as uncached */
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+ PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif
__NR_PAGEFLAGS,
PAGEFLAG_FALSE(Uncached)
#endif
+#ifdef CONFIG_MEMORY_FAILURE
+PAGEFLAG(HWPoison, hwpoison)
+TESTSETFLAG(HWPoison, hwpoison)
+#define __PG_HWPOISON (1UL << PG_hwpoison)
+#else
+PAGEFLAG_FALSE(HWPoison)
+#define __PG_HWPOISON 0
+#endif
+
static inline int PageUptodate(struct page *page)
{
int ret = test_bit(PG_uptodate, &(page)->flags);
1 << PG_private | 1 << PG_private_2 | \
1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- 1 << PG_unevictable | __PG_MLOCKED)
+ 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
/*
* Flags checked when a page is prepped for return by the page allocator.
PCG_LOCK, /* page cgroup is locked */
PCG_CACHE, /* charged as cache */
PCG_USED, /* this object is in use. */
+ PCG_ACCT_LRU, /* page has been accounted for */
};
#define TESTPCGFLAG(uname, lname) \
static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
{ clear_bit(PCG_##lname, &pc->flags); }
+#define TESTCLEARPCGFLAG(uname, lname) \
+static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
+ { return test_and_clear_bit(PCG_##lname, &pc->flags); }
+
/* Cache flag is set only once (at allocation) */
TESTPCGFLAG(Cache, CACHE)
+CLEARPCGFLAG(Cache, CACHE)
+SETPCGFLAG(Cache, CACHE)
TESTPCGFLAG(Used, USED)
CLEARPCGFLAG(Used, USED)
+SETPCGFLAG(Used, USED)
+
+SETPCGFLAG(AcctLRU, ACCT_LRU)
+CLEARPCGFLAG(AcctLRU, ACCT_LRU)
+TESTPCGFLAG(AcctLRU, ACCT_LRU)
+TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
static inline int page_cgroup_nid(struct page_cgroup *pc)
{
#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
#define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14
#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16
#define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18
#define PNPIPE_IFINDEX 2
#define PNADDR_ANY 0
+#define PNADDR_BROADCAST 0xFC
#define PNPORT_RESOURCE_ROUTING 0
/* Values for PNPIPE_ENCAP option */
#define PR_TASK_PERF_EVENTS_DISABLE 31
#define PR_TASK_PERF_EVENTS_ENABLE 32
+#define PR_MCE_KILL 33
+
#endif /* _LINUX_PRCTL_H */
* cause relay_open() to create a single global buffer rather
* than the default set of per-cpu buffers.
*
- * See Documentation/filesystems/relayfs.txt for more info.
+ * See Documentation/filesystems/relay.txt for more info.
*/
struct dentry *(*create_buf_file)(const char *filename,
struct dentry *parent,
* the limit that usage cannot exceed
*/
unsigned long long limit;
+ /*
+ * the limit that usage can be exceed
+ */
+ unsigned long long soft_limit;
/*
* the number of unsuccessful attempts to consume the resource
*/
RES_MAX_USAGE,
RES_LIMIT,
RES_FAILCNT,
+ RES_SOFT_LIMIT,
};
/*
int __must_check res_counter_charge_locked(struct res_counter *counter,
unsigned long val);
int __must_check res_counter_charge(struct res_counter *counter,
- unsigned long val, struct res_counter **limit_fail_at);
+ unsigned long val, struct res_counter **limit_fail_at,
+ struct res_counter **soft_limit_at);
/*
* uncharge - tell that some portion of the resource is released
*/
void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+ bool *was_soft_limit_excess);
static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
{
return false;
}
+static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt)
+{
+ if (cnt->usage < cnt->soft_limit)
+ return true;
+
+ return false;
+}
+
+/**
+ * Get the difference between the usage and the soft limit
+ * @cnt: The counter
+ *
+ * Returns 0 if usage is less than or equal to soft limit
+ * The difference between usage and soft limit, otherwise.
+ */
+static inline unsigned long long
+res_counter_soft_limit_excess(struct res_counter *cnt)
+{
+ unsigned long long excess;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ if (cnt->usage <= cnt->soft_limit)
+ excess = 0;
+ else
+ excess = cnt->usage - cnt->soft_limit;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return excess;
+}
+
/*
* Helper function to detect if the cgroup is within it's limit or
* not. It's currently called from cgroup_rss_prepare()
return ret;
}
+static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt)
+{
+ bool ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ ret = res_counter_soft_limit_check_locked(cnt);
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return ret;
+}
+
static inline void res_counter_reset_max(struct res_counter *cnt)
{
unsigned long flags;
return ret;
}
+static inline int
+res_counter_set_soft_limit(struct res_counter *cnt,
+ unsigned long long soft_limit)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ cnt->soft_limit = soft_limit;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return 0;
+}
+
#endif
*/
int page_referenced(struct page *, int is_locked,
struct mem_cgroup *cnt, unsigned long *vm_flags);
-int try_to_unmap(struct page *, int ignore_refs);
+enum ttu_flags {
+ TTU_UNMAP = 0, /* unmap mode */
+ TTU_MIGRATION = 1, /* migration mode */
+ TTU_MUNLOCK = 2, /* munlock mode */
+ TTU_ACTION_MASK = 0xff,
+
+ TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
+ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
+ TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
+};
+#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
+
+int try_to_unmap(struct page *, enum ttu_flags flags);
/*
* Called from mm/filemap_xip.c to unmap empty zero page
*/
int try_to_munlock(struct page *);
+/*
+ * Called by memory-failure.c to kill processes.
+ */
+struct anon_vma *page_lock_anon_vma(struct page *page);
+void page_unlock_anon_vma(struct anon_vma *anon_vma);
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
+
#else /* !CONFIG_MMU */
#define anon_vma_init() do {} while (0)
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
extern int softlockup_thresh;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos);
#endif
struct mm_struct *mm, *active_mm;
/* task state */
- struct linux_binfmt *binfmt;
int exit_state;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
+#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */
#define PF_SIGNALED 0x00000400 /* killed by a signal */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
+#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
return 0;
}
#endif
+
+#ifndef CONFIG_CPUMASK_OFFSTACK
static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
{
return set_cpus_allowed_ptr(p, &new_mask);
}
+#endif
/*
* Architectures can set this to 1 if they have specified
extern unsigned int sysctl_timer_migration;
int sched_nr_latency_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length,
+ void __user *buffer, size_t *length,
loff_t *ppos);
#endif
#ifdef CONFIG_SCHED_DEBUG
extern int sysctl_sched_rt_runtime;
int sched_rt_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
extern unsigned int sysctl_sched_compat_yield;
extern int kill_pid(struct pid *pid, int sig, int priv);
extern int kill_proc_info(int, struct siginfo *, pid_t);
extern int do_notify_parent(struct task_struct *, int);
+extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
extern void force_sig(int, struct task_struct *);
extern void force_sig_specific(int, struct task_struct *);
extern int send_sig(int, struct task_struct *, int);
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
-extern int __fatal_signal_pending(struct task_struct *p);
+static inline int __fatal_signal_pending(struct task_struct *p)
+{
+ return unlikely(sigismember(&p->pending.signal, SIGKILL));
+}
static inline int fatal_signal_pending(struct task_struct *p)
{
return PAGE_ALIGN(mmap_min_addr);
return hint;
}
-extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+extern int mmap_min_addr_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#ifdef CONFIG_SECURITY
#define SEQ_SKIP 1
+/**
+ * seq_get_buf - get buffer to write arbitrary data to
+ * @m: the seq_file handle
+ * @bufp: the beginning of the buffer is stored here
+ *
+ * Return the number of bytes available in the buffer, or zero if
+ * there's no space.
+ */
+static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
+{
+ BUG_ON(m->count > m->size);
+ if (m->count < m->size)
+ *bufp = m->buf + m->count;
+ else
+ *bufp = NULL;
+
+ return m->size - m->count;
+}
+
+/**
+ * seq_commit - commit data to the buffer
+ * @m: the seq_file handle
+ * @num: the number of bytes to commit
+ *
+ * Commit @num bytes of data written to a buffer previously acquired
+ * by seq_buf_get. To signal an error condition, or that the data
+ * didn't fit in the available space, pass a negative @num value.
+ */
+static inline void seq_commit(struct seq_file *m, int num)
+{
+ if (num < 0) {
+ m->count = m->size;
+ } else {
+ BUG_ON(m->count + num > m->size);
+ m->count += num;
+ }
+}
+
char *mangle_path(char *s, char *p, char *esc);
int seq_open(struct file *, const struct seq_operations *);
ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
}
extern int next_signal(struct sigpending *pending, sigset_t *mask);
+extern int do_send_sig_info(int sig, struct siginfo *info,
+ struct task_struct *p, bool group);
extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
void smp_call_function_many(const struct cpumask *mask,
void (*func)(void *info), void *info, bool wait);
-/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */
-static inline int
-smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
- int wait)
-{
- smp_call_function_many(&mask, func, info, wait);
- return 0;
-}
-
void __smp_call_function_single(int cpuid, struct call_single_data *data,
int wait);
static inline void smp_send_reschedule(int cpu) { }
#define num_booting_cpus() 1
#define smp_prepare_boot_cpu() do {} while (0)
-#define smp_call_function_mask(mask, func, info, wait) \
- (up_smp_call_function(func, info))
#define smp_call_function_many(mask, func, info, wait) \
(up_smp_call_function(func, info))
static inline void init_call_single_data(void)
#include <linux/uio.h>
#include <asm/byteorder.h>
+#include <asm/unaligned.h>
#include <linux/scatterlist.h>
/*
static inline __be32 *
xdr_encode_hyper(__be32 *p, __u64 val)
{
- *(__be64 *)p = cpu_to_be64(val);
+ put_unaligned_be64(val, p);
return p + 2;
}
static inline __be32 *
xdr_decode_hyper(__be32 *p, __u64 *valp)
{
- *valp = be64_to_cpup((__be64 *)p);
+ *valp = get_unaligned_be64(p);
return p + 2;
}
* the type/offset into the pte as 5/27 as well.
*/
#define MAX_SWAPFILES_SHIFT 5
-#ifndef CONFIG_MIGRATION
-#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT)
+
+/*
+ * Use some of the swap files numbers for other purposes. This
+ * is a convenient way to hook into the VM to trigger special
+ * actions on faults.
+ */
+
+/*
+ * NUMA node memory migration support
+ */
+#ifdef CONFIG_MIGRATION
+#define SWP_MIGRATION_NUM 2
+#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM)
+#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
#else
-/* Use last two entries for page migration swap entries */
-#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2)
-#define SWP_MIGRATION_READ MAX_SWAPFILES
-#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1)
+#define SWP_MIGRATION_NUM 0
#endif
+/*
+ * Handling of hardware poisoned pages with memory corruption.
+ */
+#ifdef CONFIG_MEMORY_FAILURE
+#define SWP_HWPOISON_NUM 1
+#define SWP_HWPOISON MAX_SWAPFILES
+#else
+#define SWP_HWPOISON_NUM 0
+#endif
+
+#define MAX_SWAPFILES \
+ ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+
/*
* Magic header for a swap area. The first part of the union is
* what the swap magic looks like for the old (limited to 128MB)
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
unsigned int swappiness);
+extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+ gfp_t gfp_mask, bool noswap,
+ unsigned int swappiness,
+ struct zone *zone,
+ int nid);
extern int __isolate_lru_page(struct page *page, int mode, int file);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
extern void scan_mapping_unevictable_pages(struct address_space *);
extern unsigned long scan_unevictable_pages;
-extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
+extern int scan_unevictable_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int scan_unevictable_register_node(struct node *node);
extern void scan_unevictable_unregister_node(struct node *node);
#endif
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_HWPOISON;
+}
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
+static inline int non_swap_entry(swp_entry_t entry)
+{
+ return swp_type(entry) >= MAX_SWAPFILES;
+}
+#else
+static inline int non_swap_entry(swp_entry_t entry)
+{
+ return 0;
+}
+#endif
#include <linux/types.h>
#include <linux/compiler.h>
-struct file;
struct completion;
#define CTL_MAXNAME 10 /* how many path components do we allow in a
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen);
-typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp,
+typedef int proc_handler (struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-extern int proc_dostring(struct ctl_table *, int, struct file *,
+extern int proc_dostring(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec(struct ctl_table *, int, struct file *,
+extern int proc_dointvec(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_minmax(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_jiffies(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_ms_jiffies(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *,
+extern int proc_doulongvec_minmax(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
- struct file *, void __user *, size_t *, loff_t *);
+ void __user *, size_t *, loff_t *);
extern int do_sysctl (int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
struct tms;
extern void do_sys_times(struct tms *);
+/*
+ * Similar to the struct tm in userspace <time.h>, but it needs to be here so
+ * that the kernel source is self contained.
+ */
+struct tm {
+ /*
+ * the number of seconds after the minute, normally in the range
+ * 0 to 59, but can be up to 60 to allow for leap seconds
+ */
+ int tm_sec;
+ /* the number of minutes after the hour, in the range 0 to 59*/
+ int tm_min;
+ /* the number of hours past midnight, in the range 0 to 23 */
+ int tm_hour;
+ /* the day of the month, in the range 1 to 31 */
+ int tm_mday;
+ /* the number of months since January, in the range 0 to 11 */
+ int tm_mon;
+ /* the number of years since 1900 */
+ long tm_year;
+ /* the number of days since Sunday, in the range 0 to 6 */
+ int tm_wday;
+ /* the number of days since January 1, in the range 0 to 365 */
+ int tm_yday;
+};
+
+void time_to_tm(time_t totalsecs, int offset, struct tm *result);
+
/**
* timespec_to_ns - Convert timespec to nanoseconds
* @ts: pointer to the timespec variable to be converted
#ifndef topology_core_id
#define topology_core_id(cpu) ((void)(cpu), 0)
#endif
-#ifndef topology_thread_siblings
-#define topology_thread_siblings(cpu) cpumask_of_cpu(cpu)
-#endif
-#ifndef topology_core_siblings
-#define topology_core_siblings(cpu) cpumask_of_cpu(cpu)
-#endif
#ifndef topology_thread_cpumask
#define topology_thread_cpumask(cpu) cpumask_of(cpu)
#endif
/*
* Tracing hooks
*
- * Copyright (C) 2008 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
/**
* tracehook_notify_jctl - report about job control stop/continue
- * @notify: nonzero if this is the last thread in the group to stop
+ * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED
* @why: %CLD_STOPPED or %CLD_CONTINUED
*
* This is called when we might call do_notify_parent_cldstop().
- * It's called when about to stop for job control; we are already in
- * %TASK_STOPPED state, about to call schedule(). It's also called when
- * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made.
*
- * Return nonzero to generate a %SIGCHLD with @why, which is
- * normal if @notify is nonzero.
+ * @notify is zero if we would not ordinarily send a %SIGCHLD,
+ * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD.
*
- * Called with no locks held.
+ * @why is %CLD_STOPPED when about to stop for job control;
+ * we are already in %TASK_STOPPED state, about to call schedule().
+ * It might also be that we have just exited (check %PF_EXITING),
+ * but need to report that a group-wide stop is complete.
+ *
+ * @why is %CLD_CONTINUED when waking up after job control stop and
+ * ready to make a delayed @notify report.
+ *
+ * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal.
+ *
+ * Called with the siglock held.
*/
static inline int tracehook_notify_jctl(int notify, int why)
{
- return notify || (current->ptrace & PT_PTRACED);
+ return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
+}
+
+/**
+ * tracehook_finish_jctl - report about return from job control stop
+ *
+ * This is called by do_signal_stop() after wakeup.
+ */
+static inline void tracehook_finish_jctl(void)
+{
}
#define DEATH_REAP -1
/*
* Kernel Tracepoint API.
*
- * See Documentation/tracepoint.txt.
+ * See Documentation/trace/tracepoints.txt.
*
* (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
*
#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H
#define _LINUX_UNALIGNED_BE_BYTESHIFT_H
-#include <linux/kernel.h>
+#include <linux/types.h>
static inline u16 __get_unaligned_be16(const u8 *p)
{
#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H
#define _LINUX_UNALIGNED_LE_BYTESHIFT_H
-#include <linux/kernel.h>
+#include <linux/types.h>
static inline u16 __get_unaligned_le16(const u8 *p)
{
#define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */
#define FLAG_WLAN 0x0080 /* use "wlan%d" names */
#define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */
+#define FLAG_SEND_ZLP 0x0200 /* hw requires ZLPs are sent */
/* init device ... can sleep, or cause probe() failure */
#include <linux/kref.h>
#include <linux/nsproxy.h>
#include <linux/err.h>
-#include <asm/atomic.h>
struct uts_namespace {
struct kref kref;
/*
- * vgaarb.c
+ * The VGA aribiter manages VGA space routing and VGA resource decode to
+ * allow multiple VGA devices to be used in a system in a safe way.
*
* (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
* (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com>
extern unsigned long determine_dirtyable_memory(void);
extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_ratio_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_bytes_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos);
struct ctl_table;
-struct file;
-int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
+int dirty_writeback_centisecs_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
* @P9_DEBUG_SLABS: memory management tracing
* @P9_DEBUG_FCALL: verbose dump of protocol messages
* @P9_DEBUG_FID: fid allocation/deallocation tracking
+ * @P9_DEBUG_PKT: packet marshalling/unmarshalling
+ * @P9_DEBUG_FSC: FS-cache tracing
*
* These flags are passed at mount time to turn on various levels of
* verbosity and tracing which will be output to the system logs.
P9_DEBUG_FCALL = (1<<8),
P9_DEBUG_FID = (1<<9),
P9_DEBUG_PKT = (1<<10),
+ P9_DEBUG_FSC = (1<<11),
};
#ifdef CONFIG_NET_9P_DEBUG
* fed into the routing cache should use these handlers.
*/
int ipv4_doint_and_flush(ctl_table *ctl, int write,
- struct file* filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos);
int ipv4_doint_and_flush_strategy(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
struct ip_tunnel *next;
struct net_device *dev;
- int recursion; /* Depth of hard_start_xmit recursion */
int err_count; /* Number of arrived ICMP errors */
unsigned long err_time; /* Time when the last ICMP error arrived */
#include <net/neighbour.h>
struct ctl_table;
-struct file;
struct inet6_dev;
struct net_device;
struct net_proto_family;
#ifdef CONFIG_SYSCTL
extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl,
int write,
- struct file * filp,
void __user *buffer,
size_t *lenp,
loff_t *ppos);
SLUB sysfs support. /sys/slab will not exist and there will be
no support for cache validation etc.
-config STRIP_ASM_SYMS
- bool "Strip assembler-generated symbols during link"
- default n
- help
- Strip internal assembler-generated symbols during a link (symbols
- that look like '.Lxxx') so they don't pollute the output of
- get_wchan() and suchlike.
-
config COMPAT_BRK
bool "Disable heap randomization"
default y
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/delay.h>
-#include <linux/utsname.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#else
-#if NR_CPUS > BITS_PER_LONG
-cpumask_t cpu_mask_all __read_mostly = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_mask_all);
-#endif
-
/* Setup number of possible processor ids */
int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
}
#ifdef CONFIG_PROC_SYSCTL
-static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
+static int proc_ipc_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
- return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
}
static int proc_ipc_callback_dointvec(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
- rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+ rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp)
/*
}
static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
- return proc_doulongvec_minmax(&ipc_table, write, filp, buffer,
+ return proc_doulongvec_minmax(&ipc_table, write, buffer,
lenp, ppos);
}
}
static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
ipc_table.data = get_ipc(table);
oldval = *((int *)(ipc_table.data));
- rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+ rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp) {
int newval = *((int *)(ipc_table.data));
return which;
}
-static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
+static int proc_mq_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table mq_table;
memcpy(&mq_table, table, sizeof(mq_table));
mq_table.data = get_mq(table);
- return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
}
static int proc_mq_dointvec_minmax(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table mq_table;
memcpy(&mq_table, table, sizeof(mq_table));
mq_table.data = get_mq(table);
- return proc_dointvec_minmax(&mq_table, write, filp, buffer,
+ return proc_dointvec_minmax(&mq_table, write, buffer,
lenp, ppos);
}
#else
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
-obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
break;
}
case AUDIT_SIGNAL_INFO:
- err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
- if (err)
- return err;
+ len = 0;
+ if (audit_sig_sid) {
+ err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
+ if (err)
+ return err;
+ }
sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
if (!sig_data) {
- security_release_secctx(ctx, len);
+ if (audit_sig_sid)
+ security_release_secctx(ctx, len);
return -ENOMEM;
}
sig_data->uid = audit_sig_uid;
sig_data->pid = audit_sig_pid;
- memcpy(sig_data->ctx, ctx, len);
- security_release_secctx(ctx, len);
+ if (audit_sig_sid) {
+ memcpy(sig_data->ctx, ctx, len);
+ security_release_secctx(ctx, len);
+ }
audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
0, 0, sig_data, sizeof(*sig_data) + len);
kfree(sig_data);
struct audit_watch {
atomic_t count; /* reference count */
- char *path; /* insertion path */
dev_t dev; /* associated superblock device */
+ char *path; /* insertion path */
unsigned long ino; /* associated inode number */
struct audit_parent *parent; /* associated parent */
struct list_head wlist; /* entry in parent->watches list */
int in_syscall; /* 1 if task is in a syscall */
enum audit_state state, current_state;
unsigned int serial; /* serial number for record */
- struct timespec ctime; /* time of syscall entry */
int major; /* syscall number */
+ struct timespec ctime; /* time of syscall entry */
unsigned long argv[4]; /* syscall arguments */
- int return_valid; /* return code is valid */
long return_code;/* syscall return code */
u64 prio;
+ int return_valid; /* return code is valid */
int name_count;
struct audit_names names[AUDIT_NAMES];
char * filterkey; /* key for rule that triggered record */
char target_comm[TASK_COMM_LEN];
struct audit_tree_refs *trees, *first_trees;
- int tree_count;
struct list_head killed_trees;
+ int tree_count;
int type;
union {
*/
#include <linux/cgroup.h>
+#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/namei.h>
#include <linux/smp_lock.h>
#include <linux/pid_namespace.h>
+#include <linux/idr.h>
+#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <asm/atomic.h>
#include <linux/cgroup_subsys.h>
};
+#define MAX_CGROUP_ROOT_NAMELEN 64
+
/*
* A cgroupfs_root represents the root of a cgroup hierarchy,
* and may be associated with a superblock to form an active
*/
unsigned long subsys_bits;
+ /* Unique id for this hierarchy. */
+ int hierarchy_id;
+
/* The bitmask of subsystems currently attached to this hierarchy */
unsigned long actual_subsys_bits;
/* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
+
+ /* The name for this hierarchy - may be empty */
+ char name[MAX_CGROUP_ROOT_NAMELEN];
};
/*
static LIST_HEAD(roots);
static int root_count;
+static DEFINE_IDA(hierarchy_ida);
+static int next_hierarchy_id;
+static DEFINE_SPINLOCK(hierarchy_id_lock);
+
/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)
* cgroup, anchored on cgroup->css_sets
*/
struct list_head cgrp_link_list;
+ struct cgroup *cgrp;
/*
* List running through cg_cgroup_links pointing at a
* single css_set object, anchored on css_set->cg_links
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;
-/* hash table for cgroup groups. This improves the performance to
- * find an existing css_set */
+/*
+ * hash table for cgroup groups. This improves the performance to find
+ * an existing css_set. This hash doesn't (currently) take into
+ * account cgroups in empty hierarchies.
+ */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
return &css_set_table[index];
}
+static void free_css_set_rcu(struct rcu_head *obj)
+{
+ struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+ kfree(cg);
+}
+
/* We don't maintain the lists running through each css_set to its
* task until after the first call to cgroup_iter_start(). This
* reduces the fork()/exit() overhead for people who have cgroups
* compiled into their kernel but not actually in use */
static int use_task_css_set_links __read_mostly;
-/* When we create or destroy a css_set, the operation simply
- * takes/releases a reference count on all the cgroups referenced
- * by subsystems in this css_set. This can end up multiple-counting
- * some cgroups, but that's OK - the ref-count is just a
- * busy/not-busy indicator; ensuring that we only count each cgroup
- * once would require taking a global lock to ensure that no
- * subsystems moved between hierarchies while we were doing so.
- *
- * Possible TODO: decide at boot time based on the number of
- * registered subsystems and the number of CPUs or NUMA nodes whether
- * it's better for performance to ref-count every subsystem, or to
- * take a global lock and only add one ref count to each hierarchy.
- */
-
-/*
- * unlink a css_set from the list and free it
- */
-static void unlink_css_set(struct css_set *cg)
+static void __put_css_set(struct css_set *cg, int taskexit)
{
struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link;
-
- hlist_del(&cg->hlist);
- css_set_count--;
-
- list_for_each_entry_safe(link, saved_link, &cg->cg_links,
- cg_link_list) {
- list_del(&link->cg_link_list);
- list_del(&link->cgrp_link_list);
- kfree(link);
- }
-}
-
-static void __put_css_set(struct css_set *cg, int taskexit)
-{
- int i;
/*
* Ensure that the refcount doesn't hit zero while any readers
* can see it. Similar to atomic_dec_and_lock(), but for an
write_unlock(&css_set_lock);
return;
}
- unlink_css_set(cg);
- write_unlock(&css_set_lock);
- rcu_read_lock();
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
- struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
+ /* This css_set is dead. unlink it and release cgroup refcounts */
+ hlist_del(&cg->hlist);
+ css_set_count--;
+
+ list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+ cg_link_list) {
+ struct cgroup *cgrp = link->cgrp;
+ list_del(&link->cg_link_list);
+ list_del(&link->cgrp_link_list);
if (atomic_dec_and_test(&cgrp->count) &&
notify_on_release(cgrp)) {
if (taskexit)
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}
+
+ kfree(link);
}
- rcu_read_unlock();
- kfree(cg);
+
+ write_unlock(&css_set_lock);
+ call_rcu(&cg->rcu_head, free_css_set_rcu);
}
/*
__put_css_set(cg, 1);
}
+/*
+ * compare_css_sets - helper function for find_existing_css_set().
+ * @cg: candidate css_set being tested
+ * @old_cg: existing css_set for a task
+ * @new_cgrp: cgroup that's being entered by the task
+ * @template: desired set of css pointers in css_set (pre-calculated)
+ *
+ * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * which "new_cgrp" belongs to, for which it should match "new_cgrp".
+ */
+static bool compare_css_sets(struct css_set *cg,
+ struct css_set *old_cg,
+ struct cgroup *new_cgrp,
+ struct cgroup_subsys_state *template[])
+{
+ struct list_head *l1, *l2;
+
+ if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+ /* Not all subsystems matched */
+ return false;
+ }
+
+ /*
+ * Compare cgroup pointers in order to distinguish between
+ * different cgroups in heirarchies with no subsystems. We
+ * could get by with just this check alone (and skip the
+ * memcmp above) but on most setups the memcmp check will
+ * avoid the need for this more expensive check on almost all
+ * candidates.
+ */
+
+ l1 = &cg->cg_links;
+ l2 = &old_cg->cg_links;
+ while (1) {
+ struct cg_cgroup_link *cgl1, *cgl2;
+ struct cgroup *cg1, *cg2;
+
+ l1 = l1->next;
+ l2 = l2->next;
+ /* See if we reached the end - both lists are equal length. */
+ if (l1 == &cg->cg_links) {
+ BUG_ON(l2 != &old_cg->cg_links);
+ break;
+ } else {
+ BUG_ON(l2 == &old_cg->cg_links);
+ }
+ /* Locate the cgroups associated with these links. */
+ cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
+ cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
+ cg1 = cgl1->cgrp;
+ cg2 = cgl2->cgrp;
+ /* Hierarchies should be linked in the same order. */
+ BUG_ON(cg1->root != cg2->root);
+
+ /*
+ * If this hierarchy is the hierarchy of the cgroup
+ * that's changing, then we need to check that this
+ * css_set points to the new cgroup; if it's any other
+ * hierarchy, then this css_set should point to the
+ * same cgroup as the old css_set.
+ */
+ if (cg1->root == new_cgrp->root) {
+ if (cg1 != new_cgrp)
+ return false;
+ } else {
+ if (cg1 != cg2)
+ return false;
+ }
+ }
+ return true;
+}
+
/*
* find_existing_css_set() is a helper for
* find_css_set(), and checks to see whether an existing
hhead = css_set_hash(template);
hlist_for_each_entry(cg, node, hhead, hlist) {
- if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
- /* All subsystems matched */
- return cg;
- }
+ if (!compare_css_sets(cg, oldcg, cgrp, template))
+ continue;
+
+ /* This css_set matches what we need */
+ return cg;
}
/* No existing cgroup group matched */
link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
cgrp_link_list);
link->cg = cg;
+ link->cgrp = cgrp;
+ atomic_inc(&cgrp->count);
list_move(&link->cgrp_link_list, &cgrp->css_sets);
- list_add(&link->cg_link_list, &cg->cg_links);
+ /*
+ * Always add links to the tail of the list so that the list
+ * is sorted by order of hierarchy creation
+ */
+ list_add_tail(&link->cg_link_list, &cg->cg_links);
}
/*
{
struct css_set *res;
struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
- int i;
struct list_head tmp_cg_links;
struct hlist_head *hhead;
+ struct cg_cgroup_link *link;
/* First see if we already have a cgroup group that matches
* the desired set */
write_lock(&css_set_lock);
/* Add reference counts and links from the new css_set. */
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
- struct cgroup *cgrp = res->subsys[i]->cgroup;
- struct cgroup_subsys *ss = subsys[i];
- atomic_inc(&cgrp->count);
- /*
- * We want to add a link once per cgroup, so we
- * only do it for the first subsystem in each
- * hierarchy
- */
- if (ss->root->subsys_list.next == &ss->sibling)
- link_css_set(&tmp_cg_links, res, cgrp);
+ list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
+ struct cgroup *c = link->cgrp;
+ if (c->root == cgrp->root)
+ c = cgrp;
+ link_css_set(&tmp_cg_links, res, c);
}
- if (list_empty(&rootnode.subsys_list))
- link_css_set(&tmp_cg_links, res, dummytop);
BUG_ON(!list_empty(&tmp_cg_links));
return res;
}
+/*
+ * Return the cgroup for "task" from the given hierarchy. Must be
+ * called with cgroup_mutex held.
+ */
+static struct cgroup *task_cgroup_from_root(struct task_struct *task,
+ struct cgroupfs_root *root)
+{
+ struct css_set *css;
+ struct cgroup *res = NULL;
+
+ BUG_ON(!mutex_is_locked(&cgroup_mutex));
+ read_lock(&css_set_lock);
+ /*
+ * No need to lock the task - since we hold cgroup_mutex the
+ * task can't change groups, so the only thing that can happen
+ * is that it exits and its css is set back to init_css_set.
+ */
+ css = task->cgroups;
+ if (css == &init_css_set) {
+ res = &root->top_cgroup;
+ } else {
+ struct cg_cgroup_link *link;
+ list_for_each_entry(link, &css->cg_links, cg_link_list) {
+ struct cgroup *c = link->cgrp;
+ if (c->root == root) {
+ res = c;
+ break;
+ }
+ }
+ }
+ read_unlock(&css_set_lock);
+ BUG_ON(!res);
+ return res;
+}
+
/*
* There is one global cgroup mutex. We also require taking
* task_lock() when dereferencing a task's cgroup subsys pointers.
*/
deactivate_super(cgrp->root->sb);
+ /*
+ * if we're getting rid of the cgroup, refcount should ensure
+ * that there are no pidlists left.
+ */
+ BUG_ON(!list_empty(&cgrp->pidlists));
+
call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
}
iput(inode);
seq_puts(seq, ",noprefix");
if (strlen(root->release_agent_path))
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+ if (strlen(root->name))
+ seq_printf(seq, ",name=%s", root->name);
mutex_unlock(&cgroup_mutex);
return 0;
}
unsigned long subsys_bits;
unsigned long flags;
char *release_agent;
+ char *name;
+ /* User explicitly requested empty subsystem */
+ bool none;
+
+ struct cgroupfs_root *new_root;
+
};
/* Convert a hierarchy specifier into a bitmask of subsystems and
mask = ~(1UL << cpuset_subsys_id);
#endif
- opts->subsys_bits = 0;
- opts->flags = 0;
- opts->release_agent = NULL;
+ memset(opts, 0, sizeof(*opts));
while ((token = strsep(&o, ",")) != NULL) {
if (!*token)
if (!ss->disabled)
opts->subsys_bits |= 1ul << i;
}
+ } else if (!strcmp(token, "none")) {
+ /* Explicitly have no subsystems */
+ opts->none = true;
} else if (!strcmp(token, "noprefix")) {
set_bit(ROOT_NOPREFIX, &opts->flags);
} else if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
return -EINVAL;
- opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
+ opts->release_agent =
+ kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
if (!opts->release_agent)
return -ENOMEM;
- strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
- opts->release_agent[PATH_MAX - 1] = 0;
+ } else if (!strncmp(token, "name=", 5)) {
+ int i;
+ const char *name = token + 5;
+ /* Can't specify an empty name */
+ if (!strlen(name))
+ return -EINVAL;
+ /* Must match [\w.-]+ */
+ for (i = 0; i < strlen(name); i++) {
+ char c = name[i];
+ if (isalnum(c))
+ continue;
+ if ((c == '.') || (c == '-') || (c == '_'))
+ continue;
+ return -EINVAL;
+ }
+ /* Specifying two names is forbidden */
+ if (opts->name)
+ return -EINVAL;
+ opts->name = kstrndup(name,
+ MAX_CGROUP_ROOT_NAMELEN,
+ GFP_KERNEL);
+ if (!opts->name)
+ return -ENOMEM;
} else {
struct cgroup_subsys *ss;
int i;
}
}
+ /* Consistency checks */
+
/*
* Option noprefix was introduced just for backward compatibility
* with the old cpuset, so we allow noprefix only if mounting just
(opts->subsys_bits & mask))
return -EINVAL;
- /* We can't have an empty hierarchy */
- if (!opts->subsys_bits)
+
+ /* Can't specify "none" and some subsystems */
+ if (opts->subsys_bits && opts->none)
+ return -EINVAL;
+
+ /*
+ * We either have to specify by name or by subsystems. (So all
+ * empty hierarchies must have a name).
+ */
+ if (!opts->subsys_bits && !opts->name)
return -EINVAL;
return 0;
goto out_unlock;
}
+ /* Don't allow name to change at remount */
+ if (opts.name && strcmp(opts.name, root->name)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
ret = rebind_subsystems(root, opts.subsys_bits);
if (ret)
goto out_unlock;
strcpy(root->release_agent_path, opts.release_agent);
out_unlock:
kfree(opts.release_agent);
+ kfree(opts.name);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
unlock_kernel();
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->css_sets);
INIT_LIST_HEAD(&cgrp->release_list);
- INIT_LIST_HEAD(&cgrp->pids_list);
- init_rwsem(&cgrp->pids_mutex);
+ INIT_LIST_HEAD(&cgrp->pidlists);
+ mutex_init(&cgrp->pidlist_mutex);
}
+
static void init_cgroup_root(struct cgroupfs_root *root)
{
struct cgroup *cgrp = &root->top_cgroup;
init_cgroup_housekeeping(cgrp);
}
+static bool init_root_id(struct cgroupfs_root *root)
+{
+ int ret = 0;
+
+ do {
+ if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
+ return false;
+ spin_lock(&hierarchy_id_lock);
+ /* Try to allocate the next unused ID */
+ ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
+ &root->hierarchy_id);
+ if (ret == -ENOSPC)
+ /* Try again starting from 0 */
+ ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
+ if (!ret) {
+ next_hierarchy_id = root->hierarchy_id + 1;
+ } else if (ret != -EAGAIN) {
+ /* Can only get here if the 31-bit IDR is full ... */
+ BUG_ON(ret);
+ }
+ spin_unlock(&hierarchy_id_lock);
+ } while (ret);
+ return true;
+}
+
static int cgroup_test_super(struct super_block *sb, void *data)
{
- struct cgroupfs_root *new = data;
+ struct cgroup_sb_opts *opts = data;
struct cgroupfs_root *root = sb->s_fs_info;
- /* First check subsystems */
- if (new->subsys_bits != root->subsys_bits)
- return 0;
+ /* If we asked for a name then it must match */
+ if (opts->name && strcmp(opts->name, root->name))
+ return 0;
- /* Next check flags */
- if (new->flags != root->flags)
+ /*
+ * If we asked for subsystems (or explicitly for no
+ * subsystems) then they must match
+ */
+ if ((opts->subsys_bits || opts->none)
+ && (opts->subsys_bits != root->subsys_bits))
return 0;
return 1;
}
+static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
+{
+ struct cgroupfs_root *root;
+
+ if (!opts->subsys_bits && !opts->none)
+ return NULL;
+
+ root = kzalloc(sizeof(*root), GFP_KERNEL);
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+
+ if (!init_root_id(root)) {
+ kfree(root);
+ return ERR_PTR(-ENOMEM);
+ }
+ init_cgroup_root(root);
+
+ root->subsys_bits = opts->subsys_bits;
+ root->flags = opts->flags;
+ if (opts->release_agent)
+ strcpy(root->release_agent_path, opts->release_agent);
+ if (opts->name)
+ strcpy(root->name, opts->name);
+ return root;
+}
+
+static void cgroup_drop_root(struct cgroupfs_root *root)
+{
+ if (!root)
+ return;
+
+ BUG_ON(!root->hierarchy_id);
+ spin_lock(&hierarchy_id_lock);
+ ida_remove(&hierarchy_ida, root->hierarchy_id);
+ spin_unlock(&hierarchy_id_lock);
+ kfree(root);
+}
+
static int cgroup_set_super(struct super_block *sb, void *data)
{
int ret;
- struct cgroupfs_root *root = data;
+ struct cgroup_sb_opts *opts = data;
+
+ /* If we don't have a new root, we can't set up a new sb */
+ if (!opts->new_root)
+ return -EINVAL;
+
+ BUG_ON(!opts->subsys_bits && !opts->none);
ret = set_anon_super(sb, NULL);
if (ret)
return ret;
- sb->s_fs_info = root;
- root->sb = sb;
+ sb->s_fs_info = opts->new_root;
+ opts->new_root->sb = sb;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
void *data, struct vfsmount *mnt)
{
struct cgroup_sb_opts opts;
+ struct cgroupfs_root *root;
int ret = 0;
struct super_block *sb;
- struct cgroupfs_root *root;
- struct list_head tmp_cg_links;
+ struct cgroupfs_root *new_root;
/* First find the desired set of subsystems */
ret = parse_cgroupfs_options(data, &opts);
- if (ret) {
- kfree(opts.release_agent);
- return ret;
- }
-
- root = kzalloc(sizeof(*root), GFP_KERNEL);
- if (!root) {
- kfree(opts.release_agent);
- return -ENOMEM;
- }
+ if (ret)
+ goto out_err;
- init_cgroup_root(root);
- root->subsys_bits = opts.subsys_bits;
- root->flags = opts.flags;
- if (opts.release_agent) {
- strcpy(root->release_agent_path, opts.release_agent);
- kfree(opts.release_agent);
+ /*
+ * Allocate a new cgroup root. We may not need it if we're
+ * reusing an existing hierarchy.
+ */
+ new_root = cgroup_root_from_opts(&opts);
+ if (IS_ERR(new_root)) {
+ ret = PTR_ERR(new_root);
+ goto out_err;
}
+ opts.new_root = new_root;
- sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
-
+ /* Locate an existing or new sb for this hierarchy */
+ sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
if (IS_ERR(sb)) {
- kfree(root);
- return PTR_ERR(sb);
+ ret = PTR_ERR(sb);
+ cgroup_drop_root(opts.new_root);
+ goto out_err;
}
- if (sb->s_fs_info != root) {
- /* Reusing an existing superblock */
- BUG_ON(sb->s_root == NULL);
- kfree(root);
- root = NULL;
- } else {
- /* New superblock */
+ root = sb->s_fs_info;
+ BUG_ON(!root);
+ if (root == opts.new_root) {
+ /* We used the new root structure, so this is a new hierarchy */
+ struct list_head tmp_cg_links;
struct cgroup *root_cgrp = &root->top_cgroup;
struct inode *inode;
+ struct cgroupfs_root *existing_root;
int i;
BUG_ON(sb->s_root != NULL);
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
+ if (strlen(root->name)) {
+ /* Check for name clashes with existing mounts */
+ for_each_active_root(existing_root) {
+ if (!strcmp(existing_root->name, root->name)) {
+ ret = -EBUSY;
+ mutex_unlock(&cgroup_mutex);
+ mutex_unlock(&inode->i_mutex);
+ goto drop_new_super;
+ }
+ }
+ }
+
/*
* We're accessing css_set_count without locking
* css_set_lock here, but that's OK - it can only be
if (ret == -EBUSY) {
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
- goto free_cg_links;
+ free_cg_links(&tmp_cg_links);
+ goto drop_new_super;
}
/* EBUSY should be the only error here */
BUG_ON(root->number_of_cgroups != 1);
cgroup_populate_dir(root_cgrp);
- mutex_unlock(&inode->i_mutex);
mutex_unlock(&cgroup_mutex);
+ mutex_unlock(&inode->i_mutex);
+ } else {
+ /*
+ * We re-used an existing hierarchy - the new root (if
+ * any) is not needed
+ */
+ cgroup_drop_root(opts.new_root);
}
simple_set_mnt(mnt, sb);
+ kfree(opts.release_agent);
+ kfree(opts.name);
return 0;
- free_cg_links:
- free_cg_links(&tmp_cg_links);
drop_new_super:
deactivate_locked_super(sb);
+ out_err:
+ kfree(opts.release_agent);
+ kfree(opts.name);
+
return ret;
}
mutex_unlock(&cgroup_mutex);
kill_litter_super(sb);
- kfree(root);
+ cgroup_drop_root(root);
}
static struct file_system_type cgroup_fs_type = {
return 0;
}
-/*
- * Return the first subsystem attached to a cgroup's hierarchy, and
- * its subsystem id.
- */
-
-static void get_first_subsys(const struct cgroup *cgrp,
- struct cgroup_subsys_state **css, int *subsys_id)
-{
- const struct cgroupfs_root *root = cgrp->root;
- const struct cgroup_subsys *test_ss;
- BUG_ON(list_empty(&root->subsys_list));
- test_ss = list_entry(root->subsys_list.next,
- struct cgroup_subsys, sibling);
- if (css) {
- *css = cgrp->subsys[test_ss->subsys_id];
- BUG_ON(!*css);
- }
- if (subsys_id)
- *subsys_id = test_ss->subsys_id;
-}
-
/**
* cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
* @cgrp: the cgroup the task is attaching to
struct css_set *cg;
struct css_set *newcg;
struct cgroupfs_root *root = cgrp->root;
- int subsys_id;
-
- get_first_subsys(cgrp, NULL, &subsys_id);
/* Nothing to do if the task is already in that cgroup */
- oldcgrp = task_cgroup(tsk, subsys_id);
+ oldcgrp = task_cgroup_from_root(tsk, root);
if (cgrp == oldcgrp)
return 0;
for_each_subsys(root, ss) {
if (ss->can_attach) {
- retval = ss->can_attach(ss, cgrp, tsk);
+ retval = ss->can_attach(ss, cgrp, tsk, false);
if (retval)
return retval;
}
for_each_subsys(root, ss) {
if (ss->attach)
- ss->attach(ss, cgrp, oldcgrp, tsk);
+ ss->attach(ss, cgrp, oldcgrp, tsk, false);
}
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
synchronize_rcu();
return ret;
}
-/* The various types of files and directories in a cgroup file system */
-enum cgroup_filetype {
- FILE_ROOT,
- FILE_DIR,
- FILE_TASKLIST,
- FILE_NOTIFY_ON_RELEASE,
- FILE_RELEASE_AGENT,
-};
-
/**
* cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
* @cgrp: the cgroup to be checked for liveness
* the start of a css_set
*/
static void cgroup_advance_iter(struct cgroup *cgrp,
- struct cgroup_iter *it)
+ struct cgroup_iter *it)
{
struct list_head *l = it->cg_link;
struct cg_cgroup_link *link;
}
/*
- * Stuff for reading the 'tasks' file.
+ * Stuff for reading the 'tasks'/'procs' files.
*
* Reading this file can return large amounts of data if a cgroup has
* *lots* of attached tasks. So it may need several calls to read(),
*/
/*
- * Load into 'pidarray' up to 'npids' of the tasks using cgroup
- * 'cgrp'. Return actual number of pids loaded. No need to
- * task_lock(p) when reading out p->cgroup, since we're in an RCU
- * read section, so the css_set can't go away, and is
- * immutable after creation.
+ * The following two functions "fix" the issue where there are more pids
+ * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
+ * TODO: replace with a kernel-wide solution to this problem
+ */
+#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
+static void *pidlist_allocate(int count)
+{
+ if (PIDLIST_TOO_LARGE(count))
+ return vmalloc(count * sizeof(pid_t));
+ else
+ return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
+}
+static void pidlist_free(void *p)
+{
+ if (is_vmalloc_addr(p))
+ vfree(p);
+ else
+ kfree(p);
+}
+static void *pidlist_resize(void *p, int newcount)
+{
+ void *newlist;
+ /* note: if new alloc fails, old p will still be valid either way */
+ if (is_vmalloc_addr(p)) {
+ newlist = vmalloc(newcount * sizeof(pid_t));
+ if (!newlist)
+ return NULL;
+ memcpy(newlist, p, newcount * sizeof(pid_t));
+ vfree(p);
+ } else {
+ newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
+ }
+ return newlist;
+}
+
+/*
+ * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
+ * If the new stripped list is sufficiently smaller and there's enough memory
+ * to allocate a new buffer, will let go of the unneeded memory. Returns the
+ * number of unique elements.
+ */
+/* is the size difference enough that we should re-allocate the array? */
+#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
+static int pidlist_uniq(pid_t **p, int length)
+{
+ int src, dest = 1;
+ pid_t *list = *p;
+ pid_t *newlist;
+
+ /*
+ * we presume the 0th element is unique, so i starts at 1. trivial
+ * edge cases first; no work needs to be done for either
+ */
+ if (length == 0 || length == 1)
+ return length;
+ /* src and dest walk down the list; dest counts unique elements */
+ for (src = 1; src < length; src++) {
+ /* find next unique element */
+ while (list[src] == list[src-1]) {
+ src++;
+ if (src == length)
+ goto after;
+ }
+ /* dest always points to where the next unique element goes */
+ list[dest] = list[src];
+ dest++;
+ }
+after:
+ /*
+ * if the length difference is large enough, we want to allocate a
+ * smaller buffer to save memory. if this fails due to out of memory,
+ * we'll just stay with what we've got.
+ */
+ if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
+ newlist = pidlist_resize(list, dest);
+ if (newlist)
+ *p = newlist;
+ }
+ return dest;
+}
+
+static int cmppid(const void *a, const void *b)
+{
+ return *(pid_t *)a - *(pid_t *)b;
+}
+
+/*
+ * find the appropriate pidlist for our purpose (given procs vs tasks)
+ * returns with the lock on that pidlist already held, and takes care
+ * of the use count, or returns NULL with no locks held if we're out of
+ * memory.
*/
-static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
+static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
+ enum cgroup_filetype type)
{
- int n = 0, pid;
+ struct cgroup_pidlist *l;
+ /* don't need task_nsproxy() if we're looking at ourself */
+ struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);
+ /*
+ * We can't drop the pidlist_mutex before taking the l->mutex in case
+ * the last ref-holder is trying to remove l from the list at the same
+ * time. Holding the pidlist_mutex precludes somebody taking whichever
+ * list we find out from under us - compare release_pid_array().
+ */
+ mutex_lock(&cgrp->pidlist_mutex);
+ list_for_each_entry(l, &cgrp->pidlists, links) {
+ if (l->key.type == type && l->key.ns == ns) {
+ /* found a matching list - drop the extra refcount */
+ put_pid_ns(ns);
+ /* make sure l doesn't vanish out from under us */
+ down_write(&l->mutex);
+ mutex_unlock(&cgrp->pidlist_mutex);
+ l->use_count++;
+ return l;
+ }
+ }
+ /* entry not found; create a new one */
+ l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
+ if (!l) {
+ mutex_unlock(&cgrp->pidlist_mutex);
+ put_pid_ns(ns);
+ return l;
+ }
+ init_rwsem(&l->mutex);
+ down_write(&l->mutex);
+ l->key.type = type;
+ l->key.ns = ns;
+ l->use_count = 0; /* don't increment here */
+ l->list = NULL;
+ l->owner = cgrp;
+ list_add(&l->links, &cgrp->pidlists);
+ mutex_unlock(&cgrp->pidlist_mutex);
+ return l;
+}
+
+/*
+ * Load a cgroup's pidarray with either procs' tgids or tasks' pids
+ */
+static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
+ struct cgroup_pidlist **lp)
+{
+ pid_t *array;
+ int length;
+ int pid, n = 0; /* used for populating the array */
struct cgroup_iter it;
struct task_struct *tsk;
+ struct cgroup_pidlist *l;
+
+ /*
+ * If cgroup gets more users after we read count, we won't have
+ * enough space - tough. This race is indistinguishable to the
+ * caller from the case that the additional cgroup users didn't
+ * show up until sometime later on.
+ */
+ length = cgroup_task_count(cgrp);
+ array = pidlist_allocate(length);
+ if (!array)
+ return -ENOMEM;
+ /* now, populate the array */
cgroup_iter_start(cgrp, &it);
while ((tsk = cgroup_iter_next(cgrp, &it))) {
- if (unlikely(n == npids))
+ if (unlikely(n == length))
break;
- pid = task_pid_vnr(tsk);
- if (pid > 0)
- pidarray[n++] = pid;
+ /* get tgid or pid for procs or tasks file respectively */
+ if (type == CGROUP_FILE_PROCS)
+ pid = task_tgid_vnr(tsk);
+ else
+ pid = task_pid_vnr(tsk);
+ if (pid > 0) /* make sure to only use valid results */
+ array[n++] = pid;
}
cgroup_iter_end(cgrp, &it);
- return n;
+ length = n;
+ /* now sort & (if procs) strip out duplicates */
+ sort(array, length, sizeof(pid_t), cmppid, NULL);
+ if (type == CGROUP_FILE_PROCS)
+ length = pidlist_uniq(&array, length);
+ l = cgroup_pidlist_find(cgrp, type);
+ if (!l) {
+ pidlist_free(array);
+ return -ENOMEM;
+ }
+ /* store array, freeing old if necessary - lock already held */
+ pidlist_free(l->list);
+ l->list = array;
+ l->length = length;
+ l->use_count++;
+ up_write(&l->mutex);
+ *lp = l;
+ return 0;
}
/**
return ret;
}
-/*
- * Cache pids for all threads in the same pid namespace that are
- * opening the same "tasks" file.
- */
-struct cgroup_pids {
- /* The node in cgrp->pids_list */
- struct list_head list;
- /* The cgroup those pids belong to */
- struct cgroup *cgrp;
- /* The namepsace those pids belong to */
- struct pid_namespace *ns;
- /* Array of process ids in the cgroup */
- pid_t *tasks_pids;
- /* How many files are using the this tasks_pids array */
- int use_count;
- /* Length of the current tasks_pids array */
- int length;
-};
-
-static int cmppid(const void *a, const void *b)
-{
- return *(pid_t *)a - *(pid_t *)b;
-}
/*
- * seq_file methods for the "tasks" file. The seq_file position is the
+ * seq_file methods for the tasks/procs files. The seq_file position is the
* next pid to display; the seq_file iterator is a pointer to the pid
- * in the cgroup->tasks_pids array.
+ * in the cgroup->l->list array.
*/
-static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
+static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
/*
* Initially we receive a position value that corresponds to
* after a seek to the start). Use a binary-search to find the
* next pid to display, if any
*/
- struct cgroup_pids *cp = s->private;
- struct cgroup *cgrp = cp->cgrp;
+ struct cgroup_pidlist *l = s->private;
int index = 0, pid = *pos;
int *iter;
- down_read(&cgrp->pids_mutex);
+ down_read(&l->mutex);
if (pid) {
- int end = cp->length;
+ int end = l->length;
while (index < end) {
int mid = (index + end) / 2;
- if (cp->tasks_pids[mid] == pid) {
+ if (l->list[mid] == pid) {
index = mid;
break;
- } else if (cp->tasks_pids[mid] <= pid)
+ } else if (l->list[mid] <= pid)
index = mid + 1;
else
end = mid;
}
}
/* If we're off the end of the array, we're done */
- if (index >= cp->length)
+ if (index >= l->length)
return NULL;
/* Update the abstract position to be the actual pid that we found */
- iter = cp->tasks_pids + index;
+ iter = l->list + index;
*pos = *iter;
return iter;
}
-static void cgroup_tasks_stop(struct seq_file *s, void *v)
+static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
- struct cgroup_pids *cp = s->private;
- struct cgroup *cgrp = cp->cgrp;
- up_read(&cgrp->pids_mutex);
+ struct cgroup_pidlist *l = s->private;
+ up_read(&l->mutex);
}
-static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct cgroup_pids *cp = s->private;
- int *p = v;
- int *end = cp->tasks_pids + cp->length;
-
+ struct cgroup_pidlist *l = s->private;
+ pid_t *p = v;
+ pid_t *end = l->list + l->length;
/*
* Advance to the next pid in the array. If this goes off the
* end, we're done
}
}
-static int cgroup_tasks_show(struct seq_file *s, void *v)
+static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
return seq_printf(s, "%d\n", *(int *)v);
}
-static const struct seq_operations cgroup_tasks_seq_operations = {
- .start = cgroup_tasks_start,
- .stop = cgroup_tasks_stop,
- .next = cgroup_tasks_next,
- .show = cgroup_tasks_show,
+/*
+ * seq_operations functions for iterating on pidlists through seq_file -
+ * independent of whether it's tasks or procs
+ */
+static const struct seq_operations cgroup_pidlist_seq_operations = {
+ .start = cgroup_pidlist_start,
+ .stop = cgroup_pidlist_stop,
+ .next = cgroup_pidlist_next,
+ .show = cgroup_pidlist_show,
};
-static void release_cgroup_pid_array(struct cgroup_pids *cp)
+static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
- struct cgroup *cgrp = cp->cgrp;
-
- down_write(&cgrp->pids_mutex);
- BUG_ON(!cp->use_count);
- if (!--cp->use_count) {
- list_del(&cp->list);
- put_pid_ns(cp->ns);
- kfree(cp->tasks_pids);
- kfree(cp);
+ /*
+ * the case where we're the last user of this particular pidlist will
+ * have us remove it from the cgroup's list, which entails taking the
+ * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
+ * pidlist_mutex, we have to take pidlist_mutex first.
+ */
+ mutex_lock(&l->owner->pidlist_mutex);
+ down_write(&l->mutex);
+ BUG_ON(!l->use_count);
+ if (!--l->use_count) {
+ /* we're the last user if refcount is 0; remove and free */
+ list_del(&l->links);
+ mutex_unlock(&l->owner->pidlist_mutex);
+ pidlist_free(l->list);
+ put_pid_ns(l->key.ns);
+ up_write(&l->mutex);
+ kfree(l);
+ return;
}
- up_write(&cgrp->pids_mutex);
+ mutex_unlock(&l->owner->pidlist_mutex);
+ up_write(&l->mutex);
}
-static int cgroup_tasks_release(struct inode *inode, struct file *file)
+static int cgroup_pidlist_release(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
- struct cgroup_pids *cp;
-
+ struct cgroup_pidlist *l;
if (!(file->f_mode & FMODE_READ))
return 0;
-
- seq = file->private_data;
- cp = seq->private;
-
- release_cgroup_pid_array(cp);
+ /*
+ * the seq_file will only be initialized if the file was opened for
+ * reading; hence we check if it's not null only in that case.
+ */
+ l = ((struct seq_file *)file->private_data)->private;
+ cgroup_release_pid_array(l);
return seq_release(inode, file);
}
-static struct file_operations cgroup_tasks_operations = {
+static const struct file_operations cgroup_pidlist_operations = {
.read = seq_read,
.llseek = seq_lseek,
.write = cgroup_file_write,
- .release = cgroup_tasks_release,
+ .release = cgroup_pidlist_release,
};
/*
- * Handle an open on 'tasks' file. Prepare an array containing the
- * process id's of tasks currently attached to the cgroup being opened.
+ * The following functions handle opens on a file that displays a pidlist
+ * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
+ * in the cgroup.
*/
-
-static int cgroup_tasks_open(struct inode *unused, struct file *file)
+/* helper function for the two below it */
+static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- struct pid_namespace *ns = current->nsproxy->pid_ns;
- struct cgroup_pids *cp;
- pid_t *pidarray;
- int npids;
+ struct cgroup_pidlist *l;
int retval;
/* Nothing to do for write-only files */
if (!(file->f_mode & FMODE_READ))
return 0;
- /*
- * If cgroup gets more users after we read count, we won't have
- * enough space - tough. This race is indistinguishable to the
- * caller from the case that the additional cgroup users didn't
- * show up until sometime later on.
- */
- npids = cgroup_task_count(cgrp);
- pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
- if (!pidarray)
- return -ENOMEM;
- npids = pid_array_load(pidarray, npids, cgrp);
- sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
- /*
- * Store the array in the cgroup, freeing the old
- * array if necessary
- */
- down_write(&cgrp->pids_mutex);
-
- list_for_each_entry(cp, &cgrp->pids_list, list) {
- if (ns == cp->ns)
- goto found;
- }
-
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp) {
- up_write(&cgrp->pids_mutex);
- kfree(pidarray);
- return -ENOMEM;
- }
- cp->cgrp = cgrp;
- cp->ns = ns;
- get_pid_ns(ns);
- list_add(&cp->list, &cgrp->pids_list);
-found:
- kfree(cp->tasks_pids);
- cp->tasks_pids = pidarray;
- cp->length = npids;
- cp->use_count++;
- up_write(&cgrp->pids_mutex);
-
- file->f_op = &cgroup_tasks_operations;
+ /* have the array populated */
+ retval = pidlist_array_load(cgrp, type, &l);
+ if (retval)
+ return retval;
+ /* configure file information */
+ file->f_op = &cgroup_pidlist_operations;
- retval = seq_open(file, &cgroup_tasks_seq_operations);
+ retval = seq_open(file, &cgroup_pidlist_seq_operations);
if (retval) {
- release_cgroup_pid_array(cp);
+ cgroup_release_pid_array(l);
return retval;
}
- ((struct seq_file *)file->private_data)->private = cp;
+ ((struct seq_file *)file->private_data)->private = l;
return 0;
}
+static int cgroup_tasks_open(struct inode *unused, struct file *file)
+{
+ return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
+}
+static int cgroup_procs_open(struct inode *unused, struct file *file)
+{
+ return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
+}
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
struct cftype *cft)
/*
* for the common functions, 'private' gives the type of file
*/
+/* for hysterical raisins, we can't put this on the older files */
+#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
static struct cftype files[] = {
{
.name = "tasks",
.open = cgroup_tasks_open,
.write_u64 = cgroup_tasks_write,
- .release = cgroup_tasks_release,
- .private = FILE_TASKLIST,
+ .release = cgroup_pidlist_release,
.mode = S_IRUGO | S_IWUSR,
},
-
+ {
+ .name = CGROUP_FILE_GENERIC_PREFIX "procs",
+ .open = cgroup_procs_open,
+ /* .write_u64 = cgroup_procs_write, TODO */
+ .release = cgroup_pidlist_release,
+ .mode = S_IRUGO,
+ },
{
.name = "notify_on_release",
.read_u64 = cgroup_read_notify_on_release,
.write_u64 = cgroup_write_notify_on_release,
- .private = FILE_NOTIFY_ON_RELEASE,
},
};
.read_seq_string = cgroup_release_agent_show,
.write_string = cgroup_release_agent_write,
.max_write_len = PATH_MAX,
- .private = FILE_RELEASE_AGENT,
};
static int cgroup_populate_dir(struct cgroup *cgrp)
init_task.cgroups = &init_css_set;
init_css_set_link.cg = &init_css_set;
+ init_css_set_link.cgrp = dummytop;
list_add(&init_css_set_link.cgrp_link_list,
&rootnode.top_cgroup.css_sets);
list_add(&init_css_set_link.cg_link_list,
/* Add init_css_set to the hash table */
hhead = css_set_hash(init_css_set.subsys);
hlist_add_head(&init_css_set.hlist, hhead);
-
+ BUG_ON(!init_root_id(&rootnode));
err = register_filesystem(&cgroup_fs_type);
if (err < 0)
goto out;
for_each_active_root(root) {
struct cgroup_subsys *ss;
struct cgroup *cgrp;
- int subsys_id;
int count = 0;
- seq_printf(m, "%lu:", root->subsys_bits);
+ seq_printf(m, "%d:", root->hierarchy_id);
for_each_subsys(root, ss)
seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+ if (strlen(root->name))
+ seq_printf(m, "%sname=%s", count ? "," : "",
+ root->name);
seq_putc(m, ':');
- get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
- cgrp = task_cgroup(tsk, subsys_id);
+ cgrp = task_cgroup_from_root(tsk, root);
retval = cgroup_path(cgrp, buf, PAGE_SIZE);
if (retval < 0)
goto out_unlock;
mutex_lock(&cgroup_mutex);
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
- seq_printf(m, "%s\t%lu\t%d\t%d\n",
- ss->name, ss->root->subsys_bits,
+ seq_printf(m, "%s\t%d\t%d\t%d\n",
+ ss->name, ss->root->hierarchy_id,
ss->root->number_of_cgroups, !ss->disabled);
}
mutex_unlock(&cgroup_mutex);
{
int ret;
struct cgroup *target;
- int subsys_id;
if (cgrp == dummytop)
return 1;
- get_first_subsys(cgrp, NULL, &subsys_id);
- target = task_cgroup(task, subsys_id);
+ target = task_cgroup_from_root(task, cgrp->root);
while (cgrp != target && cgrp!= cgrp->top_cgroup)
cgrp = cgrp->parent;
ret = (cgrp == target);
return ret;
}
+#ifdef CONFIG_CGROUP_DEBUG
+static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
+ struct cgroup *cont)
+{
+ struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
+
+ if (!css)
+ return ERR_PTR(-ENOMEM);
+
+ return css;
+}
+
+static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+ kfree(cont->subsys[debug_subsys_id]);
+}
+
+static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
+{
+ return atomic_read(&cont->count);
+}
+
+static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
+{
+ return cgroup_task_count(cont);
+}
+
+static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
+{
+ return (u64)(unsigned long)current->cgroups;
+}
+
+static u64 current_css_set_refcount_read(struct cgroup *cont,
+ struct cftype *cft)
+{
+ u64 count;
+
+ rcu_read_lock();
+ count = atomic_read(¤t->cgroups->refcount);
+ rcu_read_unlock();
+ return count;
+}
+
+static int current_css_set_cg_links_read(struct cgroup *cont,
+ struct cftype *cft,
+ struct seq_file *seq)
+{
+ struct cg_cgroup_link *link;
+ struct css_set *cg;
+
+ read_lock(&css_set_lock);
+ rcu_read_lock();
+ cg = rcu_dereference(current->cgroups);
+ list_for_each_entry(link, &cg->cg_links, cg_link_list) {
+ struct cgroup *c = link->cgrp;
+ const char *name;
+
+ if (c->dentry)
+ name = c->dentry->d_name.name;
+ else
+ name = "?";
+ seq_printf(seq, "Root %d group %s\n",
+ c->root->hierarchy_id, name);
+ }
+ rcu_read_unlock();
+ read_unlock(&css_set_lock);
+ return 0;
+}
+
+#define MAX_TASKS_SHOWN_PER_CSS 25
+static int cgroup_css_links_read(struct cgroup *cont,
+ struct cftype *cft,
+ struct seq_file *seq)
+{
+ struct cg_cgroup_link *link;
+
+ read_lock(&css_set_lock);
+ list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
+ struct css_set *cg = link->cg;
+ struct task_struct *task;
+ int count = 0;
+ seq_printf(seq, "css_set %p\n", cg);
+ list_for_each_entry(task, &cg->tasks, cg_list) {
+ if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
+ seq_puts(seq, " ...\n");
+ break;
+ } else {
+ seq_printf(seq, " task %d\n",
+ task_pid_vnr(task));
+ }
+ }
+ }
+ read_unlock(&css_set_lock);
+ return 0;
+}
+
+static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+}
+
+static struct cftype debug_files[] = {
+ {
+ .name = "cgroup_refcount",
+ .read_u64 = cgroup_refcount_read,
+ },
+ {
+ .name = "taskcount",
+ .read_u64 = debug_taskcount_read,
+ },
+
+ {
+ .name = "current_css_set",
+ .read_u64 = current_css_set_read,
+ },
+
+ {
+ .name = "current_css_set_refcount",
+ .read_u64 = current_css_set_refcount_read,
+ },
+
+ {
+ .name = "current_css_set_cg_links",
+ .read_seq_string = current_css_set_cg_links_read,
+ },
+
+ {
+ .name = "cgroup_css_links",
+ .read_seq_string = cgroup_css_links_read,
+ },
+
+ {
+ .name = "releasable",
+ .read_u64 = releasable_read,
+ },
+};
+
+static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+ return cgroup_add_files(cont, ss, debug_files,
+ ARRAY_SIZE(debug_files));
+}
+
+struct cgroup_subsys debug_subsys = {
+ .name = "debug",
+ .create = debug_create,
+ .destroy = debug_destroy,
+ .populate = debug_populate,
+ .subsys_id = debug_subsys_id,
+};
+#endif /* CONFIG_CGROUP_DEBUG */
+++ /dev/null
-/*
- * kernel/cgroup_debug.c - Example cgroup subsystem that
- * exposes debug info
- *
- * Copyright (C) Google Inc, 2007
- *
- * Developed by Paul Menage (menage@google.com)
- *
- */
-
-#include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/rcupdate.h>
-
-#include <asm/atomic.h>
-
-static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
- struct cgroup *cont)
-{
- struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
-
- if (!css)
- return ERR_PTR(-ENOMEM);
-
- return css;
-}
-
-static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
-{
- kfree(cont->subsys[debug_subsys_id]);
-}
-
-static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
-{
- return atomic_read(&cont->count);
-}
-
-static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
-{
- u64 count;
-
- count = cgroup_task_count(cont);
- return count;
-}
-
-static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
-{
- return (u64)(long)current->cgroups;
-}
-
-static u64 current_css_set_refcount_read(struct cgroup *cont,
- struct cftype *cft)
-{
- u64 count;
-
- rcu_read_lock();
- count = atomic_read(¤t->cgroups->refcount);
- rcu_read_unlock();
- return count;
-}
-
-static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
-{
- return test_bit(CGRP_RELEASABLE, &cgrp->flags);
-}
-
-static struct cftype files[] = {
- {
- .name = "cgroup_refcount",
- .read_u64 = cgroup_refcount_read,
- },
- {
- .name = "taskcount",
- .read_u64 = taskcount_read,
- },
-
- {
- .name = "current_css_set",
- .read_u64 = current_css_set_read,
- },
-
- {
- .name = "current_css_set_refcount",
- .read_u64 = current_css_set_refcount_read,
- },
-
- {
- .name = "releasable",
- .read_u64 = releasable_read,
- },
-};
-
-static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-{
- return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
-}
-
-struct cgroup_subsys debug_subsys = {
- .name = "debug",
- .create = debug_create,
- .destroy = debug_destroy,
- .populate = debug_populate,
- .subsys_id = debug_subsys_id,
-};
*/
static int freezer_can_attach(struct cgroup_subsys *ss,
struct cgroup *new_cgroup,
- struct task_struct *task)
+ struct task_struct *task, bool threadgroup)
{
struct freezer *freezer;
if (freezer->state == CGROUP_FROZEN)
return -EBUSY;
+ if (threadgroup) {
+ struct task_struct *c;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+ if (is_task_frozen_enough(c)) {
+ rcu_read_unlock();
+ return -EBUSY;
+ }
+ }
+ rcu_read_unlock();
+ }
+
return 0;
}
static cpumask_var_t cpus_attach;
/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys *ss,
- struct cgroup *cont, struct task_struct *tsk)
+static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+ struct task_struct *tsk, bool threadgroup)
{
+ int ret;
struct cpuset *cs = cgroup_cs(cont);
if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
if (tsk->flags & PF_THREAD_BOUND)
return -EINVAL;
- return security_task_setscheduler(tsk, 0, NULL);
+ ret = security_task_setscheduler(tsk, 0, NULL);
+ if (ret)
+ return ret;
+ if (threadgroup) {
+ struct task_struct *c;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+ ret = security_task_setscheduler(c, 0, NULL);
+ if (ret) {
+ rcu_read_unlock();
+ return ret;
+ }
+ }
+ rcu_read_unlock();
+ }
+ return 0;
+}
+
+static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
+ struct cpuset *cs)
+{
+ int err;
+ /*
+ * can_attach beforehand should guarantee that this doesn't fail.
+ * TODO: have a better way to handle failure here
+ */
+ err = set_cpus_allowed_ptr(tsk, cpus_attach);
+ WARN_ON_ONCE(err);
+
+ task_lock(tsk);
+ cpuset_change_task_nodemask(tsk, to);
+ task_unlock(tsk);
+ cpuset_update_task_spread_flag(cs, tsk);
+
}
-static void cpuset_attach(struct cgroup_subsys *ss,
- struct cgroup *cont, struct cgroup *oldcont,
- struct task_struct *tsk)
+static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+ struct cgroup *oldcont, struct task_struct *tsk,
+ bool threadgroup)
{
nodemask_t from, to;
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
- int err;
if (cs == &top_cpuset) {
cpumask_copy(cpus_attach, cpu_possible_mask);
guarantee_online_cpus(cs, cpus_attach);
guarantee_online_mems(cs, &to);
}
- err = set_cpus_allowed_ptr(tsk, cpus_attach);
- if (err)
- return;
- task_lock(tsk);
- cpuset_change_task_nodemask(tsk, &to);
- task_unlock(tsk);
- cpuset_update_task_spread_flag(cs, tsk);
+ /* do per-task migration stuff possibly for each in the threadgroup */
+ cpuset_attach_task(tsk, &to, cs);
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+ cpuset_attach_task(c, &to, cs);
+ }
+ rcu_read_unlock();
+ }
+ /* change mm; only needs to be done once even if threadgroup */
from = oldcs->mems_allowed;
to = cs->mems_allowed;
mm = get_task_mm(tsk);
#ifdef CONFIG_DEBUG_CREDENTIALS
+bool creds_are_invalid(const struct cred *cred)
+{
+ if (cred->magic != CRED_MAGIC)
+ return true;
+ if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
+ return true;
+#ifdef CONFIG_SECURITY_SELINUX
+ if (selinux_is_enabled()) {
+ if ((unsigned long) cred->security < PAGE_SIZE)
+ return true;
+ if ((*(u32 *)cred->security & 0xffffff00) ==
+ (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
+ return true;
+ }
+#endif
+ return false;
+}
+EXPORT_SYMBOL(creds_are_invalid);
+
/*
* dump invalid credentials
*/
disassociate_ctty(1);
module_put(task_thread_info(tsk)->exec_domain->module);
- if (tsk->binfmt)
- module_put(tsk->binfmt->module);
proc_exit_connector(tsk);
int __user *wo_stat;
struct rusage __user *wo_rusage;
+ wait_queue_t child_wait;
int notask_error;
};
-static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
{
- struct pid *pid = NULL;
- if (type == PIDTYPE_PID)
- pid = task->pids[type].pid;
- else if (type < PIDTYPE_MAX)
- pid = task->group_leader->pids[type].pid;
- return pid;
+ if (type != PIDTYPE_PID)
+ task = task->group_leader;
+ return task->pids[type].pid;
}
-static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
{
- int err;
-
- if (wo->wo_type < PIDTYPE_MAX) {
- if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
- return 0;
- }
+ return wo->wo_type == PIDTYPE_MAX ||
+ task_pid_type(p, wo->wo_type) == wo->wo_pid;
+}
+static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+{
+ if (!eligible_pid(wo, p))
+ return 0;
/* Wait for all children (clone and not) if __WALL is set;
* otherwise, wait for clone children *only* if __WCLONE is
* set; otherwise, wait for non-clone children *only*. (Note:
&& !(wo->wo_flags & __WALL))
return 0;
- err = security_task_wait(p);
- if (err)
- return err;
-
return 1;
}
put_task_struct(p);
infop = wo->wo_info;
- if (!retval)
- retval = put_user(SIGCHLD, &infop->si_signo);
- if (!retval)
- retval = put_user(0, &infop->si_errno);
- if (!retval)
- retval = put_user((short)why, &infop->si_code);
- if (!retval)
- retval = put_user(pid, &infop->si_pid);
- if (!retval)
- retval = put_user(uid, &infop->si_uid);
- if (!retval)
- retval = put_user(status, &infop->si_status);
+ if (infop) {
+ if (!retval)
+ retval = put_user(SIGCHLD, &infop->si_signo);
+ if (!retval)
+ retval = put_user(0, &infop->si_errno);
+ if (!retval)
+ retval = put_user((short)why, &infop->si_code);
+ if (!retval)
+ retval = put_user(pid, &infop->si_pid);
+ if (!retval)
+ retval = put_user(uid, &infop->si_uid);
+ if (!retval)
+ retval = put_user(status, &infop->si_status);
+ }
if (!retval)
retval = pid;
return retval;
* then ->notask_error is 0 if @p is an eligible child,
* or another error from security_task_wait(), or still -ECHILD.
*/
-static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
- int ptrace, struct task_struct *p)
+static int wait_consider_task(struct wait_opts *wo, int ptrace,
+ struct task_struct *p)
{
int ret = eligible_child(wo, p);
if (!ret)
return ret;
+ ret = security_task_wait(p);
if (unlikely(ret < 0)) {
/*
* If we have not yet seen any eligible child,
* Do not consider detached threads.
*/
if (!task_detached(p)) {
- int ret = wait_consider_task(wo, tsk, 0, p);
+ int ret = wait_consider_task(wo, 0, p);
if (ret)
return ret;
}
struct task_struct *p;
list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
- int ret = wait_consider_task(wo, tsk, 1, p);
+ int ret = wait_consider_task(wo, 1, p);
if (ret)
return ret;
}
return 0;
}
+static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+ int sync, void *key)
+{
+ struct wait_opts *wo = container_of(wait, struct wait_opts,
+ child_wait);
+ struct task_struct *p = key;
+
+ if (!eligible_pid(wo, p))
+ return 0;
+
+ if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
+ return 0;
+
+ return default_wake_function(wait, mode, sync, key);
+}
+
+void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
+{
+ __wake_up_sync_key(&parent->signal->wait_chldexit,
+ TASK_INTERRUPTIBLE, 1, p);
+}
+
static long do_wait(struct wait_opts *wo)
{
- DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk;
int retval;
trace_sched_process_wait(wo->wo_pid);
- add_wait_queue(¤t->signal->wait_chldexit,&wait);
+ init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
+ wo->child_wait.private = current;
+ add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait);
repeat:
/*
* If there is nothing that can match our critiera just get out.
}
end:
__set_current_state(TASK_RUNNING);
- remove_wait_queue(¤t->signal->wait_chldexit,&wait);
- if (wo->wo_info) {
- struct siginfo __user *infop = wo->wo_info;
-
- if (retval > 0)
- retval = 0;
- else {
- /*
- * For a WNOHANG return, clear out all the fields
- * we would set so the user can easily tell the
- * difference.
- */
- if (!retval)
- retval = put_user(0, &infop->si_signo);
- if (!retval)
- retval = put_user(0, &infop->si_errno);
- if (!retval)
- retval = put_user(0, &infop->si_code);
- if (!retval)
- retval = put_user(0, &infop->si_pid);
- if (!retval)
- retval = put_user(0, &infop->si_uid);
- if (!retval)
- retval = put_user(0, &infop->si_status);
- }
- }
+ remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait);
return retval;
}
wo.wo_stat = NULL;
wo.wo_rusage = ru;
ret = do_wait(&wo);
+
+ if (ret > 0) {
+ ret = 0;
+ } else if (infop) {
+ /*
+ * For a WNOHANG return, clear out all the fields
+ * we would set so the user can easily tell the
+ * difference.
+ */
+ if (!ret)
+ ret = put_user(0, &infop->si_signo);
+ if (!ret)
+ ret = put_user(0, &infop->si_errno);
+ if (!ret)
+ ret = put_user(0, &infop->si_code);
+ if (!ret)
+ ret = put_user(0, &infop->si_pid);
+ if (!ret)
+ ret = put_user(0, &infop->si_uid);
+ if (!ret)
+ ret = put_user(0, &infop->si_status);
+ }
+
put_pid(pid);
/* avoid REGPARM breakage on x86: */
#include <linux/init_task.h>
+static void mm_init_aio(struct mm_struct *mm)
+{
+#ifdef CONFIG_AIO
+ spin_lock_init(&mm->ioctx_lock);
+ INIT_HLIST_HEAD(&mm->ioctx_list);
+#endif
+}
+
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
{
atomic_set(&mm->mm_users, 1);
set_mm_counter(mm, file_rss, 0);
set_mm_counter(mm, anon_rss, 0);
spin_lock_init(&mm->page_table_lock);
- spin_lock_init(&mm->ioctx_lock);
- INIT_HLIST_HEAD(&mm->ioctx_list);
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
+ mm_init_aio(mm);
mm_init_owner(mm, p);
if (likely(!mm_alloc_pgd(mm))) {
spin_unlock(&mmlist_lock);
}
put_swap_token(mm);
+ if (mm->binfmt)
+ module_put(mm->binfmt->module);
mmdrop(mm);
}
}
mm->hiwater_rss = get_mm_rss(mm);
mm->hiwater_vm = mm->total_vm;
+ if (mm->binfmt && !try_module_get(mm->binfmt->module))
+ goto free_pt;
+
return mm;
free_pt:
+ /* don't put binfmt in mmput, we haven't got module yet */
+ mm->binfmt = NULL;
mmput(mm);
fail_nomem:
if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
return ERR_PTR(-EINVAL);
+ /*
+ * Siblings of global init remain as zombies on exit since they are
+ * not reaped by their parent (swapper). To solve this and to avoid
+ * multi-rooted process trees, prevent global and container-inits
+ * from creating siblings.
+ */
+ if ((clone_flags & CLONE_PARENT) &&
+ current->signal->flags & SIGNAL_UNKILLABLE)
+ return ERR_PTR(-EINVAL);
+
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
if (!try_module_get(task_thread_info(p)->exec_domain->module))
goto bad_fork_cleanup_count;
- if (p->binfmt && !try_module_get(p->binfmt->module))
- goto bad_fork_cleanup_put_domain;
-
p->did_exec = 0;
delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
copy_flags(clone_flags, p);
#endif
cgroup_exit(p, cgroup_callbacks_done);
delayacct_tsk_free(p);
- if (p->binfmt)
- module_put(p->binfmt->module);
-bad_fork_cleanup_put_domain:
module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
depends on GCOV_KERNEL
- depends on S390 || X86 || (PPC && EXPERIMENTAL)
+ depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
default n
---help---
This options activates profiling for the entire kernel.
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto out;
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
- enum umh_wait wait = sub_info->wait;
int retval;
BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
*/
set_user_nice(current, 0);
- if (wait == UMH_WAIT_EXEC)
- complete(sub_info->complete);
-
retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
/* Exec failed? */
- if (wait != UMH_WAIT_EXEC)
- sub_info->retval = retval;
+ sub_info->retval = retval;
do_exit(0);
}
switch (wait) {
case UMH_NO_WAIT:
- case UMH_WAIT_EXEC:
break;
case UMH_WAIT_PROC:
if (pid > 0)
break;
sub_info->retval = pid;
- break;
+ /* FALLTHROUGH */
+
+ case UMH_WAIT_EXEC:
+ complete(sub_info->complete);
}
}
}
}
+static void free_modinfo(struct module *mod)
+{
+ struct module_attribute *attr;
+ int i;
+
+ for (i = 0; (attr = modinfo_attrs[i]); i++) {
+ if (attr->free)
+ attr->free(mod);
+ }
+}
+
#ifdef CONFIG_KALLSYMS
/* lookup symbol in given range of kernel_symbols */
return '?';
}
+static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
+ unsigned int shnum)
+{
+ const Elf_Shdr *sec;
+
+ if (src->st_shndx == SHN_UNDEF
+ || src->st_shndx >= shnum
+ || !src->st_name)
+ return false;
+
+ sec = sechdrs + src->st_shndx;
+ if (!(sec->sh_flags & SHF_ALLOC)
+#ifndef CONFIG_KALLSYMS_ALL
+ || !(sec->sh_flags & SHF_EXECINSTR)
+#endif
+ || (sec->sh_entsize & INIT_OFFSET_MASK))
+ return false;
+
+ return true;
+}
+
+static unsigned long layout_symtab(struct module *mod,
+ Elf_Shdr *sechdrs,
+ unsigned int symindex,
+ unsigned int strindex,
+ const Elf_Ehdr *hdr,
+ const char *secstrings,
+ unsigned long *pstroffs,
+ unsigned long *strmap)
+{
+ unsigned long symoffs;
+ Elf_Shdr *symsect = sechdrs + symindex;
+ Elf_Shdr *strsect = sechdrs + strindex;
+ const Elf_Sym *src;
+ const char *strtab;
+ unsigned int i, nsrc, ndst;
+
+ /* Put symbol section at end of init part of module. */
+ symsect->sh_flags |= SHF_ALLOC;
+ symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
+ symindex) | INIT_OFFSET_MASK;
+ DEBUGP("\t%s\n", secstrings + symsect->sh_name);
+
+ src = (void *)hdr + symsect->sh_offset;
+ nsrc = symsect->sh_size / sizeof(*src);
+ strtab = (void *)hdr + strsect->sh_offset;
+ for (ndst = i = 1; i < nsrc; ++i, ++src)
+ if (is_core_symbol(src, sechdrs, hdr->e_shnum)) {
+ unsigned int j = src->st_name;
+
+ while(!__test_and_set_bit(j, strmap) && strtab[j])
+ ++j;
+ ++ndst;
+ }
+
+ /* Append room for core symbols at end of core part. */
+ symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
+ mod->core_size = symoffs + ndst * sizeof(Elf_Sym);
+
+ /* Put string table section at end of init part of module. */
+ strsect->sh_flags |= SHF_ALLOC;
+ strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
+ strindex) | INIT_OFFSET_MASK;
+ DEBUGP("\t%s\n", secstrings + strsect->sh_name);
+
+ /* Append room for core symbols' strings at end of core part. */
+ *pstroffs = mod->core_size;
+ __set_bit(0, strmap);
+ mod->core_size += bitmap_weight(strmap, strsect->sh_size);
+
+ return symoffs;
+}
+
static void add_kallsyms(struct module *mod,
Elf_Shdr *sechdrs,
+ unsigned int shnum,
unsigned int symindex,
unsigned int strindex,
- const char *secstrings)
+ unsigned long symoffs,
+ unsigned long stroffs,
+ const char *secstrings,
+ unsigned long *strmap)
{
- unsigned int i;
+ unsigned int i, ndst;
+ const Elf_Sym *src;
+ Elf_Sym *dst;
+ char *s;
mod->symtab = (void *)sechdrs[symindex].sh_addr;
mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
for (i = 0; i < mod->num_symtab; i++)
mod->symtab[i].st_info
= elf_type(&mod->symtab[i], sechdrs, secstrings, mod);
+
+ mod->core_symtab = dst = mod->module_core + symoffs;
+ src = mod->symtab;
+ *dst = *src;
+ for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
+ if (!is_core_symbol(src, sechdrs, shnum))
+ continue;
+ dst[ndst] = *src;
+ dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name);
+ ++ndst;
+ }
+ mod->core_num_syms = ndst;
+
+ mod->core_strtab = s = mod->module_core + stroffs;
+ for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i)
+ if (test_bit(i, strmap))
+ *++s = mod->strtab[i];
}
#else
+static inline unsigned long layout_symtab(struct module *mod,
+ Elf_Shdr *sechdrs,
+ unsigned int symindex,
+ unsigned int strindex,
+ const Elf_Hdr *hdr,
+ const char *secstrings,
+ unsigned long *pstroffs,
+ unsigned long *strmap)
+{
+}
static inline void add_kallsyms(struct module *mod,
Elf_Shdr *sechdrs,
+ unsigned int shnum,
unsigned int symindex,
unsigned int strindex,
- const char *secstrings)
+ unsigned long symoffs,
+ unsigned long stroffs,
+ const char *secstrings,
+ const unsigned long *strmap)
{
}
#endif /* CONFIG_KALLSYMS */
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+#ifdef CONFIG_KALLSYMS
+ unsigned long symoffs, stroffs, *strmap;
+#endif
mm_segment_t old_fs;
DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
/* Don't keep modinfo and version sections. */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
-#ifdef CONFIG_KALLSYMS
- /* Keep symbol and string tables for decoding later. */
- sechdrs[symindex].sh_flags |= SHF_ALLOC;
- sechdrs[strindex].sh_flags |= SHF_ALLOC;
-#endif
/* Check module struct version now, before we try to use module. */
if (!check_modstruct_version(sechdrs, versindex, mod)) {
goto free_hdr;
}
+ strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size)
+ * sizeof(long), GFP_KERNEL);
+ if (!strmap) {
+ err = -ENOMEM;
+ goto free_mod;
+ }
+
if (find_module(mod->name)) {
err = -EEXIST;
goto free_mod;
this is done generically; there doesn't appear to be any
special cases for the architectures. */
layout_sections(mod, hdr, sechdrs, secstrings);
+ symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr,
+ secstrings, &stroffs, strmap);
/* Do the allocs. */
ptr = module_alloc_update_bounds(mod->core_size);
percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
sechdrs[pcpuindex].sh_size);
- add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+ add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
+ symoffs, stroffs, secstrings, strmap);
+ kfree(strmap);
+ strmap = NULL;
if (!mod->taints) {
struct _ddebug *debug;
synchronize_sched();
module_arch_cleanup(mod);
cleanup:
+ free_modinfo(mod);
kobject_del(&mod->mkobj.kobj);
kobject_put(&mod->mkobj.kobj);
free_unload:
module_unload_free(mod);
#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
- free_init:
percpu_modfree(mod->refptr);
+ free_init:
#endif
module_free(mod, mod->module_init);
free_core:
percpu_modfree(percpu);
free_mod:
kfree(args);
+ kfree(strmap);
free_hdr:
vfree(hdr);
return ERR_PTR(err);
/* Drop initial reference. */
module_put(mod);
trim_init_extable(mod);
+#ifdef CONFIG_KALLSYMS
+ mod->num_symtab = mod->core_num_syms;
+ mod->symtab = mod->core_symtab;
+ mod->strtab = mod->core_strtab;
+#endif
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
* (hence either you are in the same cgroup as task, or in an
* ancestor cgroup thereof)
*/
-static int ns_can_attach(struct cgroup_subsys *ss,
- struct cgroup *new_cgroup, struct task_struct *task)
+static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
+ struct task_struct *task, bool threadgroup)
{
if (current != task) {
if (!capable(CAP_SYS_ADMIN))
if (!cgroup_is_descendant(new_cgroup, task))
return -EPERM;
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+ if (!cgroup_is_descendant(new_cgroup, c)) {
+ rcu_read_unlock();
+ return -EPERM;
+ }
+ }
+ rcu_read_unlock();
+ }
+
return 0;
}
#include <linux/device.h>
#include <linux/err.h>
#include <linux/slab.h>
+#include <linux/ctype.h>
#if 0
#define DEBUGP printk
}
for (i = 0; args[i]; i++) {
- if (args[i] == ' ' && !in_quote)
+ if (isspace(args[i]) && !in_quote)
break;
if (equals == 0) {
if (args[i] == '=')
next = args + i;
/* Chew up trailing spaces. */
- while (*next == ' ')
+ while (isspace(*next))
next++;
return next;
}
DEBUGP("Parsing ARGS: %s\n", args);
/* Chew leading spaces */
- while (*args == ' ')
+ while (isspace(*args))
args++;
while (*args) {
{
if (!(flags & CLONE_NEWPID))
return get_pid_ns(old_ns);
- if (flags & CLONE_THREAD)
+ if (flags & (CLONE_THREAD|CLONE_PARENT))
return ERR_PTR(-EINVAL);
return create_pid_namespace(old_ns);
}
#include <linux/module.h>
#include <linux/file.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/genhd.h>
* or self-reaping. Do notification now if it would have happened earlier.
* If it should reap itself, return true.
*
- * If it's our own child, there is no notification to do.
- * But if our normal children self-reap, then this child
- * was prevented by ptrace and we must reap it now.
+ * If it's our own child, there is no notification to do. But if our normal
+ * children self-reap, then this child was prevented by ptrace and we must
+ * reap it now, in that case we must also wake up sub-threads sleeping in
+ * do_wait().
*/
static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
{
if (!task_detached(p) && thread_group_empty(p)) {
if (!same_thread_group(p->real_parent, tracer))
do_notify_parent(p, p->exit_signal);
- else if (ignoring_children(tracer->sighand))
+ else if (ignoring_children(tracer->sighand)) {
+ __wake_up_parent(p, tracer);
p->exit_signal = -1;
+ }
}
if (task_detached(p)) {
/* Mark it as in the process of being reaped. */
{
spin_lock_init(&counter->lock);
counter->limit = RESOURCE_MAX;
+ counter->soft_limit = RESOURCE_MAX;
counter->parent = parent;
}
}
int res_counter_charge(struct res_counter *counter, unsigned long val,
- struct res_counter **limit_fail_at)
+ struct res_counter **limit_fail_at,
+ struct res_counter **soft_limit_fail_at)
{
int ret;
unsigned long flags;
struct res_counter *c, *u;
*limit_fail_at = NULL;
+ if (soft_limit_fail_at)
+ *soft_limit_fail_at = NULL;
local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
ret = res_counter_charge_locked(c, val);
+ /*
+ * With soft limits, we return the highest ancestor
+ * that exceeds its soft limit
+ */
+ if (soft_limit_fail_at &&
+ !res_counter_soft_limit_check_locked(c))
+ *soft_limit_fail_at = c;
spin_unlock(&c->lock);
if (ret < 0) {
*limit_fail_at = c;
counter->usage -= val;
}
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+ bool *was_soft_limit_excess)
{
unsigned long flags;
struct res_counter *c;
local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
+ if (was_soft_limit_excess)
+ *was_soft_limit_excess =
+ !res_counter_soft_limit_check_locked(c);
res_counter_uncharge_locked(c, val);
spin_unlock(&c->lock);
}
return &counter->limit;
case RES_FAILCNT:
return &counter->failcnt;
+ case RES_SOFT_LIMIT:
+ return &counter->soft_limit;
};
BUG();
#endif /* CONFIG_RT_GROUP_SCHED */
int sched_rt_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
old_period = sysctl_sched_rt_period;
old_runtime = sysctl_sched_rt_runtime;
- ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
ret = sched_rt_global_constraints();
}
static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct task_struct *tsk)
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
#ifdef CONFIG_RT_GROUP_SCHED
if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
if (tsk->sched_class != &fair_sched_class)
return -EINVAL;
#endif
+ return 0;
+}
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct task_struct *tsk, bool threadgroup)
+{
+ int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+ if (retval)
+ return retval;
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+ retval = cpu_cgroup_can_attach_task(cgrp, c);
+ if (retval) {
+ rcu_read_unlock();
+ return retval;
+ }
+ }
+ rcu_read_unlock();
+ }
return 0;
}
static void
cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cont, struct task_struct *tsk)
+ struct cgroup *old_cont, struct task_struct *tsk,
+ bool threadgroup)
{
sched_move_task(tsk);
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+ sched_move_task(c);
+ }
+ rcu_read_unlock();
+ }
}
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SCHED_DEBUG
int sched_nr_latency_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
return ret;
if (why) {
/*
- * The first thread which returns from finish_stop()
+ * The first thread which returns from do_signal_stop()
* will take ->siglock, notice SIGNAL_CLD_MASK, and
* notify its parent. See get_signal_to_deliver().
*/
return send_signal(sig, info, t, 0);
}
+int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+ bool group)
+{
+ unsigned long flags;
+ int ret = -ESRCH;
+
+ if (lock_task_sighand(p, &flags)) {
+ ret = send_signal(sig, info, p, group);
+ unlock_task_sighand(p, &flags);
+ }
+
+ return ret;
+}
+
/*
* Force a signal that the process can't ignore: if necessary
* we unblock the signal and change any SIG_IGN to SIG_DFL.
}
}
-int __fatal_signal_pending(struct task_struct *tsk)
-{
- return sigismember(&tsk->pending.signal, SIGKILL);
-}
-EXPORT_SYMBOL(__fatal_signal_pending);
-
struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
{
struct sighand_struct *sighand;
*/
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
- unsigned long flags;
- int ret;
+ int ret = check_kill_permission(sig, info, p);
- ret = check_kill_permission(sig, info, p);
-
- if (!ret && sig) {
- ret = -ESRCH;
- if (lock_task_sighand(p, &flags)) {
- ret = __group_send_sig_info(sig, info, p);
- unlock_task_sighand(p, &flags);
- }
- }
+ if (!ret && sig)
+ ret = do_send_sig_info(sig, info, p, true);
return ret;
}
* These are for backward compatibility with the rest of the kernel source.
*/
-/*
- * The caller must ensure the task can't exit.
- */
int
send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
- int ret;
- unsigned long flags;
-
/*
* Make sure legacy kernel users don't send in bad values
* (normal paths check this in check_kill_permission).
if (!valid_signal(sig))
return -EINVAL;
- spin_lock_irqsave(&p->sighand->siglock, flags);
- ret = specific_send_sig_info(sig, info, p);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
- return ret;
+ return do_send_sig_info(sig, info, p, false);
}
#define __si_special(priv) \
return ret;
}
-/*
- * Wake up any threads in the parent blocked in wait* syscalls.
- */
-static inline void __wake_up_parent(struct task_struct *p,
- struct task_struct *parent)
-{
- wake_up_interruptible_sync(&parent->signal->wait_chldexit);
-}
-
/*
* Let a parent know about the death of a child.
* For a stopped/continued status change, use do_notify_parent_cldstop instead.
spin_unlock_irq(¤t->sighand->siglock);
}
-static void
-finish_stop(int stop_count)
-{
- /*
- * If there are no other threads in the group, or if there is
- * a group stop in progress and we are the last to stop,
- * report to the parent. When ptraced, every thread reports itself.
- */
- if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
- read_lock(&tasklist_lock);
- do_notify_parent_cldstop(current, CLD_STOPPED);
- read_unlock(&tasklist_lock);
- }
-
- do {
- schedule();
- } while (try_to_freeze());
- /*
- * Now we don't run again until continued.
- */
- current->exit_code = 0;
-}
-
/*
* This performs the stopping for SIGSTOP and other stop signals.
* We have to stop all threads in the thread group.
static int do_signal_stop(int signr)
{
struct signal_struct *sig = current->signal;
- int stop_count;
+ int notify;
- if (sig->group_stop_count > 0) {
- /*
- * There is a group stop in progress. We don't need to
- * start another one.
- */
- stop_count = --sig->group_stop_count;
- } else {
+ if (!sig->group_stop_count) {
struct task_struct *t;
if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
*/
sig->group_exit_code = signr;
- stop_count = 0;
+ sig->group_stop_count = 1;
for (t = next_thread(current); t != current; t = next_thread(t))
/*
* Setting state to TASK_STOPPED for a group
*/
if (!(t->flags & PF_EXITING) &&
!task_is_stopped_or_traced(t)) {
- stop_count++;
+ sig->group_stop_count++;
signal_wake_up(t, 0);
}
- sig->group_stop_count = stop_count;
}
+ /*
+ * If there are no other threads in the group, or if there is
+ * a group stop in progress and we are the last to stop, report
+ * to the parent. When ptraced, every thread reports itself.
+ */
+ notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
+ notify = tracehook_notify_jctl(notify, CLD_STOPPED);
+ /*
+ * tracehook_notify_jctl() can drop and reacquire siglock, so
+ * we keep ->group_stop_count != 0 before the call. If SIGCONT
+ * or SIGKILL comes in between ->group_stop_count == 0.
+ */
+ if (sig->group_stop_count) {
+ if (!--sig->group_stop_count)
+ sig->flags = SIGNAL_STOP_STOPPED;
+ current->exit_code = sig->group_exit_code;
+ __set_current_state(TASK_STOPPED);
+ }
+ spin_unlock_irq(¤t->sighand->siglock);
- if (stop_count == 0)
- sig->flags = SIGNAL_STOP_STOPPED;
- current->exit_code = sig->group_exit_code;
- __set_current_state(TASK_STOPPED);
+ if (notify) {
+ read_lock(&tasklist_lock);
+ do_notify_parent_cldstop(current, notify);
+ read_unlock(&tasklist_lock);
+ }
+
+ /* Now we don't run again until woken by SIGCONT or SIGKILL */
+ do {
+ schedule();
+ } while (try_to_freeze());
+
+ tracehook_finish_jctl();
+ current->exit_code = 0;
- spin_unlock_irq(¤t->sighand->siglock);
- finish_stop(stop_count);
return 1;
}
int why = (signal->flags & SIGNAL_STOP_CONTINUED)
? CLD_CONTINUED : CLD_STOPPED;
signal->flags &= ~SIGNAL_CLD_MASK;
- spin_unlock_irq(&sighand->siglock);
- if (unlikely(!tracehook_notify_jctl(1, why)))
- goto relock;
+ why = tracehook_notify_jctl(why, CLD_CONTINUED);
+ spin_unlock_irq(&sighand->siglock);
- read_lock(&tasklist_lock);
- do_notify_parent_cldstop(current->group_leader, why);
- read_unlock(&tasklist_lock);
+ if (why) {
+ read_lock(&tasklist_lock);
+ do_notify_parent_cldstop(current->group_leader, why);
+ read_unlock(&tasklist_lock);
+ }
goto relock;
}
if (unlikely(tsk->signal->group_stop_count) &&
!--tsk->signal->group_stop_count) {
tsk->signal->flags = SIGNAL_STOP_STOPPED;
- group_stop = 1;
+ group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
}
out:
spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
+ if (unlikely(group_stop)) {
read_lock(&tasklist_lock);
- do_notify_parent_cldstop(tsk, CLD_STOPPED);
+ do_notify_parent_cldstop(tsk, group_stop);
read_unlock(&tasklist_lock);
}
}
do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
{
struct task_struct *p;
- unsigned long flags;
int error = -ESRCH;
rcu_read_lock();
/*
* The null signal is a permissions and process existence
* probe. No signal is actually delivered.
- *
- * If lock_task_sighand() fails we pretend the task dies
- * after receiving the signal. The window is tiny, and the
- * signal is private anyway.
*/
- if (!error && sig && lock_task_sighand(p, &flags)) {
- error = specific_send_sig_info(sig, info, p);
- unlock_task_sighand(p, &flags);
+ if (!error && sig) {
+ error = do_send_sig_info(sig, info, p, false);
+ /*
+ * If lock_task_sighand() failed we pretend the task
+ * dies after receiving the signal. The window is tiny,
+ * and the signal is private anyway.
+ */
+ if (unlikely(error == -ESRCH))
+ error = 0;
}
}
rcu_read_unlock();
static void slow_work_oom_timeout(unsigned long);
#ifdef CONFIG_SYSCTL
-static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *,
+static int slow_work_min_threads_sysctl(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *,
+static int slow_work_max_threads_sysctl(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
#endif
* Handle adjustment of the minimum number of threads
*/
static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
int n;
if (ret == 0) {
* Handle adjustment of the maximum number of threads
*/
static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
int n;
if (ret == 0) {
generic_exec_single(cpu, data, wait);
}
-/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
-
-#ifndef arch_send_call_function_ipi_mask
-# define arch_send_call_function_ipi_mask(maskp) \
- arch_send_call_function_ipi(*(maskp))
-#endif
-
/**
* smp_call_function_many(): Run a function on a set of other CPUs.
* @mask: The set of cpus to run on (only runs on online subset).
EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
touch_all_softlockup_watchdogs();
- return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
/*
current->timer_slack_ns = arg2;
error = 0;
break;
+ case PR_MCE_KILL:
+ if (arg4 | arg5)
+ return -EINVAL;
+ switch (arg2) {
+ case 0:
+ if (arg3 != 0)
+ return -EINVAL;
+ current->flags &= ~PF_MCE_PROCESS;
+ break;
+ case 1:
+ current->flags |= PF_MCE_PROCESS;
+ if (arg3 != 0)
+ current->flags |= PF_MCE_EARLY;
+ else
+ current->flags &= ~PF_MCE_EARLY;
+ break;
+ default:
+ return -EINVAL;
+ }
+ error = 0;
+ break;
+
default:
error = -EINVAL;
break;
cond_syscall(compat_sys_sendmsg);
cond_syscall(sys_recvmsg);
cond_syscall(compat_sys_recvmsg);
+cond_syscall(compat_sys_recvfrom);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
cond_syscall(compat_sys_futex);
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/ctype.h>
-#include <linux/utsname.h>
#include <linux/kmemcheck.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
extern int core_uses_pid;
extern int suid_dumpable;
extern char core_pattern[];
+extern unsigned int core_pipe_limit;
extern int pid_max;
extern int min_free_kbytes;
extern int pid_max_min, pid_max_max;
#endif
#ifdef CONFIG_PROC_SYSCTL
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
.proc_handler = &proc_dostring,
.strategy = &sysctl_string,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "core_pipe_limit",
+ .data = &core_pipe_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
.mode = 0644,
.proc_handler = &scan_unevictable_handler,
},
+#ifdef CONFIG_MEMORY_FAILURE
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "memory_failure_early_kill",
+ .data = &sysctl_memory_failure_early_kill,
+ .maxlen = sizeof(sysctl_memory_failure_early_kill),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "memory_failure_recovery",
+ .data = &sysctl_memory_failure_recovery,
+ .maxlen = sizeof(sysctl_memory_failure_recovery),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
#ifdef CONFIG_PROC_SYSCTL
static int _proc_do_string(void* data, int maxlen, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
size_t len;
* proc_dostring - read a string sysctl
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
*
* Returns 0 on success.
*/
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return _proc_do_string(table->data, table->maxlen, write, filp,
+ return _proc_do_string(table->data, table->maxlen, write,
buffer, lenp, ppos);
}
}
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
- int write, struct file *filp, void __user *buffer,
+ int write, void __user *buffer,
size_t *lenp, loff_t *ppos,
int (*conv)(int *negp, unsigned long *lvalp, int *valp,
int write, void *data),
#undef TMPBUFLEN
}
-static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+static int do_proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos,
int (*conv)(int *negp, unsigned long *lvalp, int *valp,
int write, void *data),
void *data)
{
- return __do_proc_dointvec(table->data, table, write, filp,
+ return __do_proc_dointvec(table->data, table, write,
buffer, lenp, ppos, conv, data);
}
* proc_dointvec - read a vector of integers
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
*
* Returns 0 on success.
*/
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
NULL,NULL);
}
* Taint values can only be increased
* This means we can safely use a temporary.
*/
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
t = *table;
t.data = &tmptaint;
- err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
+ err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
if (err < 0)
return err;
* proc_dointvec_minmax - read a vector of integers with min/max values
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
*
* Returns 0 on success.
*/
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct do_proc_dointvec_minmax_conv_param param = {
.min = (int *) table->extra1,
.max = (int *) table->extra2,
};
- return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_dointvec_minmax_conv, ¶m);
}
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos,
unsigned long convmul,
}
static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos,
unsigned long convmul,
unsigned long convdiv)
{
return __do_proc_doulongvec_minmax(table->data, table, write,
- filp, buffer, lenp, ppos, convmul, convdiv);
+ buffer, lenp, ppos, convmul, convdiv);
}
/**
* proc_doulongvec_minmax - read a vector of long integers with min/max values
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
*
* Returns 0 on success.
*/
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
+ return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
}
/**
* proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
* Returns 0 on success.
*/
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- return do_proc_doulongvec_minmax(table, write, filp, buffer,
+ return do_proc_doulongvec_minmax(table, write, buffer,
lenp, ppos, HZ, 1000l);
}
* proc_dointvec_jiffies - read a vector of integers as seconds
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
*
* Returns 0 on success.
*/
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
do_proc_dointvec_jiffies_conv,NULL);
}
* proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: pointer to the file position
*
* Returns 0 on success.
*/
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
do_proc_dointvec_userhz_jiffies_conv,NULL);
}
* proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
*
* Returns 0 on success.
*/
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_dointvec_ms_jiffies_conv, NULL);
}
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct pid *new_pid;
tmp = pid_vnr(cad_pid);
- r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
+ r = __do_proc_dointvec(&tmp, table, write, buffer,
lenp, ppos, NULL, NULL);
if (r || !write)
return r;
#else /* CONFIG_PROC_FS */
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
--- /dev/null
+/*
+ * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+ * This file is part of the GNU C Library.
+ * Contributed by Paul Eggert (eggert@twinsun.com).
+ *
+ * The GNU C Library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * The GNU C Library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with the GNU C Library; see the file COPYING.LIB. If not,
+ * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Converts the calendar time to broken-down time representation
+ * Based on code from glibc-2.6
+ *
+ * 2009-7-14:
+ * Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com>
+ */
+
+#include <linux/time.h>
+#include <linux/module.h>
+
+/*
+ * Nonzero if YEAR is a leap year (every 4 years,
+ * except every 100th isn't, and every 400th is).
+ */
+static int __isleap(long year)
+{
+ return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0);
+}
+
+/* do a mathdiv for long type */
+static long math_div(long a, long b)
+{
+ return a / b - (a % b < 0);
+}
+
+/* How many leap years between y1 and y2, y1 must less or equal to y2 */
+static long leaps_between(long y1, long y2)
+{
+ long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
+ + math_div(y1 - 1, 400);
+ long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
+ + math_div(y2 - 1, 400);
+ return leaps2 - leaps1;
+}
+
+/* How many days come before each month (0-12). */
+static const unsigned short __mon_yday[2][13] = {
+ /* Normal years. */
+ {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
+ /* Leap years. */
+ {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
+};
+
+#define SECS_PER_HOUR (60 * 60)
+#define SECS_PER_DAY (SECS_PER_HOUR * 24)
+
+/**
+ * time_to_tm - converts the calendar time to local broken-down time
+ *
+ * @totalsecs the number of seconds elapsed since 00:00:00 on January 1, 1970,
+ * Coordinated Universal Time (UTC).
+ * @offset offset seconds adding to totalsecs.
+ * @result pointer to struct tm variable to receive broken-down time
+ */
+void time_to_tm(time_t totalsecs, int offset, struct tm *result)
+{
+ long days, rem, y;
+ const unsigned short *ip;
+
+ days = totalsecs / SECS_PER_DAY;
+ rem = totalsecs % SECS_PER_DAY;
+ rem += offset;
+ while (rem < 0) {
+ rem += SECS_PER_DAY;
+ --days;
+ }
+ while (rem >= SECS_PER_DAY) {
+ rem -= SECS_PER_DAY;
+ ++days;
+ }
+
+ result->tm_hour = rem / SECS_PER_HOUR;
+ rem %= SECS_PER_HOUR;
+ result->tm_min = rem / 60;
+ result->tm_sec = rem % 60;
+
+ /* January 1, 1970 was a Thursday. */
+ result->tm_wday = (4 + days) % 7;
+ if (result->tm_wday < 0)
+ result->tm_wday += 7;
+
+ y = 1970;
+
+ while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
+ /* Guess a corrected year, assuming 365 days per year. */
+ long yg = y + math_div(days, 365);
+
+ /* Adjust DAYS and Y to match the guessed year. */
+ days -= (yg - y) * 365 + leaps_between(y, yg);
+ y = yg;
+ }
+
+ result->tm_year = y - 1900;
+
+ result->tm_yday = days;
+
+ ip = __mon_yday[__isleap(y)];
+ for (y = 11; days < ip[y]; y--)
+ continue;
+ days -= ip[y];
+
+ result->tm_mon = y;
+ result->tm_mday = days + 1;
+}
+EXPORT_SYMBOL(time_to_tm);
int
ftrace_enable_sysctl(struct ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
mutex_lock(&ftrace_lock);
- ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
goto out;
if (current_trace)
*iter->trace = *current_trace;
- if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
goto fail;
- cpumask_clear(iter->started);
-
if (current_trace && current_trace->print_max)
iter->tr = &max_tr;
else
if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
- if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
goto out_free_tracing_cpumask;
/* To save memory, keep the ring buffer size to its minimum */
cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
cpumask_copy(tracing_cpumask, cpu_all_mask);
- cpumask_clear(tracing_reader_cpumask);
/* TODO: make the number of buffers hot pluggable with CPUS */
global_trace.buffer = ring_buffer_alloc(ring_buf_size,
int
stack_trace_sysctl(struct ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
mutex_lock(&stack_sysctl_mutex);
- ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write ||
(last_stack_tracer_enabled == !!stack_tracer_enabled))
*/
#include <linux/mm.h>
-#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
* Special case of dostring for the UTS structure. This has locks
* to observe. Should this be in kernel/sys.c ????
*/
-static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+static int proc_do_uts_string(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table uts_table;
int r;
memcpy(&uts_table, table, sizeof(uts_table));
uts_table.data = get_uts(table, write);
- r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos);
+ r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
put_uts(table, write, uts_table.data);
return r;
}
keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
unless you really know what this hack does.
+config STRIP_ASM_SYMS
+ bool "Strip assembler-generated symbols during link"
+ default n
+ help
+ Strip internal assembler-generated symbols during a link (symbols
+ that look like '.Lxxx') so they don't pollute the output of
+ get_wchan() and suchlike.
+
config UNUSED_SYMBOLS
bool "Enable unused/obsolete exported symbols"
default y if X86
#define GZIP_IOBUF_SIZE (16*1024)
+static int nofill(void *buffer, unsigned int len)
+{
+ return -1;
+}
+
/* Included from initramfs et al code */
STATIC int INIT gunzip(unsigned char *buf, int len,
int(*fill)(void*, unsigned int),
goto gunzip_nomem4;
}
+ if (!fill)
+ fill = nofill;
+
if (len == 0)
len = fill(zbuf, GZIP_IOBUF_SIZE);
#define RC_MODEL_TOTAL_BITS 11
+static int nofill(void *buffer, unsigned int len)
+{
+ return -1;
+}
+
/* Called twice: once at startup and once in rc_normalize() */
static void INIT rc_read(struct rc *rc)
{
int (*fill)(void*, unsigned int),
char *buffer, int buffer_size)
{
- rc->fill = fill;
+ if (fill)
+ rc->fill = fill;
+ else
+ rc->fill = nofill;
rc->buffer = (uint8_t *)buffer;
rc->buffer_size = buffer_size;
rc->buffer_end = rc->buffer + rc->buffer_size;
return p;
}
-static char *ip6_compressed_string(char *p, const struct in6_addr *addr)
+static char *ip6_compressed_string(char *p, const char *addr)
{
int i;
int j;
u8 hi;
u8 lo;
bool needcolon = false;
- bool useIPv4 = ipv6_addr_v4mapped(addr) || ipv6_addr_is_isatap(addr);
+ bool useIPv4;
+ struct in6_addr in6;
+
+ memcpy(&in6, addr, sizeof(struct in6_addr));
+
+ useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
memset(zerolength, 0, sizeof(zerolength));
/* find position of longest 0 run */
for (i = 0; i < range; i++) {
for (j = i; j < range; j++) {
- if (addr->s6_addr16[j] != 0)
+ if (in6.s6_addr16[j] != 0)
break;
zerolength[i]++;
}
needcolon = false;
}
/* hex u16 without leading 0s */
- word = ntohs(addr->s6_addr16[i]);
+ word = ntohs(in6.s6_addr16[i]);
hi = word >> 8;
lo = word & 0xff;
if (hi) {
if (useIPv4) {
if (needcolon)
*p++ = ':';
- p = ip4_string(p, &addr->s6_addr[12], false);
+ p = ip4_string(p, &in6.s6_addr[12], false);
}
*p = '\0';
return p;
}
-static char *ip6_string(char *p, const struct in6_addr *addr, const char *fmt)
+static char *ip6_string(char *p, const char *addr, const char *fmt)
{
int i;
for (i = 0; i < 8; i++) {
- p = pack_hex_byte(p, addr->s6_addr[2 * i]);
- p = pack_hex_byte(p, addr->s6_addr[2 * i + 1]);
+ p = pack_hex_byte(p, *addr++);
+ p = pack_hex_byte(p, *addr++);
if (fmt[0] == 'I' && i != 7)
*p++ = ':';
}
char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
if (fmt[0] == 'I' && fmt[2] == 'c')
- ip6_compressed_string(ip6_addr, (const struct in6_addr *)addr);
+ ip6_compressed_string(ip6_addr, addr);
else
- ip6_string(ip6_addr, (const struct in6_addr *)addr, fmt);
+ ip6_string(ip6_addr, addr, fmt);
return string(buf, end, ip6_addr, spec);
}
/proc/sys/vm/mmap_min_addr tunable.
+config MEMORY_FAILURE
+ depends on MMU
+ depends on X86_MCE
+ bool "Enable recovery from hardware memory errors"
+ help
+ Enables code to recover from some memory failures on systems
+ with MCA recovery. This allows a system to continue running
+ even when some of its memory has uncorrected errors. This requires
+ special hardware support and typically ECC memory.
+
+config HWPOISON_INJECT
+ tristate "Poison pages injector"
+ depends on MEMORY_FAILURE && DEBUG_KERNEL
+
config NOMMU_INITIAL_TRIM_EXCESS
int "Turn on mmap() excess space trimming before booting"
depends on !MMU
endif
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
+obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
/*
* Lock ordering:
*
- * ->i_mmap_lock (vmtruncate)
+ * ->i_mmap_lock (truncate_pagecache)
* ->private_lock (__free_pte->__set_page_dirty_buffers)
* ->swap_lock (exclusive_swap_page, others)
* ->mapping->tree_lock
*
* ->task->proc_lock
* ->dcache_lock (proc_pid_lookup)
+ *
+ * (code doesn't rely on that order, so you could switch it around)
+ * ->tasklist_lock (memory_failure, collect_procs_ao)
+ * ->i_mmap_lock
*/
/*
#ifdef CONFIG_SYSCTL
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
- proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+ proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (write)
h->max_huge_pages = set_max_huge_pages(h, tmp);
}
int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
if (hugepages_treat_as_movable)
htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
else
}
int hugetlb_overcommit_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
- proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+ proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (write) {
spin_lock(&hugetlb_lock);
--- /dev/null
+/* Inject a hwpoison memory failure on a arbitary pfn */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+static struct dentry *hwpoison_dir, *corrupt_pfn;
+
+static int hwpoison_inject(void *data, u64 val)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val);
+ return __memory_failure(val, 18, 0);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
+
+static void pfn_inject_exit(void)
+{
+ if (hwpoison_dir)
+ debugfs_remove_recursive(hwpoison_dir);
+}
+
+static int pfn_inject_init(void)
+{
+ hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
+ if (hwpoison_dir == NULL)
+ return -ENOMEM;
+ corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
+ NULL, &hwpoison_fops);
+ if (corrupt_pfn == NULL) {
+ pfn_inject_exit();
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+module_init(pfn_inject_init);
+module_exit(pfn_inject_exit);
+MODULE_LICENSE("GPL");
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
#include <linux/ksm.h>
#include <asm/tlbflush.h>
static unsigned long ksm_rmap_items;
/* Limit on the number of unswappable pages used */
-static unsigned long ksm_max_kernel_pages = 2000;
+static unsigned long ksm_max_kernel_pages;
/* Number of pages ksmd should scan in one batch */
-static unsigned int ksm_thread_pages_to_scan = 200;
+static unsigned int ksm_thread_pages_to_scan = 100;
/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;
#define KSM_RUN_STOP 0
#define KSM_RUN_MERGE 1
#define KSM_RUN_UNMERGE 2
-static unsigned int ksm_run = KSM_RUN_MERGE;
+static unsigned int ksm_run = KSM_RUN_STOP;
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
sizeof(struct __struct), __alignof__(struct __struct),\
(__flags), NULL)
+static void __init ksm_init_max_kernel_pages(void)
+{
+ ksm_max_kernel_pages = nr_free_buffer_pages() / 4;
+}
+
static int __init ksm_slab_init(void)
{
rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
struct task_struct *ksm_thread;
int err;
+ ksm_init_max_kernel_pages();
+
err = ksm_slab_init();
if (err)
goto out;
return error;
}
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Error injection support for memory error handling.
+ */
+static int madvise_hwpoison(unsigned long start, unsigned long end)
+{
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ for (; start < end; start += PAGE_SIZE) {
+ struct page *p;
+ int ret = get_user_pages(current, current->mm, start, 1,
+ 0, 0, &p, NULL);
+ if (ret != 1)
+ return ret;
+ printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
+ page_to_pfn(p), start);
+ /* Ignore return value for now */
+ __memory_failure(page_to_pfn(p), 0, 1);
+ put_page(p);
+ }
+ return ret;
+}
+#endif
+
static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior)
int write;
size_t len;
+#ifdef CONFIG_MEMORY_FAILURE
+ if (behavior == MADV_HWPOISON)
+ return madvise_hwpoison(start, start+len_in);
+#endif
if (!madvise_behavior_valid(behavior))
return error;
#include <linux/rcupdate.h>
#include <linux/limits.h>
#include <linux/mutex.h>
+#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/spinlock.h>
struct cgroup_subsys mem_cgroup_subsys __read_mostly;
#define MEM_CGROUP_RECLAIM_RETRIES 5
+struct mem_cgroup *root_mem_cgroup __read_mostly;
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
#endif
static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */
+#define SOFTLIMIT_EVENTS_THRESH (1000)
/*
* Statistics for memory cgroup.
MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */
MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */
MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */
+ MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
+ MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
MEM_CGROUP_STAT_NSTATS,
};
struct mem_cgroup_stat_cpu cpustat[0];
};
+static inline void
+__mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat,
+ enum mem_cgroup_stat_index idx)
+{
+ stat->count[idx] = 0;
+}
+
+static inline s64
+__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
+ enum mem_cgroup_stat_index idx)
+{
+ return stat->count[idx];
+}
+
/*
* For accounting under irq disable, no need for increment preempt count.
*/
unsigned long count[NR_LRU_LISTS];
struct zone_reclaim_stat reclaim_stat;
+ struct rb_node tree_node; /* RB tree node */
+ unsigned long long usage_in_excess;/* Set to the value by which */
+ /* the soft limit is exceeded*/
+ bool on_tree;
+ struct mem_cgroup *mem; /* Back pointer, we cannot */
+ /* use container_of */
};
/* Macro for accessing counter */
#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES];
};
+/*
+ * Cgroups above their limits are maintained in a RB-Tree, independent of
+ * their hierarchy representation
+ */
+
+struct mem_cgroup_tree_per_zone {
+ struct rb_root rb_root;
+ spinlock_t lock;
+};
+
+struct mem_cgroup_tree_per_node {
+ struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
+};
+
+struct mem_cgroup_tree {
+ struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
+};
+
+static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
struct mem_cgroup_stat stat;
};
+/*
+ * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
+ * limit reclaim to prevent infinite loops, if they ever occur.
+ */
+#define MEM_CGROUP_MAX_RECLAIM_LOOPS (100)
+#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2)
+
enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
MEM_CGROUP_CHARGE_TYPE_MAPPED,
#define PCGF_CACHE (1UL << PCG_CACHE)
#define PCGF_USED (1UL << PCG_USED)
#define PCGF_LOCK (1UL << PCG_LOCK)
-static const unsigned long
-pcg_default_flags[NR_CHARGE_TYPE] = {
- PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
- PCGF_USED | PCGF_LOCK, /* Anon */
- PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
- 0, /* FORCE */
-};
+/* Not used, but added here for completeness */
+#define PCGF_ACCT (1UL << PCG_ACCT)
/* for encoding cft->private value on file */
#define _MEM (0)
#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val) ((val) & 0xffff)
+/*
+ * Reclaim flags for mem_cgroup_hierarchical_reclaim
+ */
+#define MEM_CGROUP_RECLAIM_NOSWAP_BIT 0x0
+#define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
+#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1
+#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
+#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2
+#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
+
static void mem_cgroup_get(struct mem_cgroup *mem);
static void mem_cgroup_put(struct mem_cgroup *mem);
static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
+static struct mem_cgroup_per_zone *
+mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
+{
+ return &mem->info.nodeinfo[nid]->zoneinfo[zid];
+}
+
+static struct mem_cgroup_per_zone *
+page_cgroup_zoneinfo(struct page_cgroup *pc)
+{
+ struct mem_cgroup *mem = pc->mem_cgroup;
+ int nid = page_cgroup_nid(pc);
+ int zid = page_cgroup_zid(pc);
+
+ if (!mem)
+ return NULL;
+
+ return mem_cgroup_zoneinfo(mem, nid, zid);
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_node_zone(int nid, int zid)
+{
+ return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_from_page(struct page *page)
+{
+ int nid = page_to_nid(page);
+ int zid = page_zonenum(page);
+
+ return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static void
+__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ struct rb_node **p = &mctz->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct mem_cgroup_per_zone *mz_node;
+
+ if (mz->on_tree)
+ return;
+
+ mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+ while (*p) {
+ parent = *p;
+ mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+ tree_node);
+ if (mz->usage_in_excess < mz_node->usage_in_excess)
+ p = &(*p)->rb_left;
+ /*
+ * We can't avoid mem cgroups that are over their soft
+ * limit by the same amount
+ */
+ else if (mz->usage_in_excess >= mz_node->usage_in_excess)
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&mz->tree_node, parent, p);
+ rb_insert_color(&mz->tree_node, &mctz->rb_root);
+ mz->on_tree = true;
+}
+
+static void
+__mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ if (!mz->on_tree)
+ return;
+ rb_erase(&mz->tree_node, &mctz->rb_root);
+ mz->on_tree = false;
+}
+
+static void
+mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ spin_lock(&mctz->lock);
+ __mem_cgroup_insert_exceeded(mem, mz, mctz);
+ spin_unlock(&mctz->lock);
+}
+
+static void
+mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ spin_lock(&mctz->lock);
+ __mem_cgroup_remove_exceeded(mem, mz, mctz);
+ spin_unlock(&mctz->lock);
+}
+
+static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
+{
+ bool ret = false;
+ int cpu;
+ s64 val;
+ struct mem_cgroup_stat_cpu *cpustat;
+
+ cpu = get_cpu();
+ cpustat = &mem->stat.cpustat[cpu];
+ val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS);
+ if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) {
+ __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS);
+ ret = true;
+ }
+ put_cpu();
+ return ret;
+}
+
+static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
+{
+ unsigned long long prev_usage_in_excess, new_usage_in_excess;
+ bool updated_tree = false;
+ struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_tree_per_zone *mctz;
+
+ mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
+ mctz = soft_limit_tree_from_page(page);
+
+ /*
+ * We do updates in lazy mode, mem's are removed
+ * lazily from the per-zone, per-node rb tree
+ */
+ prev_usage_in_excess = mz->usage_in_excess;
+
+ new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+ if (prev_usage_in_excess) {
+ mem_cgroup_remove_exceeded(mem, mz, mctz);
+ updated_tree = true;
+ }
+ if (!new_usage_in_excess)
+ goto done;
+ mem_cgroup_insert_exceeded(mem, mz, mctz);
+
+done:
+ if (updated_tree) {
+ spin_lock(&mctz->lock);
+ mz->usage_in_excess = new_usage_in_excess;
+ spin_unlock(&mctz->lock);
+ }
+}
+
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
+{
+ int node, zone;
+ struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_tree_per_zone *mctz;
+
+ for_each_node_state(node, N_POSSIBLE) {
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ mz = mem_cgroup_zoneinfo(mem, node, zone);
+ mctz = soft_limit_tree_node_zone(node, zone);
+ mem_cgroup_remove_exceeded(mem, mz, mctz);
+ }
+ }
+}
+
+static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem)
+{
+ return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT;
+}
+
+static struct mem_cgroup_per_zone *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+ struct rb_node *rightmost = NULL;
+ struct mem_cgroup_per_zone *mz = NULL;
+
+retry:
+ rightmost = rb_last(&mctz->rb_root);
+ if (!rightmost)
+ goto done; /* Nothing to reclaim from */
+
+ mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+ /*
+ * Remove the node now but someone else can add it back,
+ * we will to add it back at the end of reclaim to its correct
+ * position in the tree.
+ */
+ __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+ if (!res_counter_soft_limit_excess(&mz->mem->res) ||
+ !css_tryget(&mz->mem->css))
+ goto retry;
+done:
+ return mz;
+}
+
+static struct mem_cgroup_per_zone *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+ struct mem_cgroup_per_zone *mz;
+
+ spin_lock(&mctz->lock);
+ mz = __mem_cgroup_largest_soft_limit_node(mctz);
+ spin_unlock(&mctz->lock);
+ return mz;
+}
+
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+ bool charge)
+{
+ int val = (charge) ? 1 : -1;
+ struct mem_cgroup_stat *stat = &mem->stat;
+ struct mem_cgroup_stat_cpu *cpustat;
+ int cpu = get_cpu();
+
+ cpustat = &stat->cpustat[cpu];
+ __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
+ put_cpu();
+}
+
static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
struct page_cgroup *pc,
bool charge)
{
- int val = (charge)? 1 : -1;
+ int val = (charge) ? 1 : -1;
struct mem_cgroup_stat *stat = &mem->stat;
struct mem_cgroup_stat_cpu *cpustat;
int cpu = get_cpu();
else
__mem_cgroup_stat_add_safe(cpustat,
MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
+ __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1);
put_cpu();
}
-static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
-{
- return &mem->info.nodeinfo[nid]->zoneinfo[zid];
-}
-
-static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct page_cgroup *pc)
-{
- struct mem_cgroup *mem = pc->mem_cgroup;
- int nid = page_cgroup_nid(pc);
- int zid = page_cgroup_zid(pc);
-
- if (!mem)
- return NULL;
-
- return mem_cgroup_zoneinfo(mem, nid, zid);
-}
-
static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
enum lru_list idx)
{
return ret;
}
+static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
+{
+ return (mem == root_mem_cgroup);
+}
+
/*
* Following LRU functions are allowed to be used without PCG_LOCK.
* Operations are called by routine of global LRU independently from memcg.
void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
{
struct page_cgroup *pc;
- struct mem_cgroup *mem;
struct mem_cgroup_per_zone *mz;
if (mem_cgroup_disabled())
return;
pc = lookup_page_cgroup(page);
/* can happen while we handle swapcache. */
- if (list_empty(&pc->lru) || !pc->mem_cgroup)
+ if (!TestClearPageCgroupAcctLRU(pc))
return;
+ VM_BUG_ON(!pc->mem_cgroup);
/*
* We don't check PCG_USED bit. It's cleared when the "page" is finally
* removed from global LRU.
*/
mz = page_cgroup_zoneinfo(pc);
- mem = pc->mem_cgroup;
MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+ if (mem_cgroup_is_root(pc->mem_cgroup))
+ return;
+ VM_BUG_ON(list_empty(&pc->lru));
list_del_init(&pc->lru);
return;
}
* For making pc->mem_cgroup visible, insert smp_rmb() here.
*/
smp_rmb();
- /* unused page is not rotated. */
- if (!PageCgroupUsed(pc))
+ /* unused or root page is not rotated. */
+ if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
return;
mz = page_cgroup_zoneinfo(pc);
list_move(&pc->lru, &mz->lists[lru]);
if (mem_cgroup_disabled())
return;
pc = lookup_page_cgroup(page);
+ VM_BUG_ON(PageCgroupAcctLRU(pc));
/*
* Used bit is set without atomic ops but after smp_wmb().
* For making pc->mem_cgroup visible, insert smp_rmb() here.
mz = page_cgroup_zoneinfo(pc);
MEM_CGROUP_ZSTAT(mz, lru) += 1;
+ SetPageCgroupAcctLRU(pc);
+ if (mem_cgroup_is_root(pc->mem_cgroup))
+ return;
list_add(&pc->lru, &mz->lists[lru]);
}
spin_lock_irqsave(&zone->lru_lock, flags);
/* link when the page is linked to LRU but page_cgroup isn't */
- if (PageLRU(page) && list_empty(&pc->lru))
+ if (PageLRU(page) && !PageCgroupAcctLRU(pc))
mem_cgroup_add_lru_list(page, page_lru(page));
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
* If shrink==true, for avoiding to free too much, this returns immedieately.
*/
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
- gfp_t gfp_mask, bool noswap, bool shrink)
+ struct zone *zone,
+ gfp_t gfp_mask,
+ unsigned long reclaim_options)
{
struct mem_cgroup *victim;
int ret, total = 0;
int loop = 0;
+ bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
+ bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
+ bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
+ unsigned long excess = mem_cgroup_get_excess(root_mem);
/* If memsw_is_minimum==1, swap-out is of-no-use. */
if (root_mem->memsw_is_minimum)
noswap = true;
- while (loop < 2) {
+ while (1) {
victim = mem_cgroup_select_victim(root_mem);
- if (victim == root_mem)
+ if (victim == root_mem) {
loop++;
+ if (loop >= 2) {
+ /*
+ * If we have not been able to reclaim
+ * anything, it might because there are
+ * no reclaimable pages under this hierarchy
+ */
+ if (!check_soft || !total) {
+ css_put(&victim->css);
+ break;
+ }
+ /*
+ * We want to do more targetted reclaim.
+ * excess >> 2 is not to excessive so as to
+ * reclaim too much, nor too less that we keep
+ * coming back to reclaim from this cgroup
+ */
+ if (total >= (excess >> 2) ||
+ (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) {
+ css_put(&victim->css);
+ break;
+ }
+ }
+ }
if (!mem_cgroup_local_usage(&victim->stat)) {
/* this cgroup's local usage == 0 */
css_put(&victim->css);
continue;
}
/* we use swappiness of local cgroup */
- ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
- get_swappiness(victim));
+ if (check_soft)
+ ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
+ noswap, get_swappiness(victim), zone,
+ zone->zone_pgdat->node_id);
+ else
+ ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
+ noswap, get_swappiness(victim));
css_put(&victim->css);
/*
* At shrinking usage, we can't check we should stop here or
if (shrink)
return ret;
total += ret;
- if (mem_cgroup_check_under_limit(root_mem))
+ if (check_soft) {
+ if (res_counter_check_under_soft_limit(&root_mem->res))
+ return total;
+ } else if (mem_cgroup_check_under_limit(root_mem))
return 1 + total;
}
return total;
*/
static int __mem_cgroup_try_charge(struct mm_struct *mm,
gfp_t gfp_mask, struct mem_cgroup **memcg,
- bool oom)
+ bool oom, struct page *page)
{
- struct mem_cgroup *mem, *mem_over_limit;
+ struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
- struct res_counter *fail_res;
+ struct res_counter *fail_res, *soft_fail_res = NULL;
if (unlikely(test_thread_flag(TIF_MEMDIE))) {
/* Don't account this! */
VM_BUG_ON(css_is_removed(&mem->css));
while (1) {
- int ret;
- bool noswap = false;
+ int ret = 0;
+ unsigned long flags = 0;
- ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
+ if (mem_cgroup_is_root(mem))
+ goto done;
+ ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
+ &soft_fail_res);
if (likely(!ret)) {
if (!do_swap_account)
break;
ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
- &fail_res);
+ &fail_res, NULL);
if (likely(!ret))
break;
/* mem+swap counter fails */
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- noswap = true;
+ res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+ flags |= MEM_CGROUP_RECLAIM_NOSWAP;
mem_over_limit = mem_cgroup_from_res_counter(fail_res,
memsw);
} else
if (!(gfp_mask & __GFP_WAIT))
goto nomem;
- ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
- noswap, false);
+ ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
+ gfp_mask, flags);
if (ret)
continue;
goto nomem;
}
}
+ /*
+ * Insert just the ancestor, we should trickle down to the correct
+ * cgroup for reclaim, since the other nodes will be below their
+ * soft limit
+ */
+ if (soft_fail_res) {
+ mem_over_soft_limit =
+ mem_cgroup_from_res_counter(soft_fail_res, res);
+ if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
+ mem_cgroup_update_tree(mem_over_soft_limit, page);
+ }
+done:
return 0;
nomem:
css_put(&mem->css);
return -ENOMEM;
}
-
/*
* A helper function to get mem_cgroup from ID. must be called under
* rcu_read_lock(). The caller must check css_is_removed() or some if
lock_page_cgroup(pc);
if (unlikely(PageCgroupUsed(pc))) {
unlock_page_cgroup(pc);
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- if (do_swap_account)
- res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(mem)) {
+ res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+ if (do_swap_account)
+ res_counter_uncharge(&mem->memsw, PAGE_SIZE,
+ NULL);
+ }
css_put(&mem->css);
return;
}
+
pc->mem_cgroup = mem;
+ /*
+ * We access a page_cgroup asynchronously without lock_page_cgroup().
+ * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
+ * is accessed after testing USED bit. To make pc->mem_cgroup visible
+ * before USED bit, we need memory barrier here.
+ * See mem_cgroup_add_lru_list(), etc.
+ */
smp_wmb();
- pc->flags = pcg_default_flags[ctype];
+ switch (ctype) {
+ case MEM_CGROUP_CHARGE_TYPE_CACHE:
+ case MEM_CGROUP_CHARGE_TYPE_SHMEM:
+ SetPageCgroupCache(pc);
+ SetPageCgroupUsed(pc);
+ break;
+ case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+ ClearPageCgroupCache(pc);
+ SetPageCgroupUsed(pc);
+ break;
+ default:
+ break;
+ }
mem_cgroup_charge_statistics(mem, pc, true);
if (pc->mem_cgroup != from)
goto out;
- res_counter_uncharge(&from->res, PAGE_SIZE);
+ if (!mem_cgroup_is_root(from))
+ res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
mem_cgroup_charge_statistics(from, pc, false);
page = pc->page;
1);
}
- if (do_swap_account)
- res_counter_uncharge(&from->memsw, PAGE_SIZE);
+ if (do_swap_account && !mem_cgroup_is_root(from))
+ res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
css_put(&from->css);
css_get(&to->css);
parent = mem_cgroup_from_cont(pcg);
- ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
+ ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
if (ret || !parent)
return ret;
/* drop extra refcnt by try_charge() */
css_put(&parent->css);
/* uncharge if move fails */
- res_counter_uncharge(&parent->res, PAGE_SIZE);
- if (do_swap_account)
- res_counter_uncharge(&parent->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(parent)) {
+ res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+ if (do_swap_account)
+ res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+ }
return ret;
}
prefetchw(pc);
mem = memcg;
- ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
+ ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page);
if (ret || !mem)
return ret;
if (!mem)
goto charge_cur_mm;
*ptr = mem;
- ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
+ ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page);
/* drop extra refcnt from tryget */
css_put(&mem->css);
return ret;
charge_cur_mm:
if (unlikely(!mm))
mm = &init_mm;
- return __mem_cgroup_try_charge(mm, mask, ptr, true);
+ return __mem_cgroup_try_charge(mm, mask, ptr, true, page);
}
static void
* This recorded memcg can be obsolete one. So, avoid
* calling css_tryget
*/
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(memcg))
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
+ NULL);
+ mem_cgroup_swap_statistics(memcg, false);
mem_cgroup_put(memcg);
}
rcu_read_unlock();
return;
if (!mem)
return;
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- if (do_swap_account)
- res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(mem)) {
+ res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+ if (do_swap_account)
+ res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+ }
css_put(&mem->css);
}
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
struct mem_cgroup_per_zone *mz;
+ bool soft_limit_excess = false;
if (mem_cgroup_disabled())
return NULL;
break;
}
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
- res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(mem)) {
+ res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+ if (do_swap_account &&
+ (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
+ res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+ }
+ if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
+ mem_cgroup_swap_statistics(mem, true);
mem_cgroup_charge_statistics(mem, pc, false);
ClearPageCgroupUsed(pc);
mz = page_cgroup_zoneinfo(pc);
unlock_page_cgroup(pc);
+ if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
+ mem_cgroup_update_tree(mem, page);
/* at swapout, this memcg will be accessed to record to swap */
if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
css_put(&mem->css);
* We uncharge this because swap is freed.
* This memcg can be obsolete one. We avoid calling css_tryget
*/
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(memcg))
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+ mem_cgroup_swap_statistics(memcg, false);
mem_cgroup_put(memcg);
}
rcu_read_unlock();
unlock_page_cgroup(pc);
if (mem) {
- ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+ ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
+ page);
css_put(&mem->css);
}
*ptr = mem;
if (!ret)
break;
- progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
- false, true);
+ progress = mem_cgroup_hierarchical_reclaim(memcg, NULL,
+ GFP_KERNEL,
+ MEM_CGROUP_RECLAIM_SHRINK);
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
if (!ret)
break;
- mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true);
+ mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
+ MEM_CGROUP_RECLAIM_NOSWAP |
+ MEM_CGROUP_RECLAIM_SHRINK);
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
return ret;
}
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask, int nid,
+ int zid)
+{
+ unsigned long nr_reclaimed = 0;
+ struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+ unsigned long reclaimed;
+ int loop = 0;
+ struct mem_cgroup_tree_per_zone *mctz;
+
+ if (order > 0)
+ return 0;
+
+ mctz = soft_limit_tree_node_zone(nid, zid);
+ /*
+ * This loop can run a while, specially if mem_cgroup's continuously
+ * keep exceeding their soft limit and putting the system under
+ * pressure
+ */
+ do {
+ if (next_mz)
+ mz = next_mz;
+ else
+ mz = mem_cgroup_largest_soft_limit_node(mctz);
+ if (!mz)
+ break;
+
+ reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
+ gfp_mask,
+ MEM_CGROUP_RECLAIM_SOFT);
+ nr_reclaimed += reclaimed;
+ spin_lock(&mctz->lock);
+
+ /*
+ * If we failed to reclaim anything from this memory cgroup
+ * it is time to move on to the next cgroup
+ */
+ next_mz = NULL;
+ if (!reclaimed) {
+ do {
+ /*
+ * Loop until we find yet another one.
+ *
+ * By the time we get the soft_limit lock
+ * again, someone might have aded the
+ * group back on the RB tree. Iterate to
+ * make sure we get a different mem.
+ * mem_cgroup_largest_soft_limit_node returns
+ * NULL if no other cgroup is present on
+ * the tree
+ */
+ next_mz =
+ __mem_cgroup_largest_soft_limit_node(mctz);
+ if (next_mz == mz) {
+ css_put(&next_mz->mem->css);
+ next_mz = NULL;
+ } else /* next_mz == NULL or other memcg */
+ break;
+ } while (1);
+ }
+ mz->usage_in_excess =
+ res_counter_soft_limit_excess(&mz->mem->res);
+ __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+ /*
+ * One school of thought says that we should not add
+ * back the node to the tree if reclaim returns 0.
+ * But our reclaim could return 0, simply because due
+ * to priority we are exposing a smaller subset of
+ * memory to reclaim from. Consider this as a longer
+ * term TODO.
+ */
+ if (mz->usage_in_excess)
+ __mem_cgroup_insert_exceeded(mz->mem, mz, mctz);
+ spin_unlock(&mctz->lock);
+ css_put(&mz->mem->css);
+ loop++;
+ /*
+ * Could not reclaim anything and there are no more
+ * mem cgroups to try or we seem to be looping without
+ * reclaiming anything.
+ */
+ if (!nr_reclaimed &&
+ (next_mz == NULL ||
+ loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+ break;
+ } while (!nr_reclaimed);
+ if (next_mz)
+ css_put(&next_mz->mem->css);
+ return nr_reclaimed;
+}
+
/*
* This routine traverse page_cgroup in given list and drop them all.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
return retval;
}
+struct mem_cgroup_idx_data {
+ s64 val;
+ enum mem_cgroup_stat_index idx;
+};
+
+static int
+mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
+{
+ struct mem_cgroup_idx_data *d = data;
+ d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
+ return 0;
+}
+
+static void
+mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
+ enum mem_cgroup_stat_index idx, s64 *val)
+{
+ struct mem_cgroup_idx_data d;
+ d.idx = idx;
+ d.val = 0;
+ mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
+ *val = d.val;
+}
+
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
- u64 val = 0;
+ u64 idx_val, val;
int type, name;
type = MEMFILE_TYPE(cft->private);
name = MEMFILE_ATTR(cft->private);
switch (type) {
case _MEM:
- val = res_counter_read_u64(&mem->res, name);
+ if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_CACHE, &idx_val);
+ val = idx_val;
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_RSS, &idx_val);
+ val += idx_val;
+ val <<= PAGE_SHIFT;
+ } else
+ val = res_counter_read_u64(&mem->res, name);
break;
case _MEMSWAP:
- val = res_counter_read_u64(&mem->memsw, name);
+ if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_CACHE, &idx_val);
+ val = idx_val;
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_RSS, &idx_val);
+ val += idx_val;
+ mem_cgroup_get_recursive_idx_stat(mem,
+ MEM_CGROUP_STAT_SWAPOUT, &idx_val);
+ val <<= PAGE_SHIFT;
+ } else
+ val = res_counter_read_u64(&mem->memsw, name);
break;
default:
BUG();
name = MEMFILE_ATTR(cft->private);
switch (name) {
case RES_LIMIT:
+ if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
+ ret = -EINVAL;
+ break;
+ }
/* This function does all necessary parse...reuse it */
ret = res_counter_memparse_write_strategy(buffer, &val);
if (ret)
else
ret = mem_cgroup_resize_memsw_limit(memcg, val);
break;
+ case RES_SOFT_LIMIT:
+ ret = res_counter_memparse_write_strategy(buffer, &val);
+ if (ret)
+ break;
+ /*
+ * For memsw, soft limits are hard to implement in terms
+ * of semantics, for now, we support soft limits for
+ * control without swap
+ */
+ if (type == _MEM)
+ ret = res_counter_set_soft_limit(&memcg->res, val);
+ else
+ ret = -EINVAL;
+ break;
default:
ret = -EINVAL; /* should be BUG() ? */
break;
res_counter_reset_failcnt(&mem->memsw);
break;
}
+
return 0;
}
MCS_MAPPED_FILE,
MCS_PGPGIN,
MCS_PGPGOUT,
+ MCS_SWAP,
MCS_INACTIVE_ANON,
MCS_ACTIVE_ANON,
MCS_INACTIVE_FILE,
{"mapped_file", "total_mapped_file"},
{"pgpgin", "total_pgpgin"},
{"pgpgout", "total_pgpgout"},
+ {"swap", "total_swap"},
{"inactive_anon", "total_inactive_anon"},
{"active_anon", "total_active_anon"},
{"inactive_file", "total_inactive_file"},
s->stat[MCS_PGPGIN] += val;
val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
s->stat[MCS_PGPGOUT] += val;
+ if (do_swap_account) {
+ val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
+ s->stat[MCS_SWAP] += val * PAGE_SIZE;
+ }
/* per zone stat */
val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
memset(&mystat, 0, sizeof(mystat));
mem_cgroup_get_local_stat(mem_cont, &mystat);
- for (i = 0; i < NR_MCS_STAT; i++)
+ for (i = 0; i < NR_MCS_STAT; i++) {
+ if (i == MCS_SWAP && !do_swap_account)
+ continue;
cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]);
+ }
/* Hierarchical information */
{
memset(&mystat, 0, sizeof(mystat));
mem_cgroup_get_total_stat(mem_cont, &mystat);
- for (i = 0; i < NR_MCS_STAT; i++)
+ for (i = 0; i < NR_MCS_STAT; i++) {
+ if (i == MCS_SWAP && !do_swap_account)
+ continue;
cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]);
-
+ }
#ifdef CONFIG_DEBUG_VM
cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
.write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
},
+ {
+ .name = "soft_limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
+ .write_string = mem_cgroup_write,
+ .read_u64 = mem_cgroup_read,
+ },
{
.name = "failcnt",
.private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
mz = &pn->zoneinfo[zone];
for_each_lru(l)
INIT_LIST_HEAD(&mz->lists[l]);
+ mz->usage_in_excess = 0;
+ mz->on_tree = false;
+ mz->mem = mem;
}
return 0;
}
{
int node;
+ mem_cgroup_remove_from_trees(mem);
free_css_id(&mem_cgroup_subsys, &mem->css);
for_each_node_state(node, N_POSSIBLE)
}
#endif
+static int mem_cgroup_soft_limit_tree_init(void)
+{
+ struct mem_cgroup_tree_per_node *rtpn;
+ struct mem_cgroup_tree_per_zone *rtpz;
+ int tmp, node, zone;
+
+ for_each_node_state(node, N_POSSIBLE) {
+ tmp = node;
+ if (!node_state(node, N_NORMAL_MEMORY))
+ tmp = -1;
+ rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
+ if (!rtpn)
+ return 1;
+
+ soft_limit_tree.rb_tree_per_node[node] = rtpn;
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ rtpz = &rtpn->rb_tree_per_zone[zone];
+ rtpz->rb_root = RB_ROOT;
+ spin_lock_init(&rtpz->lock);
+ }
+ }
+ return 0;
+}
+
static struct cgroup_subsys_state * __ref
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
for_each_node_state(node, N_POSSIBLE)
if (alloc_mem_cgroup_per_zone_info(mem, node))
goto free_out;
+
/* root ? */
if (cont->parent == NULL) {
enable_swap_cgroup();
parent = NULL;
+ root_mem_cgroup = mem;
+ if (mem_cgroup_soft_limit_tree_init())
+ goto free_out;
+
} else {
parent = mem_cgroup_from_cont(cont->parent);
mem->use_hierarchy = parent->use_hierarchy;
return &mem->css;
free_out:
__mem_cgroup_free(mem);
+ root_mem_cgroup = NULL;
return ERR_PTR(error);
}
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
struct cgroup *cont,
struct cgroup *old_cont,
- struct task_struct *p)
+ struct task_struct *p,
+ bool threadgroup)
{
mutex_lock(&memcg_tasklist);
/*
--- /dev/null
+/*
+ * Copyright (C) 2008, 2009 Intel Corporation
+ * Authors: Andi Kleen, Fengguang Wu
+ *
+ * This software may be redistributed and/or modified under the terms of
+ * the GNU General Public License ("GPL") version 2 only as published by the
+ * Free Software Foundation.
+ *
+ * High level machine check handler. Handles pages reported by the
+ * hardware as being corrupted usually due to a 2bit ECC memory or cache
+ * failure.
+ *
+ * Handles page cache pages in various states. The tricky part
+ * here is that we can access any page asynchronous to other VM
+ * users, because memory failures could happen anytime and anywhere,
+ * possibly violating some of their assumptions. This is why this code
+ * has to be extremely careful. Generally it tries to use normal locking
+ * rules, as in get the standard locks, even if that means the
+ * error handling takes potentially a long time.
+ *
+ * The operation to map back from RMAP chains to processes has to walk
+ * the complete process list and has non linear complexity with the number
+ * mappings. In short it can be quite slow. But since memory corruptions
+ * are rare we hope to get away with this.
+ */
+
+/*
+ * Notebook:
+ * - hugetlb needs more code
+ * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages
+ * - pass bad pages to kdump next kernel
+ */
+#define DEBUG 1 /* remove me in 2.6.34 */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/backing-dev.h>
+#include "internal.h"
+
+int sysctl_memory_failure_early_kill __read_mostly = 0;
+
+int sysctl_memory_failure_recovery __read_mostly = 1;
+
+atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
+
+/*
+ * Send all the processes who have the page mapped an ``action optional''
+ * signal.
+ */
+static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
+ unsigned long pfn)
+{
+ struct siginfo si;
+ int ret;
+
+ printk(KERN_ERR
+ "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
+ pfn, t->comm, t->pid);
+ si.si_signo = SIGBUS;
+ si.si_errno = 0;
+ si.si_code = BUS_MCEERR_AO;
+ si.si_addr = (void *)addr;
+#ifdef __ARCH_SI_TRAPNO
+ si.si_trapno = trapno;
+#endif
+ si.si_addr_lsb = PAGE_SHIFT;
+ /*
+ * Don't use force here, it's convenient if the signal
+ * can be temporarily blocked.
+ * This could cause a loop when the user sets SIGBUS
+ * to SIG_IGN, but hopefully noone will do that?
+ */
+ ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
+ if (ret < 0)
+ printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
+ t->comm, t->pid, ret);
+ return ret;
+}
+
+/*
+ * Kill all processes that have a poisoned page mapped and then isolate
+ * the page.
+ *
+ * General strategy:
+ * Find all processes having the page mapped and kill them.
+ * But we keep a page reference around so that the page is not
+ * actually freed yet.
+ * Then stash the page away
+ *
+ * There's no convenient way to get back to mapped processes
+ * from the VMAs. So do a brute-force search over all
+ * running processes.
+ *
+ * Remember that machine checks are not common (or rather
+ * if they are common you have other problems), so this shouldn't
+ * be a performance issue.
+ *
+ * Also there are some races possible while we get from the
+ * error detection to actually handle it.
+ */
+
+struct to_kill {
+ struct list_head nd;
+ struct task_struct *tsk;
+ unsigned long addr;
+ unsigned addr_valid:1;
+};
+
+/*
+ * Failure handling: if we can't find or can't kill a process there's
+ * not much we can do. We just print a message and ignore otherwise.
+ */
+
+/*
+ * Schedule a process for later kill.
+ * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
+ * TBD would GFP_NOIO be enough?
+ */
+static void add_to_kill(struct task_struct *tsk, struct page *p,
+ struct vm_area_struct *vma,
+ struct list_head *to_kill,
+ struct to_kill **tkc)
+{
+ struct to_kill *tk;
+
+ if (*tkc) {
+ tk = *tkc;
+ *tkc = NULL;
+ } else {
+ tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
+ if (!tk) {
+ printk(KERN_ERR
+ "MCE: Out of memory while machine check handling\n");
+ return;
+ }
+ }
+ tk->addr = page_address_in_vma(p, vma);
+ tk->addr_valid = 1;
+
+ /*
+ * In theory we don't have to kill when the page was
+ * munmaped. But it could be also a mremap. Since that's
+ * likely very rare kill anyways just out of paranoia, but use
+ * a SIGKILL because the error is not contained anymore.
+ */
+ if (tk->addr == -EFAULT) {
+ pr_debug("MCE: Unable to find user space address %lx in %s\n",
+ page_to_pfn(p), tsk->comm);
+ tk->addr_valid = 0;
+ }
+ get_task_struct(tsk);
+ tk->tsk = tsk;
+ list_add_tail(&tk->nd, to_kill);
+}
+
+/*
+ * Kill the processes that have been collected earlier.
+ *
+ * Only do anything when DOIT is set, otherwise just free the list
+ * (this is used for clean pages which do not need killing)
+ * Also when FAIL is set do a force kill because something went
+ * wrong earlier.
+ */
+static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
+ int fail, unsigned long pfn)
+{
+ struct to_kill *tk, *next;
+
+ list_for_each_entry_safe (tk, next, to_kill, nd) {
+ if (doit) {
+ /*
+ * In case something went wrong with munmaping
+ * make sure the process doesn't catch the
+ * signal and then access the memory. Just kill it.
+ * the signal handlers
+ */
+ if (fail || tk->addr_valid == 0) {
+ printk(KERN_ERR
+ "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
+ force_sig(SIGKILL, tk->tsk);
+ }
+
+ /*
+ * In theory the process could have mapped
+ * something else on the address in-between. We could
+ * check for that, but we need to tell the
+ * process anyways.
+ */
+ else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
+ pfn) < 0)
+ printk(KERN_ERR
+ "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
+ }
+ put_task_struct(tk->tsk);
+ kfree(tk);
+ }
+}
+
+static int task_early_kill(struct task_struct *tsk)
+{
+ if (!tsk->mm)
+ return 0;
+ if (tsk->flags & PF_MCE_PROCESS)
+ return !!(tsk->flags & PF_MCE_EARLY);
+ return sysctl_memory_failure_early_kill;
+}
+
+/*
+ * Collect processes when the error hit an anonymous page.
+ */
+static void collect_procs_anon(struct page *page, struct list_head *to_kill,
+ struct to_kill **tkc)
+{
+ struct vm_area_struct *vma;
+ struct task_struct *tsk;
+ struct anon_vma *av;
+
+ read_lock(&tasklist_lock);
+ av = page_lock_anon_vma(page);
+ if (av == NULL) /* Not actually mapped anymore */
+ goto out;
+ for_each_process (tsk) {
+ if (!task_early_kill(tsk))
+ continue;
+ list_for_each_entry (vma, &av->head, anon_vma_node) {
+ if (!page_mapped_in_vma(page, vma))
+ continue;
+ if (vma->vm_mm == tsk->mm)
+ add_to_kill(tsk, page, vma, to_kill, tkc);
+ }
+ }
+ page_unlock_anon_vma(av);
+out:
+ read_unlock(&tasklist_lock);
+}
+
+/*
+ * Collect processes when the error hit a file mapped page.
+ */
+static void collect_procs_file(struct page *page, struct list_head *to_kill,
+ struct to_kill **tkc)
+{
+ struct vm_area_struct *vma;
+ struct task_struct *tsk;
+ struct prio_tree_iter iter;
+ struct address_space *mapping = page->mapping;
+
+ /*
+ * A note on the locking order between the two locks.
+ * We don't rely on this particular order.
+ * If you have some other code that needs a different order
+ * feel free to switch them around. Or add a reverse link
+ * from mm_struct to task_struct, then this could be all
+ * done without taking tasklist_lock and looping over all tasks.
+ */
+
+ read_lock(&tasklist_lock);
+ spin_lock(&mapping->i_mmap_lock);
+ for_each_process(tsk) {
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+ if (!task_early_kill(tsk))
+ continue;
+
+ vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
+ pgoff) {
+ /*
+ * Send early kill signal to tasks where a vma covers
+ * the page but the corrupted page is not necessarily
+ * mapped it in its pte.
+ * Assume applications who requested early kill want
+ * to be informed of all such data corruptions.
+ */
+ if (vma->vm_mm == tsk->mm)
+ add_to_kill(tsk, page, vma, to_kill, tkc);
+ }
+ }
+ spin_unlock(&mapping->i_mmap_lock);
+ read_unlock(&tasklist_lock);
+}
+
+/*
+ * Collect the processes who have the corrupted page mapped to kill.
+ * This is done in two steps for locking reasons.
+ * First preallocate one tokill structure outside the spin locks,
+ * so that we can kill at least one process reasonably reliable.
+ */
+static void collect_procs(struct page *page, struct list_head *tokill)
+{
+ struct to_kill *tk;
+
+ if (!page->mapping)
+ return;
+
+ tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
+ if (!tk)
+ return;
+ if (PageAnon(page))
+ collect_procs_anon(page, tokill, &tk);
+ else
+ collect_procs_file(page, tokill, &tk);
+ kfree(tk);
+}
+
+/*
+ * Error handlers for various types of pages.
+ */
+
+enum outcome {
+ FAILED, /* Error handling failed */
+ DELAYED, /* Will be handled later */
+ IGNORED, /* Error safely ignored */
+ RECOVERED, /* Successfully recovered */
+};
+
+static const char *action_name[] = {
+ [FAILED] = "Failed",
+ [DELAYED] = "Delayed",
+ [IGNORED] = "Ignored",
+ [RECOVERED] = "Recovered",
+};
+
+/*
+ * Error hit kernel page.
+ * Do nothing, try to be lucky and not touch this instead. For a few cases we
+ * could be more sophisticated.
+ */
+static int me_kernel(struct page *p, unsigned long pfn)
+{
+ return DELAYED;
+}
+
+/*
+ * Already poisoned page.
+ */
+static int me_ignore(struct page *p, unsigned long pfn)
+{
+ return IGNORED;
+}
+
+/*
+ * Page in unknown state. Do nothing.
+ */
+static int me_unknown(struct page *p, unsigned long pfn)
+{
+ printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn);
+ return FAILED;
+}
+
+/*
+ * Free memory
+ */
+static int me_free(struct page *p, unsigned long pfn)
+{
+ return DELAYED;
+}
+
+/*
+ * Clean (or cleaned) page cache page.
+ */
+static int me_pagecache_clean(struct page *p, unsigned long pfn)
+{
+ int err;
+ int ret = FAILED;
+ struct address_space *mapping;
+
+ if (!isolate_lru_page(p))
+ page_cache_release(p);
+
+ /*
+ * For anonymous pages we're done the only reference left
+ * should be the one m_f() holds.
+ */
+ if (PageAnon(p))
+ return RECOVERED;
+
+ /*
+ * Now truncate the page in the page cache. This is really
+ * more like a "temporary hole punch"
+ * Don't do this for block devices when someone else
+ * has a reference, because it could be file system metadata
+ * and that's not safe to truncate.
+ */
+ mapping = page_mapping(p);
+ if (!mapping) {
+ /*
+ * Page has been teared down in the meanwhile
+ */
+ return FAILED;
+ }
+
+ /*
+ * Truncation is a bit tricky. Enable it per file system for now.
+ *
+ * Open: to take i_mutex or not for this? Right now we don't.
+ */
+ if (mapping->a_ops->error_remove_page) {
+ err = mapping->a_ops->error_remove_page(mapping, p);
+ if (err != 0) {
+ printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n",
+ pfn, err);
+ } else if (page_has_private(p) &&
+ !try_to_release_page(p, GFP_NOIO)) {
+ pr_debug("MCE %#lx: failed to release buffers\n", pfn);
+ } else {
+ ret = RECOVERED;
+ }
+ } else {
+ /*
+ * If the file system doesn't support it just invalidate
+ * This fails on dirty or anything with private pages
+ */
+ if (invalidate_inode_page(p))
+ ret = RECOVERED;
+ else
+ printk(KERN_INFO "MCE %#lx: Failed to invalidate\n",
+ pfn);
+ }
+ return ret;
+}
+
+/*
+ * Dirty cache page page
+ * Issues: when the error hit a hole page the error is not properly
+ * propagated.
+ */
+static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+{
+ struct address_space *mapping = page_mapping(p);
+
+ SetPageError(p);
+ /* TBD: print more information about the file. */
+ if (mapping) {
+ /*
+ * IO error will be reported by write(), fsync(), etc.
+ * who check the mapping.
+ * This way the application knows that something went
+ * wrong with its dirty file data.
+ *
+ * There's one open issue:
+ *
+ * The EIO will be only reported on the next IO
+ * operation and then cleared through the IO map.
+ * Normally Linux has two mechanisms to pass IO error
+ * first through the AS_EIO flag in the address space
+ * and then through the PageError flag in the page.
+ * Since we drop pages on memory failure handling the
+ * only mechanism open to use is through AS_AIO.
+ *
+ * This has the disadvantage that it gets cleared on
+ * the first operation that returns an error, while
+ * the PageError bit is more sticky and only cleared
+ * when the page is reread or dropped. If an
+ * application assumes it will always get error on
+ * fsync, but does other operations on the fd before
+ * and the page is dropped inbetween then the error
+ * will not be properly reported.
+ *
+ * This can already happen even without hwpoisoned
+ * pages: first on metadata IO errors (which only
+ * report through AS_EIO) or when the page is dropped
+ * at the wrong time.
+ *
+ * So right now we assume that the application DTRT on
+ * the first EIO, but we're not worse than other parts
+ * of the kernel.
+ */
+ mapping_set_error(mapping, EIO);
+ }
+
+ return me_pagecache_clean(p, pfn);
+}
+
+/*
+ * Clean and dirty swap cache.
+ *
+ * Dirty swap cache page is tricky to handle. The page could live both in page
+ * cache and swap cache(ie. page is freshly swapped in). So it could be
+ * referenced concurrently by 2 types of PTEs:
+ * normal PTEs and swap PTEs. We try to handle them consistently by calling
+ * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
+ * and then
+ * - clear dirty bit to prevent IO
+ * - remove from LRU
+ * - but keep in the swap cache, so that when we return to it on
+ * a later page fault, we know the application is accessing
+ * corrupted data and shall be killed (we installed simple
+ * interception code in do_swap_page to catch it).
+ *
+ * Clean swap cache pages can be directly isolated. A later page fault will
+ * bring in the known good data from disk.
+ */
+static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+{
+ int ret = FAILED;
+
+ ClearPageDirty(p);
+ /* Trigger EIO in shmem: */
+ ClearPageUptodate(p);
+
+ if (!isolate_lru_page(p)) {
+ page_cache_release(p);
+ ret = DELAYED;
+ }
+
+ return ret;
+}
+
+static int me_swapcache_clean(struct page *p, unsigned long pfn)
+{
+ int ret = FAILED;
+
+ if (!isolate_lru_page(p)) {
+ page_cache_release(p);
+ ret = RECOVERED;
+ }
+ delete_from_swap_cache(p);
+ return ret;
+}
+
+/*
+ * Huge pages. Needs work.
+ * Issues:
+ * No rmap support so we cannot find the original mapper. In theory could walk
+ * all MMs and look for the mappings, but that would be non atomic and racy.
+ * Need rmap for hugepages for this. Alternatively we could employ a heuristic,
+ * like just walking the current process and hoping it has it mapped (that
+ * should be usually true for the common "shared database cache" case)
+ * Should handle free huge pages and dequeue them too, but this needs to
+ * handle huge page accounting correctly.
+ */
+static int me_huge_page(struct page *p, unsigned long pfn)
+{
+ return FAILED;
+}
+
+/*
+ * Various page states we can handle.
+ *
+ * A page state is defined by its current page->flags bits.
+ * The table matches them in order and calls the right handler.
+ *
+ * This is quite tricky because we can access page at any time
+ * in its live cycle, so all accesses have to be extremly careful.
+ *
+ * This is not complete. More states could be added.
+ * For any missing state don't attempt recovery.
+ */
+
+#define dirty (1UL << PG_dirty)
+#define sc (1UL << PG_swapcache)
+#define unevict (1UL << PG_unevictable)
+#define mlock (1UL << PG_mlocked)
+#define writeback (1UL << PG_writeback)
+#define lru (1UL << PG_lru)
+#define swapbacked (1UL << PG_swapbacked)
+#define head (1UL << PG_head)
+#define tail (1UL << PG_tail)
+#define compound (1UL << PG_compound)
+#define slab (1UL << PG_slab)
+#define buddy (1UL << PG_buddy)
+#define reserved (1UL << PG_reserved)
+
+static struct page_state {
+ unsigned long mask;
+ unsigned long res;
+ char *msg;
+ int (*action)(struct page *p, unsigned long pfn);
+} error_states[] = {
+ { reserved, reserved, "reserved kernel", me_ignore },
+ { buddy, buddy, "free kernel", me_free },
+
+ /*
+ * Could in theory check if slab page is free or if we can drop
+ * currently unused objects without touching them. But just
+ * treat it as standard kernel for now.
+ */
+ { slab, slab, "kernel slab", me_kernel },
+
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+ { head, head, "huge", me_huge_page },
+ { tail, tail, "huge", me_huge_page },
+#else
+ { compound, compound, "huge", me_huge_page },
+#endif
+
+ { sc|dirty, sc|dirty, "swapcache", me_swapcache_dirty },
+ { sc|dirty, sc, "swapcache", me_swapcache_clean },
+
+ { unevict|dirty, unevict|dirty, "unevictable LRU", me_pagecache_dirty},
+ { unevict, unevict, "unevictable LRU", me_pagecache_clean},
+
+#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
+ { mlock|dirty, mlock|dirty, "mlocked LRU", me_pagecache_dirty },
+ { mlock, mlock, "mlocked LRU", me_pagecache_clean },
+#endif
+
+ { lru|dirty, lru|dirty, "LRU", me_pagecache_dirty },
+ { lru|dirty, lru, "clean LRU", me_pagecache_clean },
+ { swapbacked, swapbacked, "anonymous", me_pagecache_clean },
+
+ /*
+ * Catchall entry: must be at end.
+ */
+ { 0, 0, "unknown page state", me_unknown },
+};
+
+#undef lru
+
+static void action_result(unsigned long pfn, char *msg, int result)
+{
+ struct page *page = NULL;
+ if (pfn_valid(pfn))
+ page = pfn_to_page(pfn);
+
+ printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n",
+ pfn,
+ page && PageDirty(page) ? "dirty " : "",
+ msg, action_name[result]);
+}
+
+static int page_action(struct page_state *ps, struct page *p,
+ unsigned long pfn, int ref)
+{
+ int result;
+
+ result = ps->action(p, pfn);
+ action_result(pfn, ps->msg, result);
+ if (page_count(p) != 1 + ref)
+ printk(KERN_ERR
+ "MCE %#lx: %s page still referenced by %d users\n",
+ pfn, ps->msg, page_count(p) - 1);
+
+ /* Could do more checks here if page looks ok */
+ /*
+ * Could adjust zone counters here to correct for the missing page.
+ */
+
+ return result == RECOVERED ? 0 : -EBUSY;
+}
+
+#define N_UNMAP_TRIES 5
+
+/*
+ * Do all that is necessary to remove user space mappings. Unmap
+ * the pages and send SIGBUS to the processes if the data was dirty.
+ */
+static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
+ int trapno)
+{
+ enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+ struct address_space *mapping;
+ LIST_HEAD(tokill);
+ int ret;
+ int i;
+ int kill = 1;
+
+ if (PageReserved(p) || PageCompound(p) || PageSlab(p))
+ return;
+
+ if (!PageLRU(p))
+ lru_add_drain_all();
+
+ /*
+ * This check implies we don't kill processes if their pages
+ * are in the swap cache early. Those are always late kills.
+ */
+ if (!page_mapped(p))
+ return;
+
+ if (PageSwapCache(p)) {
+ printk(KERN_ERR
+ "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
+ ttu |= TTU_IGNORE_HWPOISON;
+ }
+
+ /*
+ * Propagate the dirty bit from PTEs to struct page first, because we
+ * need this to decide if we should kill or just drop the page.
+ */
+ mapping = page_mapping(p);
+ if (!PageDirty(p) && mapping && mapping_cap_writeback_dirty(mapping)) {
+ if (page_mkclean(p)) {
+ SetPageDirty(p);
+ } else {
+ kill = 0;
+ ttu |= TTU_IGNORE_HWPOISON;
+ printk(KERN_INFO
+ "MCE %#lx: corrupted page was clean: dropped without side effects\n",
+ pfn);
+ }
+ }
+
+ /*
+ * First collect all the processes that have the page
+ * mapped in dirty form. This has to be done before try_to_unmap,
+ * because ttu takes the rmap data structures down.
+ *
+ * Error handling: We ignore errors here because
+ * there's nothing that can be done.
+ */
+ if (kill)
+ collect_procs(p, &tokill);
+
+ /*
+ * try_to_unmap can fail temporarily due to races.
+ * Try a few times (RED-PEN better strategy?)
+ */
+ for (i = 0; i < N_UNMAP_TRIES; i++) {
+ ret = try_to_unmap(p, ttu);
+ if (ret == SWAP_SUCCESS)
+ break;
+ pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret);
+ }
+
+ if (ret != SWAP_SUCCESS)
+ printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
+ pfn, page_mapcount(p));
+
+ /*
+ * Now that the dirty bit has been propagated to the
+ * struct page and all unmaps done we can decide if
+ * killing is needed or not. Only kill when the page
+ * was dirty, otherwise the tokill list is merely
+ * freed. When there was a problem unmapping earlier
+ * use a more force-full uncatchable kill to prevent
+ * any accesses to the poisoned memory.
+ */
+ kill_procs_ao(&tokill, !!PageDirty(p), trapno,
+ ret != SWAP_SUCCESS, pfn);
+}
+
+int __memory_failure(unsigned long pfn, int trapno, int ref)
+{
+ struct page_state *ps;
+ struct page *p;
+ int res;
+
+ if (!sysctl_memory_failure_recovery)
+ panic("Memory failure from trap %d on page %lx", trapno, pfn);
+
+ if (!pfn_valid(pfn)) {
+ action_result(pfn, "memory outside kernel control", IGNORED);
+ return -EIO;
+ }
+
+ p = pfn_to_page(pfn);
+ if (TestSetPageHWPoison(p)) {
+ action_result(pfn, "already hardware poisoned", IGNORED);
+ return 0;
+ }
+
+ atomic_long_add(1, &mce_bad_pages);
+
+ /*
+ * We need/can do nothing about count=0 pages.
+ * 1) it's a free page, and therefore in safe hand:
+ * prep_new_page() will be the gate keeper.
+ * 2) it's part of a non-compound high order page.
+ * Implies some kernel user: cannot stop them from
+ * R/W the page; let's pray that the page has been
+ * used and will be freed some time later.
+ * In fact it's dangerous to directly bump up page count from 0,
+ * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
+ */
+ if (!get_page_unless_zero(compound_head(p))) {
+ action_result(pfn, "free or high order kernel", IGNORED);
+ return PageBuddy(compound_head(p)) ? 0 : -EBUSY;
+ }
+
+ /*
+ * Lock the page and wait for writeback to finish.
+ * It's very difficult to mess with pages currently under IO
+ * and in many cases impossible, so we just avoid it here.
+ */
+ lock_page_nosync(p);
+ wait_on_page_writeback(p);
+
+ /*
+ * Now take care of user space mappings.
+ */
+ hwpoison_user_mappings(p, pfn, trapno);
+
+ /*
+ * Torn down by someone else?
+ */
+ if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+ action_result(pfn, "already truncated LRU", IGNORED);
+ res = 0;
+ goto out;
+ }
+
+ res = -EBUSY;
+ for (ps = error_states;; ps++) {
+ if ((p->flags & ps->mask) == ps->res) {
+ res = page_action(ps, p, pfn, ref);
+ break;
+ }
+ }
+out:
+ unlock_page(p);
+ return res;
+}
+EXPORT_SYMBOL_GPL(__memory_failure);
+
+/**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ *
+ * This function is called by the low level machine check code
+ * of an architecture when it detects hardware memory corruption
+ * of a page. It tries its best to recover, which includes
+ * dropping pages, killing processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber)
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks hold.
+ */
+void memory_failure(unsigned long pfn, int trapno)
+{
+ __memory_failure(pfn, trapno, 0);
+}
unsigned long addr = vma->vm_start;
/*
- * Hide vma from rmap and vmtruncate before freeing pgtables
+ * Hide vma from rmap and truncate_pagecache before freeing
+ * pgtables
*/
anon_vma_unlink(vma);
unlink_file_vma(vma);
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return i ? i : -ENOMEM;
- else if (ret & VM_FAULT_SIGBUS)
+ if (ret &
+ (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
return i ? i : -EFAULT;
BUG();
}
* @mapping: the address space containing mmaps to be unmapped.
* @holebegin: byte in first page to unmap, relative to the start of
* the underlying file. This will be rounded down to a PAGE_SIZE
- * boundary. Note that this is different from vmtruncate(), which
+ * boundary. Note that this is different from truncate_pagecache(), which
* must keep the partial page. In contrast, we must get rid of
* partial pages.
* @holelen: size of prospective hole in bytes. This will be rounded
}
EXPORT_SYMBOL(unmap_mapping_range);
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page. Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
- if (inode->i_size < offset) {
- unsigned long limit;
-
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit)
- goto out_sig;
- if (offset > inode->i_sb->s_maxbytes)
- goto out_big;
- i_size_write(inode, offset);
- } else {
- struct address_space *mapping = inode->i_mapping;
-
- /*
- * truncation of in-use swapfiles is disallowed - it would
- * cause subsequent swapout to scribble on the now-freed
- * blocks.
- */
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
- i_size_write(inode, offset);
-
- /*
- * unmap_mapping_range is called twice, first simply for
- * efficiency so that truncate_inode_pages does fewer
- * single-page unmaps. However after this first call, and
- * before truncate_inode_pages finishes, it is possible for
- * private pages to be COWed, which remain after
- * truncate_inode_pages finishes, hence the second
- * unmap_mapping_range call must be made for correctness.
- */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- }
-
- if (inode->i_op->truncate)
- inode->i_op->truncate(inode);
- return 0;
-
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out_big:
- return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
{
struct address_space *mapping = inode->i_mapping;
goto out;
entry = pte_to_swp_entry(orig_pte);
- if (is_migration_entry(entry)) {
- migration_entry_wait(mm, pmd, address);
+ if (unlikely(non_swap_entry(entry))) {
+ if (is_migration_entry(entry)) {
+ migration_entry_wait(mm, pmd, address);
+ } else if (is_hwpoison_entry(entry)) {
+ ret = VM_FAULT_HWPOISON;
+ } else {
+ print_bad_pte(vma, address, orig_pte, NULL);
+ ret = VM_FAULT_OOM;
+ }
goto out;
}
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
+ } else if (PageHWPoison(page)) {
+ ret = VM_FAULT_HWPOISON;
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ goto out;
}
lock_page(page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
return ret;
+ if (unlikely(PageHWPoison(vmf.page))) {
+ if (ret & VM_FAULT_LOCKED)
+ unlock_page(vmf.page);
+ return VM_FAULT_HWPOISON;
+ }
+
/*
* For consistency in subsequent calls, make the faulted page always
* locked.
}
/* Establish migration ptes or remove ptes */
- try_to_unmap(page, 1);
+ try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
skip_unmap:
if (!page_mapped(page))
if (vma->vm_file) {
/*
* Subtle point from Rajesh Venkatasubramanian: before
- * moving file-based ptes, we must lock vmtruncate out,
- * since it might clean the dst vma before the src vma,
+ * moving file-based ptes, we must lock truncate_pagecache
+ * out, since it might clean the dst vma before the src vma,
* and we propagate stale pages into the dst afterward.
*/
mapping = vma->vm_file->f_mapping;
struct vm_operations_struct generic_file_vm_ops = {
};
-/*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page. Ugly, but necessary.
- */
-int vmtruncate(struct inode *inode, loff_t offset)
-{
- struct address_space *mapping = inode->i_mapping;
- unsigned long limit;
-
- if (inode->i_size < offset)
- goto do_expand;
- i_size_write(inode, offset);
-
- truncate_inode_pages(mapping, offset);
- goto out_truncate;
-
-do_expand:
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit)
- goto out_sig;
- if (offset > inode->i_sb->s_maxbytes)
- goto out;
- i_size_write(inode, offset);
-
-out_truncate:
- if (inode->i_op->truncate)
- inode->i_op->truncate(inode);
- return 0;
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out:
- return -EFBIG;
-}
-
-EXPORT_SYMBOL(vmtruncate);
-
/*
* Return the total memory allocated for this pointer, not
* just what the caller asked for.
}
int dirty_background_ratio_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
- ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write)
dirty_background_bytes = 0;
return ret;
}
int dirty_background_bytes_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
- ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write)
dirty_background_ratio = 0;
return ret;
}
int dirty_ratio_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int old_ratio = vm_dirty_ratio;
int ret;
- ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
update_completion_period();
vm_dirty_bytes = 0;
int dirty_bytes_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
unsigned long old_bytes = vm_dirty_bytes;
int ret;
- ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
update_completion_period();
vm_dirty_ratio = 0;
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
*/
int dirty_writeback_centisecs_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
return 0;
}
EXPORT_SYMBOL(redirty_page_for_writepage);
/*
+ * Dirty a page.
+ *
+ * For pages with a mapping this should be done under the page lock
+ * for the benefit of asynchronous memory errors who prefer a consistent
+ * dirty state. This rule can be broken in some special cases,
+ * but should be better not to.
+ *
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
*/
static unsigned long nr_shown;
static unsigned long nr_unshown;
+ /* Don't complain about poisoned pages */
+ if (PageHWPoison(page)) {
+ __ClearPageBuddy(page);
+ return;
+ }
+
/*
* Allow a burst of 60 reports, then keep quiet for that minute;
* or allow a steady drip of one report per second.
/*
* This page is about to be returned from the page allocator
*/
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static inline int check_new_page(struct page *page)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
bad_page(page);
return 1;
}
+ return 0;
+}
+
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+{
+ int i;
+
+ for (i = 0; i < (1 << order); i++) {
+ struct page *p = page + i;
+ if (unlikely(check_new_page(p)))
+ return 1;
+ }
set_page_private(page, 0);
set_page_refcounted(page);
* sysctl handler for numa_zonelist_order
*/
int numa_zonelist_order_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length,
+ void __user *buffer, size_t *length,
loff_t *ppos)
{
char saved_string[NUMA_ZONELIST_ORDER_LEN];
if (write)
strncpy(saved_string, (char*)table->data,
NUMA_ZONELIST_ORDER_LEN);
- ret = proc_dostring(table, write, file, buffer, length, ppos);
+ ret = proc_dostring(table, write, buffer, length, ppos);
if (ret)
return ret;
if (write) {
* changes.
*/
int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
if (write)
setup_per_zone_wmarks();
return 0;
#ifdef CONFIG_NUMA
int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
int rc;
- rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
}
int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
int rc;
- rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
* if in function of the boot time zone sizes.
*/
int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ proc_dointvec_minmax(table, write, buffer, length, ppos);
setup_per_zone_lowmem_reserve();
return 0;
}
*/
int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
unsigned int cpu;
int ret;
- ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (!write || (ret == -EINVAL))
return ret;
for_each_populated_zone(zone) {
int node = numa_node_id();
struct zone *zones = NODE_DATA(node)->node_zones;
int num_cpus_on_node;
- const struct cpumask *cpumask_on_node = cpumask_of_node(node);
node_free_pages =
#ifdef CONFIG_ZONE_DMA
max = node_free_pages / FRACTION_OF_NODE_MEM;
- num_cpus_on_node = cpus_weight_nr(*cpumask_on_node);
+ num_cpus_on_node = cpumask_weight(cpumask_of_node(node));
max /= num_cpus_on_node;
return max(max, min_pages);
* mapping->tree_lock (widely used, in set_page_dirty,
* in arch-dependent flush_dcache_mmap_lock,
* within inode_lock in __sync_single_inode)
+ *
+ * (code doesn't rely on that order so it could be switched around)
+ * ->tasklist_lock
+ * anon_vma->lock (memory_failure, collect_procs_anon)
+ * pte map lock
*/
#include <linux/mm.h>
* Getting a lock on a stable anon_vma from a page off the LRU is
* tricky: page_lock_anon_vma rely on RCU to guard against the races.
*/
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
{
struct anon_vma *anon_vma;
unsigned long anon_mapping;
return NULL;
}
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
spin_unlock(&anon_vma->lock);
rcu_read_unlock();
* if the page is not mapped into the page tables of this VMA. Only
* valid for normal file or anonymous VMAs.
*/
-static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
unsigned long address;
pte_t *pte;
* repeatedly from either try_to_unmap_anon or try_to_unmap_file.
*/
static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
- int migration)
+ enum ttu_flags flags)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
* If it's recently referenced (perhaps page_referenced
* skipped over this mm) then we should reactivate it.
*/
- if (!migration) {
+ if (!(flags & TTU_IGNORE_MLOCK)) {
if (vma->vm_flags & VM_LOCKED) {
ret = SWAP_MLOCK;
goto out_unmap;
}
+ }
+ if (!(flags & TTU_IGNORE_ACCESS)) {
if (ptep_clear_flush_young_notify(vma, address, pte)) {
ret = SWAP_FAIL;
goto out_unmap;
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
- if (PageAnon(page)) {
+ if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+ if (PageAnon(page))
+ dec_mm_counter(mm, anon_rss);
+ else
+ dec_mm_counter(mm, file_rss);
+ set_pte_at(mm, address, pte,
+ swp_entry_to_pte(make_hwpoison_entry(page)));
+ } else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(page) };
if (PageSwapCache(page)) {
* pte. do_swap_page() will wait until the migration
* pte is removed and then restart fault handling.
*/
- BUG_ON(!migration);
+ BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
entry = make_migration_entry(page, pte_write(pteval));
}
set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
BUG_ON(pte_file(*pte));
- } else if (PAGE_MIGRATION && migration) {
+ } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
/* Establish migration entry for a file page */
swp_entry_t entry;
entry = make_migration_entry(page, pte_write(pteval));
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* 'LOCKED.
*/
-static int try_to_unmap_anon(struct page *page, int unlock, int migration)
+static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{
struct anon_vma *anon_vma;
struct vm_area_struct *vma;
unsigned int mlocked = 0;
int ret = SWAP_AGAIN;
+ int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
if (MLOCK_PAGES && unlikely(unlock))
ret = SWAP_SUCCESS; /* default for try_to_munlock() */
continue; /* must visit all unlocked vmas */
ret = SWAP_MLOCK; /* saw at least one mlocked vma */
} else {
- ret = try_to_unmap_one(page, vma, migration);
+ ret = try_to_unmap_one(page, vma, flags);
if (ret == SWAP_FAIL || !page_mapped(page))
break;
}
/**
* try_to_unmap_file - unmap/unlock file page using the object-based rmap method
* @page: the page to unmap/unlock
- * @unlock: request for unlock rather than unmap [unlikely]
- * @migration: unmapping for migration - ignored if @unlock
+ * @flags: action and flags
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to.
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* 'LOCKED.
*/
-static int try_to_unmap_file(struct page *page, int unlock, int migration)
+static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
{
struct address_space *mapping = page->mapping;
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
unsigned long max_nl_size = 0;
unsigned int mapcount;
unsigned int mlocked = 0;
+ int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
if (MLOCK_PAGES && unlikely(unlock))
ret = SWAP_SUCCESS; /* default for try_to_munlock() */
continue; /* must visit all vmas */
ret = SWAP_MLOCK;
} else {
- ret = try_to_unmap_one(page, vma, migration);
+ ret = try_to_unmap_one(page, vma, flags);
if (ret == SWAP_FAIL || !page_mapped(page))
goto out;
}
ret = SWAP_MLOCK; /* leave mlocked == 0 */
goto out; /* no need to look further */
}
- if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
+ if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
+ (vma->vm_flags & VM_LOCKED))
continue;
cursor = (unsigned long) vma->vm_private_data;
if (cursor > max_nl_cursor)
do {
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) {
- if (!MLOCK_PAGES && !migration &&
+ if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
(vma->vm_flags & VM_LOCKED))
continue;
cursor = (unsigned long) vma->vm_private_data;
/**
* try_to_unmap - try to remove all page table mappings to a page
* @page: the page to get unmapped
- * @migration: migration flag
+ * @flags: action and flags
*
* Tries to remove all the page table entries which are mapping this
* page, used in the pageout path. Caller must hold the page lock.
* SWAP_FAIL - the page is unswappable
* SWAP_MLOCK - page is mlocked.
*/
-int try_to_unmap(struct page *page, int migration)
+int try_to_unmap(struct page *page, enum ttu_flags flags)
{
int ret;
BUG_ON(!PageLocked(page));
if (PageAnon(page))
- ret = try_to_unmap_anon(page, 0, migration);
+ ret = try_to_unmap_anon(page, flags);
else
- ret = try_to_unmap_file(page, 0, migration);
+ ret = try_to_unmap_file(page, flags);
if (ret != SWAP_MLOCK && !page_mapped(page))
ret = SWAP_SUCCESS;
return ret;
VM_BUG_ON(!PageLocked(page) || PageLRU(page));
if (PageAnon(page))
- return try_to_unmap_anon(page, 1, 0);
+ return try_to_unmap_anon(page, TTU_MUNLOCK);
else
- return try_to_unmap_file(page, 1, 0);
+ return try_to_unmap_file(page, TTU_MUNLOCK);
}
if (pos + copied > inode->i_size)
i_size_write(inode, pos + copied);
- unlock_page(page);
set_page_dirty(page);
+ unlock_page(page);
page_cache_release(page);
return copied;
iput(inode);
return error;
}
- unlock_page(page);
inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
kaddr = kmap_atomic(page, KM_USER0);
memcpy(kaddr, symname, len);
kunmap_atomic(kaddr, KM_USER0);
set_page_dirty(page);
+ unlock_page(page);
page_cache_release(page);
}
if (dir->i_mode & S_ISGID)
.write_end = shmem_write_end,
#endif
.migratepage = migrate_page,
+ .error_remove_page = generic_error_remove_page,
};
static const struct file_operations shmem_file_operations = {
struct swap_info_struct *p;
struct page *page = NULL;
- if (is_migration_entry(entry))
+ if (non_swap_entry(entry))
return 1;
p = swap_info_get(entry);
int count;
bool has_cache;
- if (is_migration_entry(entry))
+ if (non_swap_entry(entry))
return -EINVAL;
type = swp_type(entry);
* its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/
-static void
+static int
truncate_complete_page(struct address_space *mapping, struct page *page)
{
if (page->mapping != mapping)
- return;
+ return -EIO;
if (page_has_private(page))
do_invalidatepage(page, 0);
remove_from_page_cache(page);
ClearPageMappedToDisk(page);
page_cache_release(page); /* pagecache ref */
+ return 0;
}
/*
return ret;
}
+int truncate_inode_page(struct address_space *mapping, struct page *page)
+{
+ if (page_mapped(page)) {
+ unmap_mapping_range(mapping,
+ (loff_t)page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, 0);
+ }
+ return truncate_complete_page(mapping, page);
+}
+
+/*
+ * Used to get rid of pages on hardware memory corruption.
+ */
+int generic_error_remove_page(struct address_space *mapping, struct page *page)
+{
+ if (!mapping)
+ return -EINVAL;
+ /*
+ * Only punch for normal data pages for now.
+ * Handling other types like directories would need more auditing.
+ */
+ if (!S_ISREG(mapping->host->i_mode))
+ return -EIO;
+ return truncate_inode_page(mapping, page);
+}
+EXPORT_SYMBOL(generic_error_remove_page);
+
+/*
+ * Safely invalidate one page from its pagecache mapping.
+ * It only drops clean, unused pages. The page must be locked.
+ *
+ * Returns 1 if the page is successfully invalidated, otherwise 0.
+ */
+int invalidate_inode_page(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+ if (!mapping)
+ return 0;
+ if (PageDirty(page) || PageWriteback(page))
+ return 0;
+ if (page_mapped(page))
+ return 0;
+ return invalidate_complete_page(mapping, page);
+}
+
/**
* truncate_inode_pages - truncate range of pages specified by start & end byte offsets
* @mapping: mapping to truncate
unlock_page(page);
continue;
}
- if (page_mapped(page)) {
- unmap_mapping_range(mapping,
- (loff_t)page_index<<PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE, 0);
- }
- truncate_complete_page(mapping, page);
+ truncate_inode_page(mapping, page);
unlock_page(page);
}
pagevec_release(&pvec);
break;
lock_page(page);
wait_on_page_writeback(page);
- if (page_mapped(page)) {
- unmap_mapping_range(mapping,
- (loff_t)page->index<<PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE, 0);
- }
+ truncate_inode_page(mapping, page);
if (page->index > next)
next = page->index;
next++;
- truncate_complete_page(mapping, page);
unlock_page(page);
}
pagevec_release(&pvec);
if (lock_failed)
continue;
- if (PageDirty(page) || PageWriteback(page))
- goto unlock;
- if (page_mapped(page))
- goto unlock;
- ret += invalidate_complete_page(mapping, page);
-unlock:
+ ret += invalidate_inode_page(page);
+
unlock_page(page);
if (next > end)
break;
return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
+
+/**
+ * truncate_pagecache - unmap and remove pagecache that has been truncated
+ * @inode: inode
+ * @old: old file offset
+ * @new: new file offset
+ *
+ * inode's new i_size must already be written before truncate_pagecache
+ * is called.
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (eg. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
+{
+ if (new < old) {
+ struct address_space *mapping = inode->i_mapping;
+
+ /*
+ * unmap_mapping_range is called twice, first simply for
+ * efficiency so that truncate_inode_pages does fewer
+ * single-page unmaps. However after this first call, and
+ * before truncate_inode_pages finishes, it is possible for
+ * private pages to be COWed, which remain after
+ * truncate_inode_pages finishes, hence the second
+ * unmap_mapping_range call must be made for correctness.
+ */
+ unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(mapping, new);
+ unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+ }
+}
+EXPORT_SYMBOL(truncate_pagecache);
+
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page. Ugly, but necessary.
+ */
+int vmtruncate(struct inode *inode, loff_t offset)
+{
+ loff_t oldsize;
+ int error;
+
+ error = inode_newsize_ok(inode, offset);
+ if (error)
+ return error;
+ oldsize = inode->i_size;
+ i_size_write(inode, offset);
+ truncate_pagecache(inode, oldsize, offset);
+ if (inode->i_op->truncate)
+ inode->i_op->truncate(inode);
+
+ return error;
+}
+EXPORT_SYMBOL(vmtruncate);
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {
- switch (try_to_unmap(page, 0)) {
+ switch (try_to_unmap(page, TTU_UNMAP)) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+ gfp_t gfp_mask, bool noswap,
+ unsigned int swappiness,
+ struct zone *zone, int nid)
+{
+ struct scan_control sc = {
+ .may_writepage = !laptop_mode,
+ .may_unmap = 1,
+ .may_swap = !noswap,
+ .swap_cluster_max = SWAP_CLUSTER_MAX,
+ .swappiness = swappiness,
+ .order = 0,
+ .mem_cgroup = mem,
+ .isolate_pages = mem_cgroup_isolate_pages,
+ };
+ nodemask_t nm = nodemask_of_node(nid);
+
+ sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+ (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+ sc.nodemask = &nm;
+ sc.nr_reclaimed = 0;
+ sc.nr_scanned = 0;
+ /*
+ * NOTE: Although we can get the priority field, using it
+ * here is not a good idea, since it limits the pages we can scan.
+ * if we don't reclaim here, the shrink_zone from balance_pgdat
+ * will pick up pages from other mem cgroup's as well. We hack
+ * the priority and make it zero.
+ */
+ shrink_zone(0, zone, &sc);
+ return sc.nr_reclaimed;
+}
+
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask,
bool noswap,
unsigned int swappiness)
{
+ struct zonelist *zonelist;
struct scan_control sc = {
.may_writepage = !laptop_mode,
.may_unmap = 1,
.isolate_pages = mem_cgroup_isolate_pages,
.nodemask = NULL, /* we don't care the placement */
};
- struct zonelist *zonelist;
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
int nr_slab;
+ int nid, zid;
if (!populated_zone(zone))
continue;
temp_priority[i] = priority;
sc.nr_scanned = 0;
note_zone_scanning_priority(zone, priority);
+
+ nid = pgdat->node_id;
+ zid = zone_idx(zone);
+ /*
+ * Call soft limit reclaim before calling shrink_zone.
+ * For now we ignore the return value
+ */
+ mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
+ nid, zid);
/*
* We put equal pressure on every zone, unless one
* zone has way too many pages free already.
unsigned long scan_unevictable_pages;
int scan_unevictable_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
- proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+ proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (write && *(unsigned long *)table->data)
scan_all_zones_unevictable_pages();
ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ);
ax25_info.n2count = ax25->n2count;
ax25_info.state = ax25->state;
- ax25_info.rcv_q = sk_wmem_alloc_get(sk);
- ax25_info.snd_q = sk_rmem_alloc_get(sk);
+ ax25_info.rcv_q = sk_rmem_alloc_get(sk);
+ ax25_info.snd_q = sk_wmem_alloc_get(sk);
ax25_info.vs = ax25->vs;
ax25_info.vr = ax25->vr;
ax25_info.va = ax25->va;
#ifdef CONFIG_SYSCTL
static
-int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp,
+int brnf_sysctl_call_tables(ctl_table * ctl, int write,
void __user * buffer, size_t * lenp, loff_t * ppos)
{
int ret;
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write && *(int *)(ctl->data))
*(int *)(ctl->data) = 1;
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
/* Thread control flag bits */
-#define T_TERMINATE (1<<0)
-#define T_STOP (1<<1) /* Stop run */
-#define T_RUN (1<<2) /* Start run */
-#define T_REMDEVALL (1<<3) /* Remove all devs */
-#define T_REMDEV (1<<4) /* Remove one dev */
+#define T_STOP (1<<0) /* Stop run */
+#define T_RUN (1<<1) /* Start run */
+#define T_REMDEVALL (1<<2) /* Remove all devs */
+#define T_REMDEV (1<<3) /* Remove one dev */
/* If lock -- can be removed after some work */
#define if_lock(t) spin_lock(&(t->if_lock));
static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
{
- ktime_t start;
+ ktime_t start_time, end_time;
s32 remaining;
struct hrtimer_sleeper t;
if (remaining <= 0)
return;
- start = ktime_now();
+ start_time = ktime_now();
if (remaining < 100)
udelay(remaining); /* really small just spin */
else {
} while (t.task && pkt_dev->running && !signal_pending(current));
__set_current_state(TASK_RUNNING);
}
- pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), start));
+ end_time = ktime_now();
+
+ pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
+ pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay);
}
static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
mutex_unlock(&pktgen_thread_lock);
}
-static void idle(struct pktgen_dev *pkt_dev)
+static void pktgen_resched(struct pktgen_dev *pkt_dev)
{
ktime_t idle_start = ktime_now();
+ schedule();
+ pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+}
- if (need_resched())
- schedule();
- else
- cpu_relax();
+static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
+{
+ ktime_t idle_start = ktime_now();
+ while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+ if (signal_pending(current))
+ break;
+
+ if (need_resched())
+ pktgen_resched(pkt_dev);
+ else
+ cpu_relax();
+ }
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
}
-
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
struct net_device *odev = pkt_dev->odev;
u16 queue_map;
int ret;
- if (pkt_dev->delay) {
- spin(pkt_dev, pkt_dev->next_tx);
-
- /* This is max DELAY, this has special meaning of
- * "never transmit"
- */
- if (pkt_dev->delay == ULLONG_MAX) {
- pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
- return;
- }
- }
-
- if (!pkt_dev->skb) {
- set_cur_queue_map(pkt_dev);
- queue_map = pkt_dev->cur_queue_map;
- } else {
- queue_map = skb_get_queue_mapping(pkt_dev->skb);
+ /* If device is offline, then don't send */
+ if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) {
+ pktgen_stop_device(pkt_dev);
+ return;
}
- txq = netdev_get_tx_queue(odev, queue_map);
- /* Did we saturate the queue already? */
- if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)) {
- /* If device is down, then all queues are permnantly frozen */
- if (netif_running(odev))
- idle(pkt_dev);
- else
- pktgen_stop_device(pkt_dev);
+ /* This is max DELAY, this has special meaning of
+ * "never transmit"
+ */
+ if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
+ pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
return;
}
+ /* If no skb or clone count exhausted then get new one */
if (!pkt_dev->skb || (pkt_dev->last_ok &&
++pkt_dev->clone_count >= pkt_dev->clone_skb)) {
/* build a new pkt */
pkt_dev->clone_count = 0; /* reset counter */
}
- /* fill_packet() might have changed the queue */
+ if (pkt_dev->delay && pkt_dev->last_ok)
+ spin(pkt_dev, pkt_dev->next_tx);
+
queue_map = skb_get_queue_mapping(pkt_dev->skb);
txq = netdev_get_tx_queue(odev, queue_map);
__netif_tx_lock_bh(txq);
- if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)))
- pkt_dev->last_ok = 0;
- else {
- atomic_inc(&(pkt_dev->skb->users));
+ atomic_inc(&(pkt_dev->skb->users));
- retry_now:
+ if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)))
+ ret = NETDEV_TX_BUSY;
+ else
ret = (*xmit)(pkt_dev->skb, odev);
- switch (ret) {
- case NETDEV_TX_OK:
- txq_trans_update(txq);
- pkt_dev->last_ok = 1;
- pkt_dev->sofar++;
- pkt_dev->seq_num++;
- pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
- break;
- case NETDEV_TX_LOCKED:
- cpu_relax();
- goto retry_now;
- default: /* Drivers are not supposed to return other values! */
- if (net_ratelimit())
- pr_info("pktgen: %s xmit error: %d\n",
- odev->name, ret);
- pkt_dev->errors++;
- /* fallthru */
- case NETDEV_TX_BUSY:
- /* Retry it next time */
- atomic_dec(&(pkt_dev->skb->users));
- pkt_dev->last_ok = 0;
- }
-
- if (pkt_dev->delay)
- pkt_dev->next_tx = ktime_add_ns(ktime_now(),
- pkt_dev->delay);
+
+ switch (ret) {
+ case NETDEV_TX_OK:
+ txq_trans_update(txq);
+ pkt_dev->last_ok = 1;
+ pkt_dev->sofar++;
+ pkt_dev->seq_num++;
+ pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
+ break;
+ default: /* Drivers are not supposed to return other values! */
+ if (net_ratelimit())
+ pr_info("pktgen: %s xmit error: %d\n",
+ odev->name, ret);
+ pkt_dev->errors++;
+ /* fallthru */
+ case NETDEV_TX_LOCKED:
+ case NETDEV_TX_BUSY:
+ /* Retry it next time */
+ atomic_dec(&(pkt_dev->skb->users));
+ pkt_dev->last_ok = 0;
}
__netif_tx_unlock_bh(txq);
/* If pkt_dev->count is zero, then run forever */
if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
- while (atomic_read(&(pkt_dev->skb->users)) != 1) {
- if (signal_pending(current))
- break;
- idle(pkt_dev);
- }
+ pktgen_wait_for_skb(pkt_dev);
/* Done with this */
pktgen_stop_device(pkt_dev);
while (!kthread_should_stop()) {
pkt_dev = next_to_run(t);
- if (!pkt_dev &&
- (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV))
- == 0) {
- prepare_to_wait(&(t->queue), &wait,
- TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 10);
- finish_wait(&(t->queue), &wait);
+ if (unlikely(!pkt_dev && t->control == 0)) {
+ wait_event_interruptible_timeout(t->queue,
+ t->control != 0,
+ HZ/10);
+ continue;
}
__set_current_state(TASK_RUNNING);
- if (pkt_dev)
+ if (likely(pkt_dev)) {
pktgen_xmit(pkt_dev);
+ if (need_resched())
+ pktgen_resched(pkt_dev);
+ else
+ cpu_relax();
+ }
+
if (t->control & T_STOP) {
pktgen_stop(t);
t->control &= ~(T_STOP);
static int min_priority[1];
static int max_priority[] = { 127 }; /* From DECnet spec */
-static int dn_forwarding_proc(ctl_table *, int, struct file *,
+static int dn_forwarding_proc(ctl_table *, int,
void __user *, size_t *, loff_t *);
static int dn_forwarding_sysctl(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
}
static int dn_forwarding_proc(ctl_table *table, int write,
- struct file *filep,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
dn_db = dev->dn_ptr;
old = dn_db->parms.forwarding;
- err = proc_dointvec(table, write, filep, buffer, lenp, ppos);
+ err = proc_dointvec(table, write, buffer, lenp, ppos);
if ((err >= 0) && write) {
if (dn_db->parms.forwarding < 0)
}
static int dn_node_address_handler(ctl_table *table, int write,
- struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
static int dn_def_dev_handler(ctl_table *table, int write,
- struct file * filp,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
}
static int devinet_conf_proc(ctl_table *ctl, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write) {
struct ipv4_devconf *cnf = ctl->extra1;
}
static int devinet_sysctl_forward(ctl_table *ctl, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write && *valp != val) {
struct net *net = ctl->extra2;
}
int ipv4_doint_and_flush(ctl_table *ctl, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
struct net *net = ctl->extra2;
if (write && *valp != val)
solution, but it supposes maintaing new variable in ALL
skb, even if no tunneling is used.
- Current solution: t->recursion lock breaks dead loops. It looks
- like dev->tbusy flag, but I preferred new variable, because
- the semantics is different. One day, when hard_start_xmit
- will be multithreaded we will have to use skb->encapsulation.
+ Current solution: HARD_TX_LOCK lock breaks dead loops.
__be32 dst;
int mtu;
- if (tunnel->recursion++) {
- stats->collisions++;
- goto tx_error;
- }
-
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
ip_rt_put(rt);
stats->tx_dropped++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
if (skb->sk)
nf_reset(skb);
IPTUNNEL_XMIT();
- tunnel->recursion--;
return NETDEV_TX_OK;
tx_error_icmp:
tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
* Check the arguments are allowable
*/
+ if (optlen < sizeof(struct in_addr))
+ goto e_inval;
+
err = -EFAULT;
if (optlen >= sizeof(struct ip_mreqn)) {
if (copy_from_user(&mreq, optval, sizeof(mreq)))
__be32 dst = tiph->daddr;
int mtu;
- if (tunnel->recursion++) {
- stats->collisions++;
- goto tx_error;
- }
-
if (skb->protocol != htons(ETH_P_IP))
goto tx_error;
ip_rt_put(rt);
stats->tx_dropped++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
if (skb->sk)
nf_reset(skb);
IPTUNNEL_XMIT();
- tunnel->recursion--;
return NETDEV_TX_OK;
tx_error_icmp:
tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
#ifdef CONFIG_SYSCTL
static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
- struct file *filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
if (write) {
memcpy(&ctl, __ctl, sizeof(ctl));
ctl.data = &flush_delay;
- proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
+ proc_dointvec(&ctl, write, buffer, lenp, ppos);
net = (struct net *)__ctl->extra1;
rt_cache_flush(net, flush_delay);
}
static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
- struct file *filp,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int old = ip_rt_secret_interval;
- int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
rt_secret_reschedule(old);
}
/* Validate changes from /proc interface. */
-static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
+static int ipv4_local_port_range(ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
};
inet_get_local_port_range(range, range + 1);
- ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
if (range[1] < range[0])
}
-static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp,
+static int proc_tcp_congestion_control(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char val[TCP_CA_NAME_MAX];
tcp_get_default_congestion_control(val);
- ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = tcp_set_default_congestion_control(val);
return ret;
}
static int proc_tcp_available_congestion_control(ctl_table *ctl,
- int write, struct file * filp,
+ int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
if (!tbl.data)
return -ENOMEM;
tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
- ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
kfree(tbl.data);
return ret;
}
static int proc_allowed_congestion_control(ctl_table *ctl,
- int write, struct file * filp,
+ int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
return -ENOMEM;
tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
- ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = tcp_set_allowed_congestion_control(tbl.data);
kfree(tbl.data);
#ifdef CONFIG_SYSCTL
static
-int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_forward(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
int ret;
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write)
ret = addrconf_fixup_forwarding(ctl, valp, val);
}
static
-int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_disable(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
int ret;
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write)
ret = addrconf_disable_ipv6(ctl, valp, val);
struct net_device_stats *stats = &t->dev->stats;
int ret;
- if (t->recursion++) {
- stats->collisions++;
- goto tx_err;
- }
-
switch (skb->protocol) {
case htons(ETH_P_IP):
ret = ip4ip6_tnl_xmit(skb, dev);
if (ret < 0)
goto tx_err;
- t->recursion--;
return NETDEV_TX_OK;
tx_err:
stats->tx_errors++;
stats->tx_dropped++;
kfree_skb(skb);
- t->recursion--;
return NETDEV_TX_OK;
}
}
}
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct net_device *dev = ctl->extra1;
struct inet6_dev *idev;
ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
if (strcmp(ctl->procname, "retrans_time") == 0)
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
else if (strcmp(ctl->procname, "base_reachable_time") == 0)
ret = proc_dointvec_jiffies(ctl, write,
- filp, buffer, lenp, ppos);
+ buffer, lenp, ppos);
else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
(strcmp(ctl->procname, "base_reachable_time_ms") == 0))
ret = proc_dointvec_ms_jiffies(ctl, write,
- filp, buffer, lenp, ppos);
+ buffer, lenp, ppos);
else
ret = -1;
#ifdef CONFIG_SYSCTL
static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
int delay = net->ipv6.sysctl.flush_delay;
if (write) {
- proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ proc_dointvec(ctl, write, buffer, lenp, ppos);
fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
return 0;
} else
struct in6_addr *addr6;
int addr_type;
- if (tunnel->recursion++) {
- stats->collisions++;
- goto tx_error;
- }
-
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
ip_rt_put(rt);
stats->tx_dropped++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
if (skb->sk)
nf_reset(skb);
IPTUNNEL_XMIT();
- tunnel->recursion--;
return NETDEV_TX_OK;
tx_error_icmp:
tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
- tunnel->recursion--;
return NETDEV_TX_OK;
}
/* For other sysctl, I've no idea of the range. Maybe Dag could help
* us on that - Jean II */
-static int do_devname(ctl_table *table, int write, struct file *filp,
+static int do_devname(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_dostring(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dostring(table, write, buffer, lenp, ppos);
if (ret == 0 && write) {
struct ias_value *val;
}
-static int do_discovery(ctl_table *table, int write, struct file *filp,
+static int do_discovery(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret)
return ret;
bss->dtim_period = tim_ie->dtim_period;
}
- /* set default value for buggy APs */
- if (!elems->tim || bss->dtim_period == 0)
+ /* set default value for buggy AP/no TIM element */
+ if (bss->dtim_period == 0)
bss->dtim_period = 1;
bss->supp_rates_len = 0;
static int
-proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
int val = *valp;
int rc;
- rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ rc = proc_dointvec(table, write, buffer, lenp, ppos);
if (write && (*valp != val)) {
if ((*valp < 0) || (*valp > 3)) {
/* Restore the correct value */
static int
-proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+proc_do_sync_threshold(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
/* backup the value first */
memcpy(val, valp, sizeof(val));
- rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ rc = proc_dointvec(table, write, buffer, lenp, ppos);
if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
/* Restore the correct value */
memcpy(valp, val, sizeof(val));
static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
static struct ctl_table_header *nf_log_dir_header;
-static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
+static int nf_log_proc_dostring(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
const struct nf_logger *logger;
table->data = "NONE";
else
table->data = logger->name;
- r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+ r = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&nf_log_mutex);
}
return err;
}
+void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
+{
+ struct sock *sk;
+ struct hlist_node *node;
+ struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+
+ sk_for_each_bound(sk, node, &tbl->mc_list)
+ netlink_update_socket_mc(nlk_sk(sk), group, 0);
+}
+
/**
* netlink_clear_multicast_users - kick off multicast listeners
*
*/
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
- struct sock *sk;
- struct hlist_node *node;
- struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
-
netlink_table_grab();
-
- sk_for_each_bound(sk, node, &tbl->mc_list)
- netlink_update_socket_mc(nlk_sk(sk), group, 0);
-
+ __netlink_clear_multicast_users(ksk, group);
netlink_table_ungrab();
}
struct net *net;
BUG_ON(grp->family != family);
+ netlink_table_grab();
rcu_read_lock();
for_each_net_rcu(net)
- netlink_clear_multicast_users(net->genl_sock, grp->id);
+ __netlink_clear_multicast_users(net->genl_sock, grp->id);
rcu_read_unlock();
+ netlink_table_ungrab();
clear_bit(grp->id, mc_groups);
list_del(&grp->list);
goto drop;
}
+ /* Broadcast sending is not implemented */
+ if (pn_addr(dst) == PNADDR_BROADCAST) {
+ err = -EOPNOTSUPP;
+ goto drop;
+ }
+
skb_reset_transport_header(skb);
WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */
skb_push(skb, sizeof(struct phonethdr));
}
EXPORT_SYMBOL(pn_sock_unhash);
+static DEFINE_MUTEX(port_mutex);
+
static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
{
struct sock *sk = sock->sk;
err = -EINVAL; /* attempt to rebind */
goto out;
}
+ WARN_ON(sk_hashed(sk));
+ mutex_lock(&port_mutex);
err = sk->sk_prot->get_port(sk, pn_port(handle));
if (err)
- goto out;
+ goto out_port;
/* get_port() sets the port, bind() sets the address if applicable */
pn->sobject = pn_object(saddr, pn_port(pn->sobject));
/* Enable RX on the socket */
sk->sk_prot->hash(sk);
+out_port:
+ mutex_unlock(&port_mutex);
out:
release_sock(sk);
return err;
};
EXPORT_SYMBOL(phonet_stream_ops);
-static DEFINE_MUTEX(port_mutex);
-
/* allocate port for a socket */
int pn_sock_get_port(struct sock *sk, unsigned short sport)
{
memset(&try_sa, 0, sizeof(struct sockaddr_pn));
try_sa.spn_family = AF_PHONET;
-
- mutex_lock(&port_mutex);
-
+ WARN_ON(!mutex_is_locked(&port_mutex));
if (!sport) {
/* search free port */
int port, pmin, pmax;
else
sock_put(tmpsk);
}
- mutex_unlock(&port_mutex);
-
/* the port must be in use already */
return -EADDRINUSE;
} while (read_seqretry(&local_port_range_lock, seq));
}
-static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
+static int proc_local_port_range(ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
.extra2 = &local_port_range_max,
};
- ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
if (range[1] < range[0])
#include <linux/types.h>
#include <linux/module.h>
-#include <linux/utsname.h>
#include <linux/sunrpc/clnt.h>
#ifdef RPC_DEBUG
/**
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
- * @ops: RPC call ops
+ * @req: RPC request
+ * @tk_ops: RPC call ops
*/
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
- const struct rpc_call_ops *tk_ops)
+ const struct rpc_call_ops *tk_ops)
{
struct rpc_task *task;
struct xdr_buf *xbufp = &req->rq_snd_buf;
/**
* rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs
- * @path: path from the rpc_pipefs root to the new directory
+ * @dentry: dentry from the rpc_pipefs root to the new directory
+ * @name: &struct qstr for the name
* @rpc_client: rpc client to associate with this directory
*
* This creates a directory at the given @path associated with
}
}
-static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+static int proc_do_xprt(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
}
static int
-proc_dodebug(ctl_table *table, int write, struct file *file,
+proc_dodebug(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[20], c, *s;
* current value.
*/
static int read_reset_stat(ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
atomic_t *stat = (atomic_t *)table->data;
dprintk("RPC: xs_close xprt %p\n", xprt);
xs_reset_transport(transport);
+ xprt->reestablish_timeout = 0;
smp_mb__before_clear_bit();
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
if (xprt->shutdown)
goto out;
+ /* Any data means we had a useful conversation, so
+ * the we don't need to delay the next reconnect
+ */
+ if (xprt->reestablish_timeout)
+ xprt->reestablish_timeout = 0;
+
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
rd_desc.arg.data = xprt;
do {
&transport->connect_worker,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
} else {
wdev->wext.connect.ie = wdev->wext.ie;
wdev->wext.connect.ie_len = wdev->wext.ie_len;
- wdev->wext.connect.privacy = wdev->wext.default_key != -1;
if (wdev->wext.keys) {
wdev->wext.keys->def = wdev->wext.default_key;
wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key;
+ wdev->wext.connect.privacy = true;
}
if (!wdev->wext.connect.ssid_len)
# is automatically cleaned up.
try-run = $(shell set -e; \
TMP="$(TMPOUT).$$$$.tmp"; \
+ TMPO="$(TMPOUT).$$$$.o"; \
if ($(1)) >/dev/null 2>&1; \
then echo "$(2)"; \
else echo "$(3)"; \
fi; \
- rm -f "$$TMP")
+ rm -f "$$TMP" "$$TMPO")
# as-option
# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
cc-option = $(call try-run,\
- $(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
+ $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
# cc-option-yn
# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
cc-option-yn = $(call try-run,\
- $(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
+ $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
# cc-option-align
# Prefix align with either -falign or -malign
# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
cc-ifversion = $(shell [ $(call cc-version, $(CC)) $(1) $(2) ] && echo $(3))
+# cc-ldoption
+# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+cc-ldoption = $(call try-run,\
+ $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2))
+
# ld-option
-# Usage: ldflags += $(call ld-option, -Wl$(comma)--hash-style=both)
+# Usage: LDFLAGS += $(call ld-option, -X)
ld-option = $(call try-run,\
- $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2))
+ $(CC) /dev/null -c -o "$$TMPO" ; $(LD) $(1) "$$TMPO" -o "$$TMP",$(1),$(2))
######
endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
"$(if $(CONFIG_64BIT),64,32)" \
"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
"$(if $(part-of-module),1,0)" "$(@)";
$(call echo-cmd,checksrc) $(cmd_checksrc) \
$(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \
$(cmd_modversions) \
+ $(call echo-cmd,record_mcount) \
$(cmd_record_mcount) \
scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \
$(dot-target).tmp; \
# Linker scripts preprocessor (.lds.S -> .lds)
# ---------------------------------------------------------------------------
quiet_cmd_cpp_lds_S = LDS $@
- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $<
+ cmd_cpp_lds_S = $(CPP) $(cpp_flags) -P -C -U$(ARCH) \
+ -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
$(obj)/%.lds: $(src)/%.lds.S FORCE
$(call if_changed_dep,cpp_lds_S)
static char *srctree, *kernsrctree;
-void usage (void)
+static void usage (void)
{
fprintf(stderr, "Usage: docproc {doc|depend} file\n");
fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n");
/*
* Execute kernel-doc with parameters given in svec
*/
-void exec_kernel_doc(char **svec)
+static void exec_kernel_doc(char **svec)
{
pid_t pid;
int ret;
struct symfile symfilelist[MAXFILES];
int symfilecnt = 0;
-void add_new_symbol(struct symfile *sym, char * symname)
+static void add_new_symbol(struct symfile *sym, char * symname)
{
sym->symbollist =
realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *));
}
/* Add a filename to the list */
-struct symfile * add_new_file(char * filename)
+static struct symfile * add_new_file(char * filename)
{
symfilelist[symfilecnt++].filename = strdup(filename);
return &symfilelist[symfilecnt - 1];
}
/* Check if file already are present in the list */
-struct symfile * filename_exist(char * filename)
+static struct symfile * filename_exist(char * filename)
{
int i;
for (i=0; i < symfilecnt; i++)
* List all files referenced within the template file.
* Files are separated by tabs.
*/
-void adddep(char * file) { printf("\t%s", file); }
-void adddep2(char * file, char * line) { line = line; adddep(file); }
-void noaction(char * line) { line = line; }
-void noaction2(char * file, char * line) { file = file; line = line; }
+static void adddep(char * file) { printf("\t%s", file); }
+static void adddep2(char * file, char * line) { line = line; adddep(file); }
+static void noaction(char * line) { line = line; }
+static void noaction2(char * file, char * line) { file = file; line = line; }
/* Echo the line without further action */
-void printline(char * line) { printf("%s", line); }
+static void printline(char * line) { printf("%s", line); }
/*
* Find all symbols in filename that are exported with EXPORT_SYMBOL &
* EXPORT_SYMBOL_GPL (& EXPORT_SYMBOL_GPL_FUTURE implicitly).
* All symbols located are stored in symfilelist.
*/
-void find_export_symbols(char * filename)
+static void find_export_symbols(char * filename)
{
FILE * fp;
struct symfile *sym;
* intfunc uses -nofunction
* extfunc uses -function
*/
-void docfunctions(char * filename, char * type)
+static void docfunctions(char * filename, char * type)
{
int i,j;
int symcnt = 0;
fflush(stdout);
free(vec);
}
-void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); }
-void extfunc(char * filename) { docfunctions(filename, FUNCTION); }
+static void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); }
+static void extfunc(char * filename) { docfunctions(filename, FUNCTION); }
/*
* Document specific function(s) in a file.
* Call kernel-doc with the following parameters:
* kernel-doc -docbook -function function1 [-function function2]
*/
-void singfunc(char * filename, char * line)
+static void singfunc(char * filename, char * line)
{
char *vec[200]; /* Enough for specific functions */
int i, idx = 0;
* Call kernel-doc with the following parameters:
* kernel-doc -docbook -function "doc section" filename
*/
-void docsect(char *filename, char *line)
+static void docsect(char *filename, char *line)
{
char *vec[6]; /* kerneldoc -docbook -function "section" file NULL */
char *s;
* 5) Lines containing !P
* 6) Default lines - lines not matching the above
*/
-void parse_file(FILE *infile)
+static void parse_file(FILE *infile)
{
char line[MAXLINESZ];
char * s;
char *depfile;
char *cmdline;
-void usage(void)
+static void usage(void)
{
fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n");
exit(1);
/*
* Print out the commandline prefixed with cmd_<target filename> :=
*/
-void print_cmdline(void)
+static void print_cmdline(void)
{
printf("cmd_%s := %s\n\n", target, cmdline);
}
* Grow the configuration string to a desired length.
* Usually the first growth is plenty.
*/
-void grow_config(int len)
+static void grow_config(int len)
{
while (len_config + len > size_config) {
if (size_config == 0)
/*
* Lookup a value in the configuration string.
*/
-int is_defined_config(const char * name, int len)
+static int is_defined_config(const char * name, int len)
{
const char * pconfig;
const char * plast = str_config + len_config - len;
/*
* Add a new value to the configuration string.
*/
-void define_config(const char * name, int len)
+static void define_config(const char * name, int len)
{
grow_config(len + 1);
/*
* Clear the set of configuration strings.
*/
-void clear_config(void)
+static void clear_config(void)
{
len_config = 0;
define_config("", 0);
/*
* Record the use of a CONFIG_* word.
*/
-void use_config(char *m, int slen)
+static void use_config(char *m, int slen)
{
char s[PATH_MAX];
char *p;
printf(" $(wildcard include/config/%s.h) \\\n", s);
}
-void parse_config_file(char *map, size_t len)
+static void parse_config_file(char *map, size_t len)
{
int *end = (int *) (map + len);
/* start at +1, so that p can never be < map */
}
/* test is s ends in sub */
-int strrcmp(char *s, char *sub)
+static int strrcmp(char *s, char *sub)
{
int slen = strlen(s);
int sublen = strlen(sub);
return memcmp(s + slen - sublen, sub, sublen);
}
-void do_config_file(char *filename)
+static void do_config_file(char *filename)
{
struct stat st;
int fd;
close(fd);
}
-void parse_dep_file(void *map, size_t len)
+static void parse_dep_file(void *map, size_t len)
{
char *m = map;
char *end = m + len;
printf("$(deps_%s):\n", target);
}
-void print_deps(void)
+static void print_deps(void)
{
struct stat st;
int fd;
close(fd);
}
-void traps(void)
+static void traps(void)
{
static char test[] __attribute__((aligned(sizeof(int)))) = "CONF";
int *p = (int *)test;
* http://www.cse.yorku.ca/~oz/hash.html
*/
-unsigned int djb2_hash(char *str)
+static unsigned int djb2_hash(char *str)
{
unsigned long hash = 5381;
int c;
return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
}
-unsigned int r5_hash(char *str)
+static unsigned int r5_hash(char *str)
{
unsigned long hash = 0;
int c;
#!/usr/bin/perl
#
-# checkincludes: Find files included more than once in (other) files.
+# checkincludes: find/remove files included more than once
+#
# Copyright abandoned, 2000, Niels Kristian Bech Jensen <nkbj@image.dk>.
+# Copyright 2009 Luis R. Rodriguez <mcgrof@gmail.com>
+#
+# This script checks for duplicate includes. It also has support
+# to remove them in place. Note that this will not take into
+# consideration macros so you should run this only if you know
+# you do have real dups and do not have them under #ifdef's. You
+# could also just review the results.
+
+sub usage {
+ print "Usage: checkincludes.pl [-r]\n";
+ print "By default we just warn of duplicates\n";
+ print "To remove duplicated includes in place use -r\n";
+ exit 1;
+}
+
+my $remove = 0;
+
+if ($#ARGV < 0) {
+ usage();
+}
+
+if ($#ARGV >= 1) {
+ if ($ARGV[0] =~ /^-/) {
+ if ($ARGV[0] eq "-r") {
+ $remove = 1;
+ shift;
+ } else {
+ usage();
+ }
+ }
+}
foreach $file (@ARGV) {
open(FILE, $file) or die "Cannot open $file: $!.\n";
my %includedfiles = ();
+ my @file_lines = ();
while (<FILE>) {
if (m/^\s*#\s*include\s*[<"](\S*)[>"]/o) {
++$includedfiles{$1};
}
+ push(@file_lines, $_);
}
-
- foreach $filename (keys %includedfiles) {
- if ($includedfiles{$filename} > 1) {
- print "$file: $filename is included more than once.\n";
+
+ close(FILE);
+
+ if (!$remove) {
+ foreach $filename (keys %includedfiles) {
+ if ($includedfiles{$filename} > 1) {
+ print "$file: $filename is included more than once.\n";
+ }
}
+ next;
}
+ open(FILE,">$file") || die("Cannot write to $file: $!");
+
+ my $dups = 0;
+ foreach (@file_lines) {
+ if (m/^\s*#\s*include\s*[<"](\S*)[>"]/o) {
+ foreach $filename (keys %includedfiles) {
+ if ($1 eq $filename) {
+ if ($includedfiles{$filename} > 1) {
+ $includedfiles{$filename}--;
+ $dups++;
+ } else {
+ print FILE $_;
+ }
+ }
+ }
+ } else {
+ print FILE $_;
+ }
+ }
+ if ($dups > 0) {
+ print "$file: removed $dups duplicate includes\n";
+ }
close(FILE);
}
static char line[128];
static struct menu *rootEntry;
-static char nohelp_text[] = N_("Sorry, no help available for this option yet.\n");
-
-static const char *get_help(struct menu *menu)
+static void print_help(struct menu *menu)
{
- if (menu_has_help(menu))
- return _(menu_get_help(menu));
- else
- return nohelp_text;
+ struct gstr help = str_new();
+
+ menu_get_ext_help(menu, &help);
+
+ printf("\n%s\n", str_get(&help));
+ str_free(&help);
}
static void strip(char *str)
return 1;
}
-int conf_string(struct menu *menu)
+static int conf_string(struct menu *menu)
{
struct symbol *sym = menu->sym;
const char *def;
case '?':
/* print help */
if (line[1] == '\n') {
- printf("\n%s\n", get_help(menu));
+ print_help(menu);
def = NULL;
break;
}
if (sym_set_tristate_value(sym, newval))
return 0;
help:
- printf("\n%s\n", get_help(menu));
+ print_help(menu);
}
}
fgets(line, 128, stdin);
strip(line);
if (line[0] == '?') {
- printf("\n%s\n", get_help(menu));
+ print_help(menu);
continue;
}
if (!line[0])
if (!child)
continue;
if (line[strlen(line) - 1] == '?') {
- printf("\n%s\n", get_help(child));
+ print_help(child);
continue;
}
sym_set_choice_value(sym, child->sym);
return 0;
}
-int conf_split_config(void)
+static int conf_split_config(void)
{
const char *name;
char path[128];
/*
* e1 || e2 -> ?
*/
-struct expr *expr_join_or(struct expr *e1, struct expr *e2)
+static struct expr *expr_join_or(struct expr *e1, struct expr *e2)
{
struct expr *tmp;
struct symbol *sym1, *sym2;
return NULL;
}
-struct expr *expr_join_and(struct expr *e1, struct expr *e2)
+static struct expr *expr_join_and(struct expr *e1, struct expr *e2)
{
struct expr *tmp;
struct symbol *sym1, *sym2;
static void expr_print_gstr_helper(void *data, struct symbol *sym, const char *str)
{
str_append((struct gstr*)data, str);
+ if (sym)
+ str_printf((struct gstr*)data, " [=%s]", sym_get_string_value(sym));
}
void expr_gstr_print(struct expr *e, struct gstr *gs)
GtkTextBuffer *buffer;
GtkTextIter start, end;
const char *prompt = _(menu_get_prompt(menu));
- gchar *name;
- const char *help;
+ struct gstr help = str_new();
- help = menu_get_help(menu);
-
- /* Gettextize if the help text not empty */
- if ((help != 0) && (help[0] != 0))
- help = _(help);
-
- if (menu->sym && menu->sym->name)
- name = g_strdup_printf(menu->sym->name);
- else
- name = g_strdup("");
+ menu_get_ext_help(menu, &help);
buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w));
gtk_text_buffer_get_bounds(buffer, &start, &end);
gtk_text_buffer_get_end_iter(buffer, &end);
gtk_text_buffer_insert_with_tags(buffer, &end, prompt, -1, tag1,
NULL);
- gtk_text_buffer_insert_at_cursor(buffer, " ", 1);
- gtk_text_buffer_get_end_iter(buffer, &end);
- gtk_text_buffer_insert_with_tags(buffer, &end, name, -1, tag1,
- NULL);
gtk_text_buffer_insert_at_cursor(buffer, "\n\n", 2);
gtk_text_buffer_get_end_iter(buffer, &end);
- gtk_text_buffer_insert_with_tags(buffer, &end, help, -1, tag2,
+ gtk_text_buffer_insert_with_tags(buffer, &end, str_get(&help), -1, tag2,
NULL);
+ str_free(&help);
}
<property name="headers_visible">True</property>
<property name="rules_hint">False</property>
<property name="reorderable">False</property>
- <property name="enable_search">True</property>
+ <property name="enable_search">False</property>
<signal name="cursor_changed" handler="on_treeview2_cursor_changed" last_modification_time="Sun, 12 Jan 2003 15:58:22 GMT"/>
<signal name="button_press_event" handler="on_treeview1_button_press_event" last_modification_time="Sun, 12 Jan 2003 16:03:52 GMT"/>
<signal name="key_press_event" handler="on_treeview2_key_press_event" last_modification_time="Sun, 12 Jan 2003 16:11:44 GMT"/>
<property name="headers_visible">True</property>
<property name="rules_hint">False</property>
<property name="reorderable">False</property>
- <property name="enable_search">True</property>
+ <property name="enable_search">False</property>
<signal name="cursor_changed" handler="on_treeview2_cursor_changed" last_modification_time="Sun, 12 Jan 2003 15:57:55 GMT"/>
<signal name="button_press_event" handler="on_treeview2_button_press_event" last_modification_time="Sun, 12 Jan 2003 15:57:58 GMT"/>
<signal name="key_press_event" handler="on_treeview2_key_press_event" last_modification_time="Sun, 12 Jan 2003 15:58:01 GMT"/>
return rc;
}
-void menu_build_message_list(struct menu *menu)
+static void menu_build_message_list(struct menu *menu)
{
struct menu *child;
"msgstr \"\"\n", self->msg);
}
-void menu__xgettext(void)
+static void menu__xgettext(void)
{
struct message *m = message__list;
P(menu_get_parent_menu,struct menu *,(struct menu *menu));
P(menu_has_help,bool,(struct menu *menu));
P(menu_get_help,const char *,(struct menu *menu));
+P(get_symbol_str,void,(struct gstr *r, struct symbol *sym));
+P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help));
/* symbol.c */
P(symbol_hash,struct symbol *,[SYMBOL_HASHSIZE]);
setmod_text[] = N_(
"This feature depends on another which has been configured as a module.\n"
"As a result, this feature will be built as a module."),
-nohelp_text[] = N_(
- "There is no help available for this kernel option.\n"),
load_config_text[] = N_(
"Enter the name of the configuration file you wish to load. "
"Accept the name shown to restore the configuration you "
static void show_helptext(const char *title, const char *text);
static void show_help(struct menu *menu);
-static void get_prompt_str(struct gstr *r, struct property *prop)
-{
- int i, j;
- struct menu *submenu[8], *menu;
-
- str_printf(r, _("Prompt: %s\n"), _(prop->text));
- str_printf(r, _(" Defined at %s:%d\n"), prop->menu->file->name,
- prop->menu->lineno);
- if (!expr_is_yes(prop->visible.expr)) {
- str_append(r, _(" Depends on: "));
- expr_gstr_print(prop->visible.expr, r);
- str_append(r, "\n");
- }
- menu = prop->menu->parent;
- for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent)
- submenu[i++] = menu;
- if (i > 0) {
- str_printf(r, _(" Location:\n"));
- for (j = 4; --i >= 0; j += 2) {
- menu = submenu[i];
- str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu)));
- if (menu->sym) {
- str_printf(r, " (%s [=%s])", menu->sym->name ?
- menu->sym->name : _("<choice>"),
- sym_get_string_value(menu->sym));
- }
- str_append(r, "\n");
- }
- }
-}
-
-static void get_symbol_str(struct gstr *r, struct symbol *sym)
-{
- bool hit;
- struct property *prop;
-
- if (sym && sym->name)
- str_printf(r, "Symbol: %s [=%s]\n", sym->name,
- sym_get_string_value(sym));
- for_all_prompts(sym, prop)
- get_prompt_str(r, prop);
- hit = false;
- for_all_properties(sym, prop, P_SELECT) {
- if (!hit) {
- str_append(r, " Selects: ");
- hit = true;
- } else
- str_printf(r, " && ");
- expr_gstr_print(prop->expr, r);
- }
- if (hit)
- str_append(r, "\n");
- if (sym->rev_dep.expr) {
- str_append(r, _(" Selected by: "));
- expr_gstr_print(sym->rev_dep.expr, r);
- str_append(r, "\n");
- }
- str_append(r, "\n\n");
-}
-
static struct gstr get_relations_str(struct symbol **sym_arr)
{
struct symbol *sym;
static void show_help(struct menu *menu)
{
struct gstr help = str_new();
- struct symbol *sym = menu->sym;
-
- if (menu_has_help(menu))
- {
- if (sym->name) {
- str_printf(&help, "CONFIG_%s:\n\n", sym->name);
- str_append(&help, _(menu_get_help(menu)));
- str_append(&help, "\n");
- }
- } else {
- str_append(&help, nohelp_text);
- }
- get_symbol_str(&help, sym);
+
+ menu_get_ext_help(menu, &help);
+
show_helptext(_(menu_get_prompt(menu)), str_get(&help));
str_free(&help);
}
#define LKC_DIRECT_LINK
#include "lkc.h"
+static const char nohelp_text[] = N_(
+ "There is no help available for this kernel option.\n");
+
struct menu rootmenu;
static struct menu **last_entry_ptr;
current_menu = current_menu->parent;
}
-struct expr *menu_check_dep(struct expr *e)
+static struct expr *menu_check_dep(struct expr *e)
{
if (!e)
return e;
(sym2->type == S_UNKNOWN && sym_string_valid(sym, sym2->name));
}
-void sym_check_prop(struct symbol *sym)
+static void sym_check_prop(struct symbol *sym)
{
struct property *prop;
struct symbol *sym2;
else
return "";
}
+
+static void get_prompt_str(struct gstr *r, struct property *prop)
+{
+ int i, j;
+ struct menu *submenu[8], *menu;
+
+ str_printf(r, _("Prompt: %s\n"), _(prop->text));
+ str_printf(r, _(" Defined at %s:%d\n"), prop->menu->file->name,
+ prop->menu->lineno);
+ if (!expr_is_yes(prop->visible.expr)) {
+ str_append(r, _(" Depends on: "));
+ expr_gstr_print(prop->visible.expr, r);
+ str_append(r, "\n");
+ }
+ menu = prop->menu->parent;
+ for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent)
+ submenu[i++] = menu;
+ if (i > 0) {
+ str_printf(r, _(" Location:\n"));
+ for (j = 4; --i >= 0; j += 2) {
+ menu = submenu[i];
+ str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu)));
+ if (menu->sym) {
+ str_printf(r, " (%s [=%s])", menu->sym->name ?
+ menu->sym->name : _("<choice>"),
+ sym_get_string_value(menu->sym));
+ }
+ str_append(r, "\n");
+ }
+ }
+}
+
+void get_symbol_str(struct gstr *r, struct symbol *sym)
+{
+ bool hit;
+ struct property *prop;
+
+ if (sym && sym->name)
+ str_printf(r, "Symbol: %s [=%s]\n", sym->name,
+ sym_get_string_value(sym));
+ for_all_prompts(sym, prop)
+ get_prompt_str(r, prop);
+ hit = false;
+ for_all_properties(sym, prop, P_SELECT) {
+ if (!hit) {
+ str_append(r, " Selects: ");
+ hit = true;
+ } else
+ str_printf(r, " && ");
+ expr_gstr_print(prop->expr, r);
+ }
+ if (hit)
+ str_append(r, "\n");
+ if (sym->rev_dep.expr) {
+ str_append(r, _(" Selected by: "));
+ expr_gstr_print(sym->rev_dep.expr, r);
+ str_append(r, "\n");
+ }
+ str_append(r, "\n\n");
+}
+
+void menu_get_ext_help(struct menu *menu, struct gstr *help)
+{
+ struct symbol *sym = menu->sym;
+
+ if (menu_has_help(menu)) {
+ if (sym->name) {
+ str_printf(help, "CONFIG_%s:\n\n", sym->name);
+ str_append(help, _(menu_get_help(menu)));
+ str_append(help, "\n");
+ }
+ } else {
+ str_append(help, nohelp_text);
+ }
+ if (sym)
+ get_symbol_str(help, sym);
+}
if (showDebug())
debug = debug_info(sym);
- help = menu_get_help(menu);
- /* Gettextize if the help text not empty */
- if (help.isEmpty())
- help = print_filter(menu_get_help(menu));
- else
- help = print_filter(_(menu_get_help(menu)));
+ struct gstr help_gstr = str_new();
+ menu_get_ext_help(menu, &help_gstr);
+ help = print_filter(str_get(&help_gstr));
+ str_free(&help_gstr);
} else if (menu->prompt) {
head += "<big><b>";
head += print_filter(_(menu->prompt->text));
struct expr *sym_env_list;
-void sym_add_default(struct symbol *sym, const char *def)
+static void sym_add_default(struct symbol *sym, const char *def)
{
struct property *prop = prop_alloc(P_DEFAULT, sym);
return NULL;
}
-struct property *sym_get_range_prop(struct symbol *sym)
+static struct property *sym_get_range_prop(struct symbol *sym)
{
struct property *prop;
return "unknown";
}
-void prop_add_env(const char *env)
+static void prop_add_env(const char *env)
{
struct symbol *sym, *sym2;
struct property *prop;
# if it's a module, we need to find the .ko file and calculate a load offset
if ($module ne "") {
- my $dir = dirname($filename);
- $dir = $dir . "/";
- my $mod = $module . ".ko";
- my $modulefile = `find $dir -name $mod | head -1`;
+ my $modulefile = `modinfo $module | grep '^filename:' | awk '{ print \$2 }'`;
chomp($modulefile);
$filename = $modulefile;
if ($filename eq "") {
-I ____cacheline_aligned_in_smp \
-I ____cacheline_internodealigned_in_smp \
-I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \
- --extra=+f --c-kinds=+px \
+ -I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
+ --extra=+f --c-kinds=-px \
--regex-asm='/^ENTRY\(([^)]*)\).*/\1/' \
--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/'
struct cgroup_subsys devices_subsys;
static int devcgroup_can_attach(struct cgroup_subsys *ss,
- struct cgroup *new_cgroup, struct task_struct *task)
+ struct cgroup *new_cgroup, struct task_struct *task,
+ bool threadgroup)
{
if (current != task && !capable(CAP_SYS_ADMIN))
return -EPERM;
/* trawl through the keys looking for keyrings */
for (;;) {
- if (key->expiry > now && key->expiry < new_timer) {
+ if (key->expiry > limit && key->expiry < new_timer) {
kdebug("will expire %x in %ld",
- key_serial(key), key->expiry - now);
+ key_serial(key), key->expiry - limit);
new_timer = key->expiry;
}
char *name1, char *name2)
{
if (!ipv6_addr_any(addr))
- audit_log_format(ab, " %s=%pI6", name1, addr);
+ audit_log_format(ab, " %s=%pI6c", name1, addr);
if (port)
audit_log_format(ab, " %s=%d", name2, ntohs(port));
}
* sysctl handler which just sets dac_mmap_min_addr = the new value and then
* calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly
*/
-int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+int mmap_min_addr_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
- ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
update_mmap_min_addr();
void avc_disable(void)
{
- avc_flush();
- synchronize_rcu();
- if (avc_node_cachep)
- kmem_cache_destroy(avc_node_cachep);
+ /*
+ * If you are looking at this because you have realized that we are
+ * not destroying the avc_node_cachep it might be easy to fix, but
+ * I don't know the memory barrier semantics well enough to know. It's
+ * possible that some other task dereferenced security_ops when
+ * it still pointed to selinux operations. If that is the case it's
+ * possible that it is about to use the avc and is about to need the
+ * avc_node_cachep. I know I could wrap the security.c security_ops call
+ * in an rcu_lock, but seriously, it's not worth it. Instead I just flush
+ * the cache and get that memory back.
+ */
+ if (avc_node_cachep) {
+ avc_flush();
+ /* kmem_cache_destroy(avc_node_cachep); */
+ }
}
/* Wake up the parent if it is waiting so that it can recheck
* wait permission to the new task SID. */
read_lock(&tasklist_lock);
- wake_up_interruptible(¤t->real_parent->signal->wait_chldexit);
+ __wake_up_parent(current, current->real_parent);
read_unlock(&tasklist_lock);
}
gen_init_cpio
initramfs_data.cpio
initramfs_data.cpio.gz
+initramfs_data.cpio.bz2
+initramfs_data.cpio.lzma
initramfs_list
include
PHONY += klibcdirs
-# Gzip, but no bzip2
+# Gzip
suffix_$(CONFIG_INITRAMFS_COMPRESSION_GZIP) = .gz
# Bzip2
bool called = true;
struct kvm_vcpu *vcpu;
- if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
- cpumask_clear(cpus);
+ zalloc_cpumask_var(&cpus, GFP_ATOMIC);
spin_lock(&kvm->requests_lock);
me = smp_processor_id();