diff --git a/Documentation/block/row-iosched.txt b/Documentation/block/row-iosched.txt
new file mode 100644
index 00000000000..987bd883444
--- /dev/null
+++ b/Documentation/block/row-iosched.txt
@@ -0,0 +1,117 @@
+Introduction
+============
+
+The ROW scheduling algorithm will be used in mobile devices as the default
+block layer IO scheduling algorithm. ROW stands for "READ Over WRITE",
+which is the main request dispatch policy of this algorithm.
+
+The ROW IO scheduler was developed with the needs of mobile devices in
+mind. In mobile devices we favor user experience above everything else,
+thus we want to give READ IO requests as much priority as possible.
+The main idea of the ROW scheduling policy is:
+if there are READ requests in the pipe - dispatch them, but don't starve
+the WRITE requests too much.
+
+Software description
+====================
+The requests are kept in queues according to their priority. The
+dispatching of requests is done in a Round Robin manner with a
+different slice for each queue. The dispatch quantum for a specific
+queue is defined according to the queue's priority. READ queues are
+given a bigger dispatch quantum than the WRITE queues, within a dispatch
+cycle.
+
+At the moment there are 6 types of queues the requests are
+distributed to:
+- High priority READ queue
+- High priority Synchronous WRITE queue
+- Regular priority READ queue
+- Regular priority Synchronous WRITE queue
+- Regular priority WRITE queue
+- Low priority READ queue
+
+If in a certain dispatch cycle one of the queues was empty and didn't
+use its quantum, that queue will be marked as "un-served". If we're in
+the middle of a dispatch cycle dispatching from queue Y and a request
+arrives for queue X that was un-served in the previous cycle, and X's
+priority is higher than Y's, queue Y will be preempted in favor of
+queue X. This doesn't mean that the cycle is restarted; the "dispatched"
+counter of queue X will remain unchanged. Once queue X uses up its quantum
+(or has no more requests left) we'll switch back to queue Y
+and allow it to finish its quantum.
+
+For READ request queues we allow idling within a dispatch quantum in
+order to give the application a chance to insert more requests. Idling
+means adding some extra time for serving a certain queue even if the
+queue is empty. The idling is enabled if we identify that the application
+is inserting requests at a high frequency.
+
+For idling on READ queues we use a timer mechanism. When the timer expires,
+if there are requests in the scheduler we will signal the underlying driver
+(for example the MMC driver) to fetch another request for dispatch.
+
+The ROW algorithm takes the scheduling policy one step further, making
+it a bit more "user-needs oriented", by allowing the application to
+hint on the urgency of its requests. For example: even among the READ
+requests, several requests may be more urgent for completion than others.
+The more urgent requests will go to the High priority READ queue, which
+is given a bigger dispatch quantum than any other queue.
+
+The ROW scheduler will support special services for block devices that
+support High Priority Requests. That is, the scheduler may inform the
+device of urgent requests using the new callback make_urgent_request.
+In addition it will support rescheduling of requests that were
+interrupted.
For example, if the device issues a long write request and +a sudden high priority read interrupt pops in, the scheduler will +inform the device about the urgent request, so the device can stop the +current write request and serve the high priority read request. In such +a case the device may also send back to the scheduler the reminder of +the interrupted write request, such that the scheduler may continue +sending high priority requests without the need to interrupt the +ongoing write again and again. The write remainder will be sent later on +according to the scheduler policy. + +Design +====== +Existing algorithms (cfq, deadline) sort the io requests according LBA. +When deciding on the next request to dispatch they choose the closest +request to the current disk head position (from handling last +dispatched request). This is done in order to reduce the disk head +movement to a minimum. +We feel that this functionality isn't really needed in mobile devices. +Usually applications that write/read large chunks of data insert the +requests in already sorted LBA order. Thus dealing with sort trees adds +unnecessary complexity. + +We're planing to try this enhancement in the future to check if the +performance is influenced by it. + +SMP/multi-core +============== +At the moment the code is acceded from 2 contexts: +- Application context (from block/elevator layer): adding the requests. +- Underlying driver context (for example the mmc driver thread): dispatching + the requests and notifying on completion. + +One lock is used to synchronize between the two. This lock is provided +by the underlying driver along with the dispatch queue. + +Config options +============== +1. hp_read_quantum: dispatch quantum for the high priority READ queue +2. rp_read_quantum: dispatch quantum for the regular priority READ queue +3. hp_swrite_quantum: dispatch quantum for the high priority Synchronous + WRITE queue +4. rp_swrite_quantum: dispatch quantum for the regular priority + Synchronous WRITE queue +5. rp_write_quantum: dispatch quantum for the regular priority WRITE + queue +6. lp_read_quantum: dispatch quantum for the low priority READ queue +7. lp_swrite_quantum: dispatch quantum for the low priority Synchronous + WRITE queue +8. read_idle: how long to idle on read queue in Msec (in case idling + is enabled on that queue). +9. read_idle_freq: frequency of inserting READ requests that will + trigger idling. This is the time in Msec between inserting two READ + requests + diff --git a/Documentation/cgroups/timer_slack.txt b/Documentation/cgroups/timer_slack.txt new file mode 100644 index 00000000000..4006eabac1c --- /dev/null +++ b/Documentation/cgroups/timer_slack.txt @@ -0,0 +1,72 @@ +Timer Slack Controller +====================== + +Overview +-------- + +Every task_struct has timer_slack_ns value. This value uses to round up +poll() and select() timeout values. This feature can be useful in +mobile environment where combined wakeups are desired. + +Originally, prctl() was the only way to change timer slack value of +a process. So you was not able change timer slack value of another +process. + +cgroup subsys "timer_slack" implements timer slack controller. It +provides a way to set minimal timer slack value for a group of tasks. +If a task belongs to a cgroup with minimal timer slack value higher than +task's value, cgroup's value will be applied. + +Timer slack controller allows to implement setting timer slack value of +a process based on a policy. 
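+
+The rule being resolved is just a maximum of the two values. Below is a
+minimal user-space sketch of that rule; the effective_slack() helper and
+the 50000/70000 figures are illustrative only, and only the prctl() calls
+are the existing per-task interface:
+
+    /* Illustration only, not the controller's implementation. */
+    #include <stdio.h>
+    #include <sys/prctl.h>
+
+    #ifndef PR_SET_TIMERSLACK
+    #define PR_SET_TIMERSLACK 29
+    #endif
+    #ifndef PR_GET_TIMERSLACK
+    #define PR_GET_TIMERSLACK 30
+    #endif
+
+    /* Effective slack of a task in one cgroup (hypothetical helper). */
+    static unsigned long effective_slack(unsigned long task_ns,
+                                         unsigned long cgroup_min_ns)
+    {
+            return task_ns > cgroup_min_ns ? task_ns : cgroup_min_ns;
+    }
+
+    int main(void)
+    {
+            unsigned long task_ns, cgroup_min_ns = 70000;
+
+            /* The pre-existing way for a task to set its own slack. */
+            prctl(PR_SET_TIMERSLACK, 50000UL, 0, 0, 0);
+            task_ns = prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0);
+
+            printf("task=%lu min=%lu -> effective=%lu ns\n",
+                   task_ns, cgroup_min_ns,
+                   effective_slack(task_ns, cgroup_min_ns));
+            return 0;
+    }
+
+What the controller adds on top of this is the ability to drive the
+cgroup side from a policy daemon instead of from each task.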
For example, you can create foreground and +background cgroups and move tasks between them based on system state. + +User interface +-------------- + +To get timer slack controller functionality you need to enable it in +kernel configuration: + +CONFIG_CGROUP_TIMER_SLACK=y + +The controller provides two files: + +# mount -t cgroup -o timer_slack none /sys/fs/cgroup +# ls /sys/fs/cgroup/timer_slack.* +/sys/fs/cgroup/timer_slack.effective_slack_ns +/sys/fs/cgroup/timer_slack.min_slack_ns + +By default timer_slack.min_slack_ns is 0: + +# cat /sys/fs/cgroup/timer_slack.min_slack_ns +0 + +You can set it to some value: + +# echo 50000 > /sys/fs/cgroup/timer_slack.min_slack_ns +# cat /sys/fs/cgroup/timer_slack.min_slack_ns +50000 + +Tasks still can set task's value below 50000 using prctl(), but in this +case cgroup's value will be applied. + +Timer slack controller supports hierarchical groups. + +# mkdir /sys/fs/cgroup/a +# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns +50000 +# echo 70000 > /sys/fs/cgroup/a/timer_slack.min_slack_ns +# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns +70000 + +You can set any value you want, but effective value will the highest value +up by hierarchy. You can see effective timer slack value for the cgroup from +timer_slack.effective_slack_ns file: + +# cat /sys/fs/cgroup/a/timer_slack.effective_slack_ns +70000 +# echo 100000 > /sys/fs/cgroup/timer_slack.min_slack_ns +# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns +70000 +# cat /sys/fs/cgroup/a/timer_slack.effective_slack_ns +100000 diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt new file mode 100644 index 00000000000..97aca46bd7a --- /dev/null +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt @@ -0,0 +1,11 @@ +NVIDIA Tegra AHB + +Required properties: +- compatible : "nvidia,tegra20-ahb" or "nvidia,tegra30-ahb" +- reg : Should contain 1 register ranges(address and length) + +Example: + ahb: ahb@6000c004 { + compatible = "nvidia,tegra20-ahb"; + reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */ + }; diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 8c624a18f67..ce5fd467791 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -48,6 +48,8 @@ ext4.txt - info, mount options and specifications for the Ext4 filesystem. files.txt - info on file management in the Linux kernel. +f2fs.txt + - info and mount options for the F2FS filesystem. fuse.txt - info on the Filesystem in User SpacE including mount options. gfs2.txt diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt new file mode 100644 index 00000000000..a3fe811bbdb --- /dev/null +++ b/Documentation/filesystems/f2fs.txt @@ -0,0 +1,502 @@ +================================================================================ +WHAT IS Flash-Friendly File System (F2FS)? +================================================================================ + +NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have +been equipped on a variety systems ranging from mobile to server systems. Since +they are known to have different characteristics from the conventional rotating +disks, a file system, an upper layer to the storage device, should adapt to the +changes from the sketch in the design level. 
+ +F2FS is a file system exploiting NAND flash memory-based storage devices, which +is based on Log-structured File System (LFS). The design has been focused on +addressing the fundamental issues in LFS, which are snowball effect of wandering +tree and high cleaning overhead. + +Since a NAND flash memory-based storage device shows different characteristic +according to its internal geometry or flash memory management scheme, namely FTL, +F2FS and its tools support various parameters not only for configuring on-disk +layout, but also for selecting allocation and cleaning algorithms. + +The following git tree provides the file system formatting tool (mkfs.f2fs), +a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). +>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git + +For reporting bugs and sending patches, please use the following mailing list: +>> linux-f2fs-devel@lists.sourceforge.net + +================================================================================ +BACKGROUND AND DESIGN ISSUES +================================================================================ + +Log-structured File System (LFS) +-------------------------------- +"A log-structured file system writes all modifications to disk sequentially in +a log-like structure, thereby speeding up both file writing and crash recovery. +The log is the only structure on disk; it contains indexing information so that +files can be read back from the log efficiently. In order to maintain large free +areas on disk for fast writing, we divide the log into segments and use a +segment cleaner to compress the live information from heavily fragmented +segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and +implementation of a log-structured file system", ACM Trans. Computer Systems +10, 1, 26–52. + +Wandering Tree Problem +---------------------- +In LFS, when a file data is updated and written to the end of log, its direct +pointer block is updated due to the changed location. Then the indirect pointer +block is also updated due to the direct pointer block update. In this manner, +the upper index structures such as inode, inode map, and checkpoint block are +also updated recursively. This problem is called as wandering tree problem [1], +and in order to enhance the performance, it should eliminate or relax the update +propagation as much as possible. + +[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ + +Cleaning Overhead +----------------- +Since LFS is based on out-of-place writes, it produces so many obsolete blocks +scattered across the whole storage. In order to serve new empty log space, it +needs to reclaim these obsolete blocks seamlessly to users. This job is called +as a cleaning process. + +The process consists of three operations as follows. +1. A victim segment is selected through referencing segment usage table. +2. It loads parent index structures of all the data in the victim identified by + segment summary blocks. +3. It checks the cross-reference between the data and its parent index structure. +4. It moves valid data selectively. + +This cleaning job may cause unexpected long delays, so the most important goal +is to hide the latencies to users. And also definitely, it should reduce the +amount of valid data to be moved, and move them quickly as well. 
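+
+To make the steps above concrete, here is a small self-contained sketch.
+The data structures and numbers are invented for this illustration and do
+not reflect the real on-disk format or the kernel code:
+
+    /* Toy walk-through of the cleaning steps (illustration only). */
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    #define NR_SEGMENTS    3
+    #define BLOCKS_PER_SEG 4
+
+    struct segment {
+            int valid_blocks;           /* from the segment usage table */
+            bool valid[BLOCKS_PER_SEG]; /* per-block validity bitmap    */
+    };
+
+    int main(void)
+    {
+            struct segment segs[NR_SEGMENTS] = {
+                    { 3, { true,  true,  false, true  } },
+                    { 1, { false, true,  false, false } },
+                    { 4, { true,  true,  true,  true  } },
+            };
+            int victim = 0, moved = 0, i, blk;
+
+            /* Step 1: pick the segment with the fewest valid blocks. */
+            for (i = 1; i < NR_SEGMENTS; i++)
+                    if (segs[i].valid_blocks < segs[victim].valid_blocks)
+                            victim = i;
+
+            /* Steps 2-4: locate each block's parent index structure (via
+             * the segment summary in the real file system), check the
+             * cross-reference, and migrate only the valid blocks. */
+            for (blk = 0; blk < BLOCKS_PER_SEG; blk++)
+                    if (segs[victim].valid[blk])
+                            moved++;    /* valid: rewrite to a new log */
+
+            printf("victim segment %d: migrated %d of %d blocks\n",
+                   victim, moved, BLOCKS_PER_SEG);
+            return 0;
+    }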
+ +================================================================================ +KEY FEATURES +================================================================================ + +Flash Awareness +--------------- +- Enlarge the random write area for better performance, but provide the high + spatial locality +- Align FS data structures to the operational units in FTL as best efforts + +Wandering Tree Problem +---------------------- +- Use a term, “node”, that represents inodes as well as various pointer blocks +- Introduce Node Address Table (NAT) containing the locations of all the “node” + blocks; this will cut off the update propagation. + +Cleaning Overhead +----------------- +- Support a background cleaning process +- Support greedy and cost-benefit algorithms for victim selection policies +- Support multi-head logs for static/dynamic hot and cold data separation +- Introduce adaptive logging for efficient block allocation + +================================================================================ +MOUNT OPTIONS +================================================================================ + +background_gc=%s Turn on/off cleaning operations, namely garbage + collection, triggered in background when I/O subsystem is + idle. If background_gc=on, it will turn on the garbage + collection and if background_gc=off, garbage collection + will be truned off. + Default value for this option is on. So garbage + collection is on by default. +disable_roll_forward Disable the roll-forward recovery routine +discard Issue discard/TRIM commands when a segment is cleaned. +no_heap Disable heap-style segment allocation which finds free + segments for data from the beginning of main area, while + for node from the end of main area. +nouser_xattr Disable Extended User Attributes. Note: xattr is enabled + by default if CONFIG_F2FS_FS_XATTR is selected. +noacl Disable POSIX Access Control List. Note: acl is enabled + by default if CONFIG_F2FS_FS_POSIX_ACL is selected. +active_logs=%u Support configuring the number of active logs. In the + current design, f2fs supports only 2, 4, and 6 logs. + Default number is 6. +disable_ext_identify Disable the extension list configured by mkfs, so f2fs + does not aware of cold files such as media files. +inline_xattr Enable the inline xattrs feature. + +================================================================================ +DEBUGFS ENTRIES +================================================================================ + +/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as +f2fs. Each file shows the whole f2fs information. + +/sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +================================================================================ +SYSFS ENTRIES +================================================================================ + +Information about mounted f2f2 file systems can be found in +/sys/fs/f2fs. Each mounted filesystem will have a directory in +/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). +The files in each per-device directory are shown in table below. + +Files in /sys/fs/f2fs/ +(see also Documentation/ABI/testing/sysfs-fs-f2fs) +.............................................................................. 
+ File Content + + gc_max_sleep_time This tuning parameter controls the maximum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_min_sleep_time This tuning parameter controls the minimum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_no_gc_sleep_time This tuning parameter controls the default sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_idle This parameter controls the selection of victim + policy for garbage collection. Setting gc_idle = 0 + (default) will disable this option. Setting + gc_idle = 1 will select the Cost Benefit approach + & setting gc_idle = 2 will select the greedy aproach. + + reclaim_segments This parameter controls the number of prefree + segments to be reclaimed. If the number of prefree + segments is larger than this number, f2fs tries to + conduct checkpoint to reclaim the prefree segments + to free segments. By default, 100 segments, 200MB. + +================================================================================ +USAGE +================================================================================ + +1. Download userland tools and compile them. + +2. Skip, if f2fs was compiled statically inside kernel. + Otherwise, insert the f2fs.ko module. + # insmod f2fs.ko + +3. Create a directory trying to mount + # mkdir /mnt/f2fs + +4. Format the block device, and then mount as f2fs + # mkfs.f2fs -l label /dev/block_device + # mount -t f2fs /dev/block_device /mnt/f2fs + +mkfs.f2fs +--------- +The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, +which builds a basic on-disk layout. + +The options consist of: +-l [label] : Give a volume label, up to 512 unicode name. +-a [0 or 1] : Split start location of each area for heap-based allocation. + 1 is set by default, which performs this. +-o [int] : Set overprovision ratio in percent over volume size. + 5 is set by default. +-s [int] : Set the number of segments per section. + 1 is set by default. +-z [int] : Set the number of sections per zone. + 1 is set by default. +-e [str] : Set basic extension list. e.g. "mp3,gif,mov" +-t [0 or 1] : Disable discard command or not. + 1 is set by default, which conducts discard. + +fsck.f2fs +--------- +The fsck.f2fs is a tool to check the consistency of an f2fs-formatted +partition, which examines whether the filesystem metadata and user-made data +are cross-referenced correctly or not. +Note that, initial version of the tool does not fix any inconsistency. + +The options consist of: + -d debug level [default:0] + +dump.f2fs +--------- +The dump.f2fs shows the information of specific inode and dumps SSA and SIT to +file. Each file is dump_ssa and dump_sit. + +The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. +It shows on-disk inode information reconized by a given inode number, and is +able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and +./dump_sit respectively. 
+ +The options consist of: + -d debug level [default:0] + -i inode no (hex) + -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] + -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] + +Examples: +# dump.f2fs -i [ino] /dev/sdx +# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) +# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) + +================================================================================ +DESIGN +================================================================================ + +On-disk Layout +-------------- + +F2FS divides the whole volume into a number of segments, each of which is fixed +to 2MB in size. A section is composed of consecutive segments, and a zone +consists of a set of sections. By default, section and zone sizes are set to one +segment size identically, but users can easily modify the sizes by mkfs. + +F2FS splits the entire volume into six areas, and all the areas except superblock +consists of multiple segments as described below. + + align with the zone size <-| + |-> align with the segment size + _________________________________________________________________________ + | | | Segment | Node | Segment | | + | Superblock | Checkpoint | Info. | Address | Summary | Main | + | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | | + |____________|_____2______|______N______|______N______|______N_____|__N___| + . . + . . + . . + ._________________________________________. + |_Segment_|_..._|_Segment_|_..._|_Segment_| + . . + ._________._________ + |_section_|__...__|_ + . . + .________. + |__zone__| + +- Superblock (SB) + : It is located at the beginning of the partition, and there exist two copies + to avoid file system crash. It contains basic partition information and some + default parameters of f2fs. + +- Checkpoint (CP) + : It contains file system information, bitmaps for valid NAT/SIT sets, orphan + inode lists, and summary entries of current active segments. + +- Segment Information Table (SIT) + : It contains segment information such as valid block count and bitmap for the + validity of all the blocks. + +- Node Address Table (NAT) + : It is composed of a block address table for all the node blocks stored in + Main area. + +- Segment Summary Area (SSA) + : It contains summary entries which contains the owner information of all the + data and node blocks stored in Main area. + +- Main Area + : It contains file and directory data including their indices. + +In order to avoid misalignment between file system and flash-based storage, F2FS +aligns the start block address of CP with the segment size. Also, it aligns the +start block address of Main area with the zone size by reserving some segments +in SSA area. + +Reference the following survey for additional technical details. +https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey + +File System Metadata Structure +------------------------------ + +F2FS adopts the checkpointing scheme to maintain file system consistency. At +mount time, F2FS first tries to find the last valid checkpoint data by scanning +CP area. In order to reduce the scanning time, F2FS uses only two copies of CP. +One of them always indicates the last valid data, which is called as shadow copy +mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism. + +For file system consistency, each CP points to which NAT and SIT copies are +valid, as shown as below. + + +--------+----------+---------+ + | CP | SIT | NAT | + +--------+----------+---------+ + . . . . + . . . . + . . . . 
+ +-------+-------+--------+--------+--------+--------+ + | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 | + +-------+-------+--------+--------+--------+--------+ + | ^ ^ + | | | + `----------------------------------------' + +Index Structure +--------------- + +The key data structure to manage the data locations is a "node". Similar to +traditional file structures, F2FS has three types of node: inode, direct node, +indirect node. F2FS assigns 4KB to an inode block which contains 923 data block +indices, two direct node pointers, two indirect node pointers, and one double +indirect node pointer as described below. One direct node block contains 1018 +data blocks, and one indirect node block contains also 1018 node blocks. Thus, +one inode block (i.e., a file) covers: + + 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. + + Inode block (4KB) + |- data (923) + |- direct node (2) + | `- data (1018) + |- indirect node (2) + | `- direct node (1018) + | `- data (1018) + `- double indirect node (1) + `- indirect node (1018) + `- direct node (1018) + `- data (1018) + +Note that, all the node blocks are mapped by NAT which means the location of +each node is translated by the NAT table. In the consideration of the wandering +tree problem, F2FS is able to cut off the propagation of node updates caused by +leaf data writes. + +Directory Structure +------------------- + +A directory entry occupies 11 bytes, which consists of the following attributes. + +- hash hash value of the file name +- ino inode number +- len the length of file name +- type file type such as directory, symlink, etc + +A dentry block consists of 214 dentry slots and file names. Therein a bitmap is +used to represent whether each dentry is valid or not. A dentry block occupies +4KB with the following composition. + + Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + + dentries(11 * 214 bytes) + file name (8 * 214 bytes) + + [Bucket] + +--------------------------------+ + |dentry block 1 | dentry block 2 | + +--------------------------------+ + . . + . . + . [Dentry Block Structure: 4KB] . + +--------+----------+----------+------------+ + | bitmap | reserved | dentries | file names | + +--------+----------+----------+------------+ + [Dentry Block: 4KB] . . + . . + . . + +------+------+-----+------+ + | hash | ino | len | type | + +------+------+-----+------+ + [Dentry Structure: 11 bytes] + +F2FS implements multi-level hash tables for directory structure. Each level has +a hash table with dedicated number of hash buckets as shown below. Note that +"A(2B)" means a bucket includes 2 data blocks. + +---------------------- +A : bucket +B : block +N : MAX_DIR_HASH_DEPTH +---------------------- + +level #0 | A(2B) + | +level #1 | A(2B) - A(2B) + | +level #2 | A(2B) - A(2B) - A(2B) - A(2B) + . | . . . . +level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) + . | . . . . +level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) + +The number of blocks and buckets are determined by, + + ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, + # of blocks in level #n = | + `- 4, Otherwise + + ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, + # of buckets in level #n = | + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise + +When F2FS finds a file name in a directory, at first a hash value of the file +name is calculated. Then, F2FS scans the hash table in level #0 to find the +dentry consisting of the file name and its inode number. If not found, F2FS +scans the next hash table in level #1. 
In this way, F2FS scans hash tables in +each levels incrementally from 1 to N. In each levels F2FS needs to scan only +one bucket determined by the following equation, which shows O(log(# of files)) +complexity. + + bucket number to scan in level #n = (hash value) % (# of buckets in level #n) + +In the case of file creation, F2FS finds empty consecutive slots that cover the +file name. F2FS searches the empty slots in the hash tables of whole levels from +1 to N in the same way as the lookup operation. + +The following figure shows an example of two cases holding children. + --------------> Dir <-------------- + | | + child child + + child - child [hole] - child + + child - child - child [hole] - [hole] - child + + Case 1: Case 2: + Number of children = 6, Number of children = 3, + File size = 7 File size = 7 + +Default Block Allocation +------------------------ + +At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node +and Hot/Warm/Cold data. + +- Hot node contains direct node blocks of directories. +- Warm node contains direct node blocks except hot node blocks. +- Cold node contains indirect node blocks +- Hot data contains dentry blocks +- Warm data contains data blocks except hot and cold data blocks +- Cold data contains multimedia data or migrated data blocks + +LFS has two schemes for free space management: threaded log and copy-and-compac- +tion. The copy-and-compaction scheme which is known as cleaning, is well-suited +for devices showing very good sequential write performance, since free segments +are served all the time for writing new data. However, it suffers from cleaning +overhead under high utilization. Contrarily, the threaded log scheme suffers +from random writes, but no cleaning process is needed. F2FS adopts a hybrid +scheme where the copy-and-compaction scheme is adopted by default, but the +policy is dynamically changed to the threaded log scheme according to the file +system status. + +In order to align F2FS with underlying flash-based storage, F2FS allocates a +segment in a unit of section. F2FS expects that the section size would be the +same as the unit size of garbage collection in FTL. Furthermore, with respect +to the mapping granularity in FTL, F2FS allocates each section of the active +logs from different zones as much as possible, since FTL can write the data in +the active logs into one allocation unit according to its mapping granularity. + +Cleaning process +---------------- + +F2FS does cleaning both on demand and in the background. On-demand cleaning is +triggered when there are not enough free segments to serve VFS calls. Background +cleaner is operated by a kernel thread, and triggers the cleaning job when the +system is idle. + +F2FS supports two victim selection policies: greedy and cost-benefit algorithms. +In the greedy algorithm, F2FS selects a victim segment having the smallest number +of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment +according to the segment age and the number of valid blocks in order to address +log block thrashing problem in the greedy algorithm. F2FS adopts the greedy +algorithm for on-demand cleaner, while background cleaner adopts cost-benefit +algorithm. + +In order to identify whether the data in the victim segment are valid or not, +F2FS manages a bitmap. Each bit represents the validity of a block, and the +bitmap is composed of a bit stream covering whole blocks in main area. 
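+
+As a rough illustration of the difference between the two victim selection
+policies, the sketch below scores a few invented segments with both
+approaches. The cost-benefit weighting used here is the classical LFS form,
+age * (1 - u) / (1 + u), from the Rosenblum and Ousterhout paper cited
+earlier in this document; it stands in for F2FS's own age/valid-block
+trade-off, whose exact formula is not given here:
+
+    /* Greedy vs. cost-benefit victim selection (illustration only). */
+    #include <stdio.h>
+
+    #define BLOCKS_PER_SEG 512
+
+    struct seg_info {
+            int valid_blocks;   /* from the SIT validity bitmap        */
+            unsigned long age;  /* time since the segment was written  */
+    };
+
+    /* Greedy: the smallest number of valid blocks wins. */
+    static int pick_greedy(const struct seg_info *s, int nr)
+    {
+            int i, victim = 0;
+
+            for (i = 1; i < nr; i++)
+                    if (s[i].valid_blocks < s[victim].valid_blocks)
+                            victim = i;
+            return victim;
+    }
+
+    /* Cost-benefit: older, emptier segments win even when they are not
+     * the absolute emptiest. */
+    static int pick_cost_benefit(const struct seg_info *s, int nr)
+    {
+            double best = -1.0, u, score;
+            int i, victim = 0;
+
+            for (i = 0; i < nr; i++) {
+                    u = (double)s[i].valid_blocks / BLOCKS_PER_SEG;
+                    score = s[i].age * (1.0 - u) / (1.0 + u);
+                    if (score > best) {
+                            best = score;
+                            victim = i;
+                    }
+            }
+            return victim;
+    }
+
+    int main(void)
+    {
+            struct seg_info segs[] = {
+                    { 100, 10 },   /* fairly empty, recently written */
+                    { 150, 900 },  /* a bit fuller, but very old     */
+                    { 400, 500 },
+            };
+            int nr = sizeof(segs) / sizeof(segs[0]);
+
+            printf("greedy picks segment %d, cost-benefit picks segment %d\n",
+                   pick_greedy(segs, nr), pick_cost_benefit(segs, nr));
+            return 0;
+    }
+
+With these toy numbers the greedy policy picks the emptiest segment, while
+the cost-benefit policy prefers an older segment with slightly more valid
+data, which is exactly the thrashing-avoidance behaviour described above.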
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ca5cdcd0f0e..35ff17979b3 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -22,6 +22,13 @@ ip_no_pmtu_disc - BOOLEAN min_pmtu - INTEGER default 562 - minimum discovered Path MTU +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv4 reply packets that are not + associated with a socket for example, TCP RSTs or ICMP echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. @@ -1036,6 +1043,13 @@ conf/all/forwarding - BOOLEAN proxy_ndp - BOOLEAN Do proxy ndp. +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv6 reply packets that are not + associated with a socket for example, TCP RSTs or ICMPv6 echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + conf/interface/*: Change special settings per interface. diff --git a/Makefile b/Makefile index 64562abe90a..cd2931f6d03 100644 --- a/Makefile +++ b/Makefile @@ -158,6 +158,8 @@ VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD)) export srctree objtree VPATH +CCACHE := ccache + # SUBARCH tells the usermode build what the underlying arch is. That is set # first, and if a usermode build is happening, the "ARCH=um" on the command @@ -193,7 +195,7 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile export KBUILD_BUILDHOST := $(SUBARCH) ARCH ?= $(SUBARCH) -CROSS_COMPILE ?= $(CONFIG_CROSS_COMPILE:"%"=%) +CROSS_COMPILE ?= $(CCACHE) $(CONFIG_CROSS_COMPILE:"%"=%) # Architecture as present in compile.h UTS_MACHINE := $(ARCH) @@ -243,8 +245,8 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ else if [ -x /bin/bash ]; then echo /bin/bash; \ else echo sh; fi ; fi) -HOSTCC = gcc -HOSTCXX = g++ +HOSTCC = $(CCACHE) gcc +HOSTCXX = $(CCACHE) g++ HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer HOSTCXXFLAGS = -O2 @@ -330,7 +332,7 @@ include $(srctree)/scripts/Kbuild.include AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc +CC = $(CCACHE) $(CROSS_COMPILE)gcc CPP = $(CC) -E AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm @@ -347,11 +349,12 @@ CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) -CFLAGS_MODULE = -AFLAGS_MODULE = -LDFLAGS_MODULE = -CFLAGS_KERNEL = -AFLAGS_KERNEL = +MODFLAGS = -DMODULE -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a9 -marm -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +CFLAGS_MODULE = $(MODFLAGS) +AFLAGS_MODULE = $(MODFLAGS) +LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds +CFLAGS_KERNEL = -O2 -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage @@ -368,7 +371,12 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ 
-Werror-implicit-function-declaration \ -Wno-format-security \ - -fno-delete-null-pointer-checks + -fno-delete-null-pointer-checks \ + -mtune=cortex-a9 -march=armv7-a -mfpu=neon \ + -fpredictive-commoning -fgcse-after-reload -ftree-vectorize -mvectorize-with-neon-quad \ + -fipa-cp-clone -fsingle-precision-constant \ + -funswitch-loops + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := KBUILD_AFLAGS := -D__ASSEMBLY__ @@ -579,37 +587,37 @@ endif # Use make W=1 to enable this warning (see scripts/Makefile.build) KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -ifdef CONFIG_FRAME_POINTER -KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls -else +#ifdef CONFIG_FRAME_POINTER +#KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls +#else # Some targets (ARM with Thumb2, for example), can't be built with frame # pointers. For those, we don't have FUNCTION_TRACER automatically # select FRAME_POINTER. However, FUNCTION_TRACER adds -pg, and this is # incompatible with -fomit-frame-pointer with current GCC, so we don't use # -fomit-frame-pointer with FUNCTION_TRACER. -ifndef CONFIG_FUNCTION_TRACER +#ifndef CONFIG_FUNCTION_TRACER KBUILD_CFLAGS += -fomit-frame-pointer -endif -endif - -ifdef CONFIG_DEBUG_INFO -KBUILD_CFLAGS += -g -KBUILD_AFLAGS += -gdwarf-2 -endif - -ifdef CONFIG_DEBUG_INFO_REDUCED -KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) -endif - -ifdef CONFIG_FUNCTION_TRACER -KBUILD_CFLAGS += -pg -ifdef CONFIG_DYNAMIC_FTRACE - ifdef CONFIG_HAVE_C_RECORDMCOUNT - BUILD_C_RECORDMCOUNT := y - export BUILD_C_RECORDMCOUNT - endif -endif -endif +#endif +#endif + +#ifdef CONFIG_DEBUG_INFO +#KBUILD_CFLAGS += -g +#KBUILD_AFLAGS += -gdwarf-2 +#endif + +#ifdef CONFIG_DEBUG_INFO_REDUCED +#KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) +#endif + +#ifdef CONFIG_FUNCTION_TRACER +#KBUILD_CFLAGS += -pg +#ifdef CONFIG_DYNAMIC_FTRACE +# ifdef CONFIG_HAVE_C_RECORDMCOUNT +# BUILD_C_RECORDMCOUNT := y +# export BUILD_C_RECORDMCOUNT +# endif +#endif +#endif # We trigger additional mismatches with less inlining ifdef CONFIG_DEBUG_SECTION_MISMATCH diff --git a/android/configs/README b/android/configs/README new file mode 100644 index 00000000000..8798731f890 --- /dev/null +++ b/android/configs/README @@ -0,0 +1,15 @@ +The files in this directory are meant to be used as a base for an Android +kernel config. All devices should have the options in android-base.cfg enabled. +While not mandatory, the options in android-recommended.cfg enable advanced +Android features. + +Assuming you already have a minimalist defconfig for your device, a possible +way to enable these options would be: + + ARCH= scripts/kconfig/merge_config.sh /_defconfig android/configs/android-base.cfg android/configs/android-recommended.cfg + +This will generate a .config that can then be used to save a new defconfig or +compile a new kernel with Android features enabled. + +Because there is no tool to consistently generate these config fragments, +lets keep them alphabetically sorted instead of random. 
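+
+As a concrete but purely illustrative instance of the merge described
+above, for an ARM device whose minimal defconfig lives at
+arch/arm/configs/mydevice_defconfig (the device name and path are
+placeholders), the merge and the capture of the result could look like:
+
+  ARCH=arm scripts/kconfig/merge_config.sh \
+      arch/arm/configs/mydevice_defconfig \
+      android/configs/android-base.cfg \
+      android/configs/android-recommended.cfg
+
+  make ARCH=arm savedefconfig
+
+savedefconfig writes a minimized defconfig for the merged .config, which
+can then replace the device's original defconfig.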
diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg new file mode 100644 index 00000000000..225f3e28590 --- /dev/null +++ b/android/configs/android-base.cfg @@ -0,0 +1,141 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_INET_LRO is not set +# CONFIG_MODULES is not set +# CONFIG_OABI_COMPAT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_INTF_ALARM_DEV=y +CONFIG_ANDROID_LOGGER=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_ASHMEM=y +CONFIG_BLK_DEV_DM=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_SCHED=y +CONFIG_DM_CRYPT=y +CONFIG_DM_VERITY=y +CONFIG_EMBEDDED=y +CONFIG_EXPERIMENTAL=y +CONFIG_FB=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_INET=y +CONFIG_INET_ESP=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT_SKERR=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_REJECT_SKERR=y +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_TPROXY=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_KEY=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_NAT=y +CONFIG_NO_HZ=y +CONFIG_PACKET=y 
+CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PPP=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PREEMPT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_RTC_CLASS=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_STAGING=y +CONFIG_SWITCH=y +CONFIG_SYNC=y +CONFIG_SYSVIPC=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_USB_GADGET=y +CONFIG_USB_G_ANDROID=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_XFRM_USER=y diff --git a/android/configs/android-recommended.cfg b/android/configs/android-recommended.cfg new file mode 100644 index 00000000000..9caa089c5d1 --- /dev/null +++ b/android/configs/android-recommended.cfg @@ -0,0 +1,118 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_NF_CONNTRACK_SIP is not set +# CONFIG_PM_WAKELOCKS_GC is not set +# CONFIG_VT is not set +CONFIG_ANDROID_RAM_CONSOLE=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_COMPACTION=y +CONFIG_DM_UEVENT=y +CONFIG_DRAGONRISE_FF=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FUSE_FS=y +CONFIG_GREENASIA_FF=y +CONFIG_HIDRAW=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WACOM=y +CONFIG_HID_WALTOP=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_GPIO=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_KEYRESET=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_UINPUT=y +CONFIG_ION=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KSM=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGITECH_FF=y +CONFIG_MD=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_MSDOS_FS=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_PANTHERLORD_FF=y +CONFIG_PERF_EVENTS=y +CONFIG_PM_DEBUG=y +CONFIG_PM_RUNTIME=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +CONFIG_POWER_SUPPLY=y +CONFIG_SCHEDSTATS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_SND=y +CONFIG_SOUND=y +CONFIG_SUSPEND_TIME=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TABLET_USB_WACOM=y +CONFIG_TIMER_STATS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_UHID=y +CONFIG_UID_STAT=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_USBNET=y +CONFIG_VFAT_FS=y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3c3b868948a..453978d3bbe 100644 --- a/arch/arm/Kconfig +++ 
b/arch/arm/Kconfig @@ -19,6 +19,7 @@ config ARM select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO + select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_IRQ_WORK select HAVE_PERF_EVENTS @@ -139,10 +140,9 @@ config GENERIC_LOCKBREAK config RWSEM_GENERIC_SPINLOCK bool - default y config RWSEM_XCHGADD_ALGORITHM - bool + def_bool y config ARCH_HAS_ILOG2_U32 bool @@ -1905,6 +1905,26 @@ config ZBOOT_ROM_SH_MOBILE_SDHI endchoice +config ARM_APPENDED_DTB + bool "Use appended device tree blob to zImage (EXPERIMENTAL)" + depends on OF && !ZBOOT_ROM && EXPERIMENTAL + help + With this option, the boot code will look for a device tree binary + (DTB) appended to zImage + (e.g. cat zImage .dtb > zImage_w_dtb). + + This is meant as a backward compatibility convenience for those + systems with a bootloader that can't be upgraded to accommodate + the documented boot protocol using a device tree. + + Beware that there is very little in terms of protection against + this option being confused by leftover garbage in memory that might + look like a DTB header after a reboot if no actual DTB is appended + to zImage. Do not leave this option active in a production kernel + if you don't intend to always append a DTB. Proper passing of the + location into r2 of a bootloader provided DTB is always preferable + to this option. + config CMDLINE string "Default kernel command string" default "" @@ -1993,6 +2013,32 @@ config ATAGS_PROC Should the atags used to boot the kernel be exported in an "atags" file in procfs. Useful with kexec. +config KEXEC_HARDBOOT + bool "Support hard booting to a kexec kernel" + depends on KEXEC + help + Allows hard booting (i.e., with a full hardware reboot) to a kernel + previously loaded in memory by kexec. This works around the problem of + soft-booted kernel hangs due to improper device shutdown and/or + reinitialization. Support is comprised of two components: + + First, a "hardboot" flag is added to the kexec syscall to force a hard + reboot in relocate_new_kernel() (which requires machine-specific assembly + code). This also requires the kexec userspace tool to load the kexec'd + kernel in memory region left untouched by the bootloader (i.e., not + explicitly cleared and not overwritten by the boot kernel). Just prior + to reboot, the kexec kernel arguments are stashed in a machine-specific + memory page that must also be preserved. Note that this hardboot page + need not be reserved during regular kernel execution. + + Second, the zImage decompresor of the boot (bootloader-loaded) kernel is + modified to check the hardboot page for fresh kexec arguments, and if + present, attempts to jump to the kexec'd kernel preserved in memory. + + Note that hardboot support is only required in the boot kernel and any + kernel capable of performing a hardboot kexec. It is _not_ required by a + kexec'd kernel. 
+ config CRASH_DUMP bool "Build kdump crash kernel (EXPERIMENTAL)" depends on EXPERIMENTAL @@ -2103,6 +2149,8 @@ endif source "drivers/cpuidle/Kconfig" +source "drivers/cpuquiet/Kconfig" + endmenu menu "Floating point emulation" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 70c424eaf7b..5507cdc2c17 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -17,7 +17,7 @@ endif OBJCOPYFLAGS :=-O binary -R .comment -S GZFLAGS :=-9 -#KBUILD_CFLAGS +=-pipe +KBUILD_CFLAGS +=-pipe # Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: KBUILD_CFLAGS +=$(call cc-option,-marm,) @@ -33,13 +33,13 @@ ifeq ($(CONFIG_MMU),) MMUEXT := -nommu endif -ifeq ($(CONFIG_FRAME_POINTER),y) -KBUILD_CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog -endif +#ifeq ($(CONFIG_FRAME_POINTER),y) +#KBUILD_CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog +#endif -ifeq ($(CONFIG_CC_STACKPROTECTOR),y) -KBUILD_CFLAGS +=-fstack-protector -endif +#ifeq ($(CONFIG_CC_STACKPROTECTOR),y) +#KBUILD_CFLAGS +=-fstack-protector +#endif ifeq ($(CONFIG_CPU_BIG_ENDIAN),y) KBUILD_CPPFLAGS += -mbig-endian @@ -57,7 +57,7 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 @@ -97,9 +97,9 @@ else CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) endif -ifeq ($(CONFIG_ARM_UNWIND),y) -CFLAGS_ABI +=-funwind-tables -endif +#ifeq ($(CONFIG_ARM_UNWIND),y) +#CFLAGS_ABI +=-funwind-tables +#endif ifeq ($(CONFIG_THUMB2_KERNEL),y) AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) @@ -113,8 +113,8 @@ endif endif # Need -Uarm for gcc < 3.x -KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm -KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_THUMB2) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float +KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -mfpu=neon -Uarm +KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_THUMB2) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float -mfpu=neon CHECKFLAGS += -D__arm__ @@ -253,6 +253,7 @@ core-$(CONFIG_VFP) += arch/arm/vfp/ # If we have a machine-specific directory, then include it in the build. 
core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ +core-y += arch/arm/crypto/ core-y += $(machdirs) $(platdirs) drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/ diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 0c74a6fab95..a422bbc5c58 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -5,6 +5,7 @@ # OBJS = +plus_sec := $(call as-instr,.arch_extension sec,+sec) # Ensure that MMCIF loader code appears early in the image # to minimise that number of bocks that have to be read in @@ -21,9 +22,20 @@ OBJS += sdhi-shmobile.o OBJS += sdhi-sh7372.o endif -AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET) -HEAD = head.o -OBJS += misc.o decompress.o +AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +AFLAGS_head.o += -Wa,-march=armv7-a$(plus_sec) +HEAD = head.o + +AFLAGS_misc.o +=-Wa,-march=armv7-a$(plus_sec) +MISC = misc.o + +AFLAGS_decompress.o += -Wa,-march=armv7-a$(plus_sec) +DECOMPRESS = decompress.o + +ifeq ($(CONFIG_KERNEL_LZ4),y) +CFLAGS_decompress.o := -Os +endif + FONTC = $(srctree)/drivers/video/console/font_acorn_8x8.c # @@ -88,13 +100,14 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ suffix_$(CONFIG_KERNEL_GZIP) = gzip suffix_$(CONFIG_KERNEL_LZO) = lzo suffix_$(CONFIG_KERNEL_LZMA) = lzma +suffix_$(CONFIG_KERNEL_LZ4) = lz4 -targets := vmlinux vmlinux.lds \ +targets := vmlinux vmlinux.lds \ piggy.$(suffix_y) piggy.$(suffix_y).o \ - font.o font.c head.o misc.o $(OBJS) + font.o font.c head.o misc.o decompress.o $(OBJS) # Make sure files are removed during clean -extra-y += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S +extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.lz4 lib1funcs.S ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) @@ -111,6 +124,9 @@ endif ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 endif +ifneq ($(PARAMS_PHYS),) +LDFLAGS_vmlinux += --defsym params_phys=$(PARAMS_PHYS) +endif # ? 
LDFLAGS_vmlinux += -p # Report unresolved symbol references @@ -121,6 +137,7 @@ LDFLAGS_vmlinux += -X LDFLAGS_vmlinux += -T # For __aeabi_uidivmod +AFLAGS_lib1funcs.o +=-Wa,-march=armv7-a$(plus_sec) lib1funcs = $(obj)/lib1funcs.o $(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE @@ -139,7 +156,7 @@ bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \ ( echo "following symbols must have non local/private scope:" >&2; \ echo "$$bad_syms" >&2; rm -f $@; false ) -$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ +$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/$(MISC) $(obj)/$(DECOMPRESS) $(obj)/piggy.$(suffix_y).o \ $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE $(call if_changed,ld) @$(check_for_bad_syms) @@ -147,6 +164,7 @@ $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ $(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE $(call if_changed,$(suffix_y)) +AFLAGS_piggy.$(suffix_y).o += -Wa,-march=armv7-a$(plus_sec) $(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) FORCE CFLAGS_font.o := -Dstatic= diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index 07be5a2f830..dfeaff40df1 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -44,6 +44,10 @@ extern void error(char *); #include "../../../../lib/decompress_unlzma.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { return decompress(input, len, NULL, NULL, output, NULL, error); diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 24701d6f72b..1e2a694ad69 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -9,6 +9,11 @@ * published by the Free Software Foundation. */ #include +#include + +#ifdef CONFIG_KEXEC_HARDBOOT + #include +#endif /* * Debugging stuff @@ -133,7 +138,31 @@ start: .word _edata @ zImage end address THUMB( .thumb ) 1: mov r7, r1 @ save architecture ID - mov r8, r2 @ save atags pointer + teq r0, #0 @ Check for kexec_boot_atags. + movne r8, r0 @ Save kexec_boot_tags. + moveq r8, r2 @ save atags pointer + +#ifdef CONFIG_KEXEC_HARDBOOT + /* Check hardboot page for a kexec kernel. */ + ldr r3, =KEXEC_HB_PAGE_ADDR + ldr r0, [r3] + ldr r1, =KEXEC_HB_PAGE_MAGIC + teq r0, r1 + bne not_booting_other + + /* Clear hardboot page magic to avoid boot loop. */ + mov r0, #0 + str r0, [r3] + + /* Load boot arguments and jump to kexec kernel. */ + ldr r0, [r3, #12] @ kexec_boot_atags (r2: boot_atags) + ldr r1, [r3, #8] @ kexec_mach_type + ldr pc, [r3, #4] @ kexec_start_address + + .ltorg + +not_booting_other: +#endif #ifndef __ARM_ARCH_2__ /* @@ -216,6 +245,59 @@ restart: adr r0, LC0 mov r10, r6 #endif + mov r5, #0 @ init dtb size to 0 +#ifdef CONFIG_ARM_APPENDED_DTB +/* + * r0 = delta + * r2 = BSS start + * r3 = BSS end + * r4 = final kernel address + * r5 = appended dtb size (still unknown) + * r6 = _edata + * r7 = architecture ID + * r8 = atags/device tree pointer + * r9 = size of decompressed image + * r10 = end of this image, including bss/stack/malloc space if non XIP + * r11 = GOT start + * r12 = GOT end + * sp = stack pointer + * + * if there are device trees (dtb) appended to zImage, advance r10 so that the + * dtb data will get relocated along with the kernel if necessary. 
+ */ + + ldr lr, [r6, #0] +#ifndef __ARMEB__ + ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian +#else + ldr r1, =0xd00dfeed +#endif + cmp lr, r1 + bne dtb_check_done @ not found + + mov r8, r6 @ use the appended device tree + + /* Get the dtb's size */ + ldr r5, [r6, #4] +#ifndef __ARMEB__ + /* convert r5 (dtb size) to little endian */ + eor r1, r5, r5, ror #16 + bic r1, r1, #0x00ff0000 + mov r5, r5, ror #8 + eor r5, r5, r1, lsr #8 +#endif + + /* preserve 64-bit alignment */ + add r5, r5, #7 + bic r5, r5, #7 + + /* relocate some pointers past the appended dtb */ + add r6, r6, r5 + add r10, r10, r5 + add sp, sp, r5 +dtb_check_done: +#endif + /* * Check to see if we will overwrite ourselves. * r4 = final kernel address @@ -285,14 +367,16 @@ wont_overwrite: * r2 = BSS start * r3 = BSS end * r4 = kernel execution address + * r5 = appended dtb size (0 if not present) * r7 = architecture ID * r8 = atags pointer * r11 = GOT start * r12 = GOT end * sp = stack pointer */ - teq r0, #0 + orrs r1, r0, r5 beq not_relocated + add r11, r11, r0 add r12, r12, r0 @@ -307,12 +391,21 @@ wont_overwrite: /* * Relocate all entries in the GOT table. + * Bump bss entries to _edata + dtb size */ 1: ldr r1, [r11, #0] @ relocate entries in the GOT - add r1, r1, r0 @ table. This fixes up the - str r1, [r11], #4 @ C references. + add r1, r1, r0 @ This fixes up C references + cmp r1, r2 @ if entry >= bss_start && + cmphs r3, r1 @ bss_end > entry + addhi r1, r1, r5 @ entry += dtb size + str r1, [r11], #4 @ next entry cmp r11, r12 blo 1b + + /* bump our bss pointers too */ + add r2, r2, r5 + add r3, r3, r5 + #else /* @@ -348,6 +441,44 @@ not_relocated: mov r0, #0 add r2, sp, #0x10000 @ 64k max mov r3, r7 bl decompress_kernel + +/* Copy the kernel tagged list (atags): + * + * The kernel requires atags to be located in a direct-mapped region, + * usually below the kernel in the first 16 kB of RAM. If they're above + * (the start of) the kernel, they need to be copied to a suitable + * location, e.g., the machine-defined params_phys. + * + * The assumption is that the tags will only be "out of place" if the + * decompressor code is also, so copying is implemented only in the "won't + * overwrite" case (which should be fixed). Still need to make sure that + * the copied tags don't overwrite either the kernel or decompressor code + * (or rather, the remainder of it since everything up to here has already + * been executed). + * + * r4: zreladdr (kernel start) + * r8: atags */ + + /* Don't need to copy atags if they're already below the kernel. */ + cmp r8, r4 + blo call_kernel + + /* r1: min(zreladdr, pc) */ + mov r1, pc + cmp r4, r1 + movlo r1, r4 + + /* Compute max space for atags, if max <= 0 don't copy. */ + ldr r0, =params_phys @ dest + subs r2, r1, r0 @ max = min(zreladdr, pc) - dest + bls call_kernel + + /* Copy atags to params_phys. */ + mov r1, r8 @ src + bl copy_atags + mov r8, r0 + +call_kernel: bl cache_clean_flush bl cache_off mov r0, #0 @ must be zero @@ -356,6 +487,8 @@ not_relocated: mov r0, #0 ARM( mov pc, r4 ) @ call kernel THUMB( bx r4 ) @ entry point is always ARM + .ltorg + .align 2 .type LC0, #object LC0: .word LC0 @ r1 @@ -467,9 +600,14 @@ __setup_mmu: sub r3, r4, #16384 @ Page directory size * bits for the RAM area only. 
*/ mov r0, r3 +#if defined(PLAT_PHYS_OFFSET) && defined(END_MEM) + mov r9, #PLAT_PHYS_OFFSET @ start of RAM + ldr r10, =END_MEM @ end of RAM +#else mov r9, r0, lsr #18 mov r9, r9, lsl #18 @ start of RAM add r10, r9, #0x10000000 @ a reasonable RAM size +#endif mov r1, #0x12 orr r1, r1, #3 << 10 add r2, r3, #16384 diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c index 832d37236c5..f1ce0efa505 100644 --- a/arch/arm/boot/compressed/misc.c +++ b/arch/arm/boot/compressed/misc.c @@ -25,6 +25,7 @@ unsigned int __machine_arch_type; #include /* for NULL */ #include #include +#include static void putstr(const char *ptr); @@ -192,3 +193,25 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p, else putstr(" done, booting the kernel.\n"); } + +const struct tag *copy_atags(struct tag *dest, const struct tag *src, + size_t max) +{ + struct tag *tag; + size_t size; + + /* Find the last tag (ATAG_NONE). */ + for_each_tag(tag, (struct tag *)src) + continue; + + /* Include the last tag in copy. */ + size = (char *)tag - (char *)src + sizeof(struct tag_header); + + /* If there's not enough room, just use original and hope it works. */ + if (size > max) + return src; + + memcpy(dest, src, size); + + return dest; +} diff --git a/arch/arm/boot/compressed/piggy.lz4.S b/arch/arm/boot/compressed/piggy.lz4.S new file mode 100644 index 00000000000..3d9a575618a --- /dev/null +++ b/arch/arm/boot/compressed/piggy.lz4.S @@ -0,0 +1,6 @@ + .section .piggydata,#alloc + .globl input_data +input_data: + .incbin "arch/arm/boot/compressed/piggy.lz4" + .globl input_data_end +input_data_end: diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in index 4e728834a1b..4919f2ac8b8 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.in +++ b/arch/arm/boot/compressed/vmlinux.lds.in @@ -51,6 +51,10 @@ SECTIONS _got_start = .; .got : { *(.got) } _got_end = .; + + /* ensure the zImage file size is always a multiple of 64 bits */ + /* (without a dummy byte, ld just ignores the empty section) */ + .pad : { BYTE(0); . = ALIGN(8); } _edata = .; . 
= BSS_START; diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 5727595cde6..e37c0ba662d 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -135,5 +135,10 @@ reg = <0xc8000600 0x200>; interrupts = < 63 >; }; + + ahb: ahb at 6000c004 { + compatible = "nvidia,tegra20-ahb"; + reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */ + }; }; diff --git a/arch/arm/boot/dts/tegra30-grouper.dts b/arch/arm/boot/dts/tegra30-grouper.dts new file mode 100644 index 00000000000..e2bf28458d3 --- /dev/null +++ b/arch/arm/boot/dts/tegra30-grouper.dts @@ -0,0 +1,8 @@ +/dts-v1/; + +/include/ "tegra30.dtsi" + +/ { + model = "NVIDIA Tegra30 Grouper"; + compatible = "nvidia,grouper", "nvidia,tegra30"; +}; diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi new file mode 100644 index 00000000000..dbf46c27256 --- /dev/null +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -0,0 +1,575 @@ +/include/ "skeleton.dtsi" + +/ { + compatible = "nvidia,tegra30"; + interrupt-parent = <&intc>; + + aliases { + serial0 = &uarta; + serial1 = &uartb; + serial2 = &uartc; + serial3 = &uartd; + serial4 = &uarte; + }; + + host1x { + compatible = "nvidia,tegra30-host1x", "simple-bus"; + reg = <0x50000000 0x00024000>; + interrupts = <0 65 0x04 /* mpcore syncpt */ + 0 67 0x04>; /* mpcore general */ + clocks = <&tegra_car 28>; + + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x54000000 0x54000000 0x04000000>; + + mpe { + compatible = "nvidia,tegra30-mpe"; + reg = <0x54040000 0x00040000>; + interrupts = <0 68 0x04>; + clocks = <&tegra_car 60>; + }; + + vi { + compatible = "nvidia,tegra30-vi"; + reg = <0x54080000 0x00040000>; + interrupts = <0 69 0x04>; + clocks = <&tegra_car 164>; + }; + + epp { + compatible = "nvidia,tegra30-epp"; + reg = <0x540c0000 0x00040000>; + interrupts = <0 70 0x04>; + clocks = <&tegra_car 19>; + }; + + isp { + compatible = "nvidia,tegra30-isp"; + reg = <0x54100000 0x00040000>; + interrupts = <0 71 0x04>; + clocks = <&tegra_car 23>; + }; + + gr2d { + compatible = "nvidia,tegra30-gr2d"; + reg = <0x54140000 0x00040000>; + interrupts = <0 72 0x04>; + clocks = <&tegra_car 21>; + }; + + gr3d { + compatible = "nvidia,tegra30-gr3d"; + reg = <0x54180000 0x00040000>; + clocks = <&tegra_car 24 &tegra_car 98>; + clock-names = "3d", "3d2"; + }; + + dc@54200000 { + compatible = "nvidia,tegra30-dc"; + reg = <0x54200000 0x00040000>; + interrupts = <0 73 0x04>; + clocks = <&tegra_car 27>, <&tegra_car 179>; + clock-names = "disp1", "parent"; + + rgb { + status = "disabled"; + }; + }; + + dc@54240000 { + compatible = "nvidia,tegra30-dc"; + reg = <0x54240000 0x00040000>; + interrupts = <0 74 0x04>; + clocks = <&tegra_car 26>, <&tegra_car 179>; + clock-names = "disp2", "parent"; + + rgb { + status = "disabled"; + }; + }; + + hdmi { + compatible = "nvidia,tegra30-hdmi"; + reg = <0x54280000 0x00040000>; + interrupts = <0 75 0x04>; + clocks = <&tegra_car 51>, <&tegra_car 189>; + clock-names = "hdmi", "parent"; + status = "disabled"; + }; + + tvo { + compatible = "nvidia,tegra30-tvo"; + reg = <0x542c0000 0x00040000>; + interrupts = <0 76 0x04>; + clocks = <&tegra_car 169>; + status = "disabled"; + }; + + dsi { + compatible = "nvidia,tegra30-dsi"; + reg = <0x54300000 0x00040000>; + clocks = <&tegra_car 48>; + status = "disabled"; + }; + }; + + timer@50004600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x50040600 0x20>; + interrupts = <1 13 0xf04>; + clocks = <&tegra_car 214>; + }; + + intc: interrupt-controller { + compatible = 
"arm,cortex-a9-gic"; + reg = <0x50041000 0x1000 + 0x50040100 0x0100>; + interrupt-controller; + #interrupt-cells = <3>; + }; + + cache-controller { + compatible = "arm,pl310-cache"; + reg = <0x50043000 0x1000>; + arm,data-latency = <6 6 2>; + arm,tag-latency = <5 5 2>; + cache-unified; + cache-level = <2>; + }; + + timer@60005000 { + compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer"; + reg = <0x60005000 0x400>; + interrupts = <0 0 0x04 + 0 1 0x04 + 0 41 0x04 + 0 42 0x04 + 0 121 0x04 + 0 122 0x04>; + }; + + tegra_car: clock { + compatible = "nvidia,tegra30-car"; + reg = <0x60006000 0x1000>; + #clock-cells = <1>; + }; + + apbdma: dma { + compatible = "nvidia,tegra30-apbdma", "nvidia,tegra20-apbdma"; + reg = <0x6000a000 0x1400>; + interrupts = <0 104 0x04 + 0 105 0x04 + 0 106 0x04 + 0 107 0x04 + 0 108 0x04 + 0 109 0x04 + 0 110 0x04 + 0 111 0x04 + 0 112 0x04 + 0 113 0x04 + 0 114 0x04 + 0 115 0x04 + 0 116 0x04 + 0 117 0x04 + 0 118 0x04 + 0 119 0x04 + 0 128 0x04 + 0 129 0x04 + 0 130 0x04 + 0 131 0x04 + 0 132 0x04 + 0 133 0x04 + 0 134 0x04 + 0 135 0x04 + 0 136 0x04 + 0 137 0x04 + 0 138 0x04 + 0 139 0x04 + 0 140 0x04 + 0 141 0x04 + 0 142 0x04 + 0 143 0x04>; + clocks = <&tegra_car 34>; + }; + + ahb: ahb { + compatible = "nvidia,tegra30-ahb"; + reg = <0x6000c004 0x14c>; /* AHB Arbitration + Gizmo Controller */ + }; + + gpio: gpio { + compatible = "nvidia,tegra30-gpio"; + reg = <0x6000d000 0x1000>; + interrupts = <0 32 0x04 + 0 33 0x04 + 0 34 0x04 + 0 35 0x04 + 0 55 0x04 + 0 87 0x04 + 0 89 0x04 + 0 125 0x04>; + #gpio-cells = <2>; + gpio-controller; + #interrupt-cells = <2>; + interrupt-controller; + }; + + pinmux: pinmux { + compatible = "nvidia,tegra30-pinmux"; + reg = <0x70000868 0xd4 /* Pad control registers */ + 0x70003000 0x3e4>; /* Mux registers */ + }; + + /* + * There are two serial driver i.e. 8250 based simple serial + * driver and APB DMA based serial driver for higher baudrate + * and performace. To enable the 8250 based driver, the compatible + * is "nvidia,tegra30-uart", "nvidia,tegra20-uart" and to enable + * the APB DMA based serial driver, the comptible is + * "nvidia,tegra30-hsuart", "nvidia,tegra20-hsuart". 
+ */ + uarta: serial@70006000 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006000 0x40>; + reg-shift = <2>; + interrupts = <0 36 0x04>; + nvidia,dma-request-selector = <&apbdma 8>; + clocks = <&tegra_car 6>; + status = "disabled"; + }; + + uartb: serial@70006040 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006040 0x40>; + reg-shift = <2>; + interrupts = <0 37 0x04>; + nvidia,dma-request-selector = <&apbdma 9>; + clocks = <&tegra_car 160>; + status = "disabled"; + }; + + uartc: serial@70006200 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006200 0x100>; + reg-shift = <2>; + interrupts = <0 46 0x04>; + nvidia,dma-request-selector = <&apbdma 10>; + clocks = <&tegra_car 55>; + status = "disabled"; + }; + + uartd: serial@70006300 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006300 0x100>; + reg-shift = <2>; + interrupts = <0 90 0x04>; + nvidia,dma-request-selector = <&apbdma 19>; + clocks = <&tegra_car 65>; + status = "disabled"; + }; + + uarte: serial@70006400 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006400 0x100>; + reg-shift = <2>; + interrupts = <0 91 0x04>; + nvidia,dma-request-selector = <&apbdma 20>; + clocks = <&tegra_car 66>; + status = "disabled"; + }; + + pwm: pwm { + compatible = "nvidia,tegra30-pwm", "nvidia,tegra20-pwm"; + reg = <0x7000a000 0x100>; + #pwm-cells = <2>; + clocks = <&tegra_car 17>; + }; + + rtc { + compatible = "nvidia,tegra30-rtc", "nvidia,tegra20-rtc"; + reg = <0x7000e000 0x100>; + interrupts = <0 2 0x04>; + }; + + i2c@7000c000 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000c000 0x100>; + interrupts = <0 38 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 12>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + i2c@7000c400 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000c400 0x100>; + interrupts = <0 84 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 54>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + i2c@7000c500 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000c500 0x100>; + interrupts = <0 92 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 67>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + i2c@7000c700 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000c700 0x100>; + interrupts = <0 120 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 103>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + i2c@7000d000 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000d000 0x100>; + interrupts = <0 53 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 47>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + spi@7000d400 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000d400 0x200>; + interrupts = <0 59 0x04>; + nvidia,dma-request-selector = <&apbdma 15>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 41>; + status = "disabled"; + }; + + spi@7000d600 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000d600 0x200>; + interrupts = <0 82 0x04>; + nvidia,dma-request-selector = <&apbdma 16>; + 
#address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 44>; + status = "disabled"; + }; + + spi@7000d800 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000d800 0x200>; + interrupts = <0 83 0x04>; + nvidia,dma-request-selector = <&apbdma 17>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 46>; + status = "disabled"; + }; + + spi@7000da00 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000da00 0x200>; + interrupts = <0 93 0x04>; + nvidia,dma-request-selector = <&apbdma 18>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 68>; + status = "disabled"; + }; + + spi@7000dc00 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000dc00 0x200>; + interrupts = <0 94 0x04>; + nvidia,dma-request-selector = <&apbdma 27>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 104>; + status = "disabled"; + }; + + spi@7000de00 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000de00 0x200>; + interrupts = <0 79 0x04>; + nvidia,dma-request-selector = <&apbdma 28>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 105>; + status = "disabled"; + }; + + kbc { + compatible = "nvidia,tegra30-kbc", "nvidia,tegra20-kbc"; + reg = <0x7000e200 0x100>; + interrupts = <0 85 0x04>; + clocks = <&tegra_car 36>; + status = "disabled"; + }; + + pmc { + compatible = "nvidia,tegra20-pmc", "nvidia,tegra30-pmc"; + reg = <0x7000e400 0x400>; + }; + + memory-controller { + compatible = "nvidia,tegra30-mc"; + reg = <0x7000f000 0x010 + 0x7000f03c 0x1b4 + 0x7000f200 0x028 + 0x7000f284 0x17c>; + interrupts = <0 77 0x04>; + }; + + iommu { + compatible = "nvidia,tegra30-smmu"; + reg = <0x7000f010 0x02c + 0x7000f1f0 0x010 + 0x7000f228 0x05c>; + nvidia,#asids = <4>; /* # of ASIDs */ + dma-window = <0 0x40000000>; /* IOVA start & length */ + nvidia,ahb = <&ahb>; + }; + + ahub { + compatible = "nvidia,tegra30-ahub"; + reg = <0x70080000 0x200 + 0x70080200 0x100>; + interrupts = <0 103 0x04>; + nvidia,dma-request-selector = <&apbdma 1>; + clocks = <&tegra_car 106>, <&tegra_car 107>, <&tegra_car 30>, + <&tegra_car 11>, <&tegra_car 18>, <&tegra_car 101>, + <&tegra_car 102>, <&tegra_car 108>, <&tegra_car 109>, + <&tegra_car 110>, <&tegra_car 162>; + clock-names = "d_audio", "apbif", "i2s0", "i2s1", "i2s2", + "i2s3", "i2s4", "dam0", "dam1", "dam2", + "spdif_in"; + ranges; + #address-cells = <1>; + #size-cells = <1>; + + tegra_i2s0: i2s@70080300 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080300 0x100>; + nvidia,ahub-cif-ids = <4 4>; + clocks = <&tegra_car 30>; + status = "disabled"; + }; + + tegra_i2s1: i2s@70080400 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080400 0x100>; + nvidia,ahub-cif-ids = <5 5>; + clocks = <&tegra_car 11>; + status = "disabled"; + }; + + tegra_i2s2: i2s@70080500 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080500 0x100>; + nvidia,ahub-cif-ids = <6 6>; + clocks = <&tegra_car 18>; + status = "disabled"; + }; + + tegra_i2s3: i2s@70080600 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080600 0x100>; + nvidia,ahub-cif-ids = <7 7>; + clocks = <&tegra_car 101>; + status = "disabled"; + }; + + tegra_i2s4: i2s@70080700 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080700 0x100>; + nvidia,ahub-cif-ids = <8 8>; + clocks = <&tegra_car 102>; + status = "disabled"; + }; + }; + + sdhci@78000000 { + compatible = "nvidia,tegra30-sdhci", "nvidia,tegra20-sdhci"; + reg = <0x78000000 0x200>; + interrupts = <0 14 
0x04>; + clocks = <&tegra_car 14>; + status = "disabled"; + }; + + sdhci@78000200 { + compatible = "nvidia,tegra30-sdhci", "nvidia,tegra20-sdhci"; + reg = <0x78000200 0x200>; + interrupts = <0 15 0x04>; + clocks = <&tegra_car 9>; + status = "disabled"; + }; + + sdhci@78000400 { + compatible = "nvidia,tegra30-sdhci", "nvidia,tegra20-sdhci"; + reg = <0x78000400 0x200>; + interrupts = <0 19 0x04>; + clocks = <&tegra_car 69>; + status = "disabled"; + }; + + sdhci@78000600 { + compatible = "nvidia,tegra30-sdhci", "nvidia,tegra20-sdhci"; + reg = <0x78000600 0x200>; + interrupts = <0 31 0x04>; + clocks = <&tegra_car 15>; + status = "disabled"; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <3>; + }; + }; + + pmu { + compatible = "arm,cortex-a9-pmu"; + interrupts = <0 144 0x04 + 0 145 0x04 + 0 146 0x04 + 0 147 0x04>; + }; +}; diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig new file mode 100644 index 00000000000..11b7b7d4587 --- /dev/null +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -0,0 +1,3423 @@ +# +# Automatically generated file; DO NOT EDIT. +# Linux/arm 3.1.10 Kernel Configuration +# +CONFIG_ARM=y +CONFIG_HAVE_PWM=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_HAVE_SCHED_CLOCK=y +CONFIG_GENERIC_GPIO=y +# CONFIG_ARCH_USES_GETTIMEOFFSET is not set +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_KTIME_SCALAR=y +CONFIG_HAVE_PROC_CPU=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_LOCKBREAK=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_ARCH_HAS_CPUFREQ=y +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_NEED_DMA_MAP_STATE=y +CONFIG_FIQ=y +CONFIG_ARCH_PROVIDES_UDELAY=y +CONFIG_VECTORS_BASE=0xffff0000 +# CONFIG_ARM_PATCH_PHYS_VIRT is not set +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_HAVE_IRQ_WORK=y +CONFIG_IRQ_WORK=y + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_CROSS_COMPILE="" +CONFIG_LOCALVERSION="-MKernel-a69" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_HAVE_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_LZMA=y +CONFIG_HAVE_KERNEL_LZO=y +CONFIG_HAVE_KERNEL_LZ4=y +CONFIG_KERNEL_GZIP=y +# CONFIG_KERNEL_LZMA is not set +# CONFIG_KERNEL_LZO is not set +# CONFIG_KERNEL_LZ4 is not set +CONFIG_DEFAULT_HOSTNAME="(none)" +# CONFIG_SWAP is not set +# CONFIG_SYSVIPC is not set +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_FHANDLE is not set +# CONFIG_TASKSTATS is not set +CONFIG_AUDIT=y +CONFIG_HAVE_GENERIC_HARDIRQS=y + +# +# IRQ subsystem +# +CONFIG_GENERIC_HARDIRQS=y +CONFIG_HAVE_SPARSE_IRQ=y +CONFIG_GENERIC_IRQ_SHOW=y +# CONFIG_SPARSE_IRQ is not set + +# +# RCU Subsystem +# +CONFIG_TREE_PREEMPT_RCU=y +CONFIG_PREEMPT_RCU=y +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_RCU_BOOST is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y 
+CONFIG_CGROUP_TIMER_SLACK=y +# CONFIG_CGROUP_DEVICE is not set +# CONFIG_CPUSETS is not set +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# CONFIG_CGROUP_PERF is not set +CONFIG_CGROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y +# CONFIG_BLK_CGROUP is not set +# CONFIG_NAMESPACES is not set +CONFIG_SCHED_AUTOGROUP=y +# CONFIG_SYSFS_DEPRECATED is not set +# CONFIG_RELAY is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +CONFIG_RD_LZ4=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_PANIC_TIMEOUT=10 +CONFIG_EXPERT=y +CONFIG_UID16=y +# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +# CONFIG_ELF_CORE is not set +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_ASHMEM=y +CONFIG_AIO=y +CONFIG_EMBEDDED=y +CONFIG_HAVE_PERF_EVENTS=y +CONFIG_PERF_USE_VMALLOC=y + +# +# Kernel Performance Events And Counters +# +CONFIG_PERF_EVENTS=y +# CONFIG_PERF_COUNTERS is not set +# CONFIG_DEBUG_PERF_USE_VMALLOC is not set +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y +CONFIG_COMPAT_BRK=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y +CONFIG_OPROFILE=y +CONFIG_HAVE_OPROFILE=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_USE_GENERIC_SMP_HELPERS=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +CONFIG_HAVE_HW_BREAKPOINT=y + +# +# GCOV-based kernel profiling +# +# CONFIG_GCOV_KERNEL is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +# CONFIG_MODULES is not set +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBDAF=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_BSGLIB is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_ROW=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_IOSCHED_SIO is not set +# CONFIG_IOSCHED_VR is not set +CONFIG_IOSCHED_BFQ=y +CONFIG_CGROUP_BFQIO=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_ROW is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_BFQ=y +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="bfq" +# CONFIG_INLINE_SPIN_TRYLOCK is not set +# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK is not set +# CONFIG_INLINE_SPIN_LOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK_IRQ is not set +# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set +# CONFIG_INLINE_SPIN_UNLOCK is not set +# CONFIG_INLINE_SPIN_UNLOCK_BH is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQ is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_READ_TRYLOCK is not set +# CONFIG_INLINE_READ_LOCK is not set +# CONFIG_INLINE_READ_LOCK_BH is not set +# CONFIG_INLINE_READ_LOCK_IRQ is not set +# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set +# CONFIG_INLINE_READ_UNLOCK is not set +# CONFIG_INLINE_READ_UNLOCK_BH is not set +# CONFIG_INLINE_READ_UNLOCK_IRQ is not set +# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_WRITE_TRYLOCK is not set +# CONFIG_INLINE_WRITE_LOCK is not set +# CONFIG_INLINE_WRITE_LOCK_BH is not set +# CONFIG_INLINE_WRITE_LOCK_IRQ is not set +# 
CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set +# CONFIG_INLINE_WRITE_UNLOCK is not set +# CONFIG_INLINE_WRITE_UNLOCK_BH is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQ is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set +CONFIG_MUTEX_SPIN_ON_OWNER=y +CONFIG_FREEZER=y + +# +# System Type +# +CONFIG_MMU=y +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_VEXPRESS is not set +# CONFIG_ARCH_AT91 is not set +# CONFIG_ARCH_BCMRING is not set +# CONFIG_ARCH_CLPS711X is not set +# CONFIG_ARCH_CNS3XXX is not set +# CONFIG_ARCH_GEMINI is not set +# CONFIG_ARCH_PRIMA2 is not set +# CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set +# CONFIG_ARCH_FOOTBRIDGE is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_MXS is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set +# CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_DOVE is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LPC32XX is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_W90X900 is not set +# CONFIG_ARCH_NUC93X is not set +CONFIG_ARCH_TEGRA=y +# CONFIG_ARCH_PNX4008 is not set +# CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_MSM is not set +# CONFIG_ARCH_SHMOBILE is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_SA1100 is not set +# CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_S5P64X0 is not set +# CONFIG_ARCH_S5PC100 is not set +# CONFIG_ARCH_S5PV210 is not set +# CONFIG_ARCH_EXYNOS4 is not set +# CONFIG_ARCH_SHARK is not set +# CONFIG_ARCH_TCC_926 is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_U8500 is not set +# CONFIG_ARCH_NOMADIK is not set +# CONFIG_ARCH_DAVINCI is not set +# CONFIG_ARCH_OMAP is not set +# CONFIG_PLAT_SPEAR is not set +# CONFIG_ARCH_VT8500 is not set +# CONFIG_ARCH_ZYNQ is not set +CONFIG_GPIO_PCA953X=y +# CONFIG_KEYBOARD_GPIO_POLLED is not set + +# +# System MMU +# + +# +# NVIDIA Tegra options +# +CONFIG_ARCH_TEGRA_3x_SOC=y +CONFIG_ARCH_TEGRA_HAS_DUAL_3D=y +CONFIG_ARCH_TEGRA_HAS_DUAL_CPU_CLUSTERS=y +CONFIG_ARCH_TEGRA_HAS_PCIE=y +CONFIG_ARCH_TEGRA_HAS_SATA=y +CONFIG_TEGRA_PCI=y +CONFIG_TEGRA_AHB=y + +# +# Tegra board type +# +# CONFIG_MACH_TEGRA_DT is not set +# CONFIG_MACH_ARUBA is not set +CONFIG_MACH_CARDHU=y +# CONFIG_MACH_P1852 is not set +# CONFIG_MACH_TEGRA_ENTERPRISE is not set +# CONFIG_MACH_KAI is not set +CONFIG_MACH_GROUPER=y +CONFIG_TEGRA_SILICON_PLATFORM=y +# CONFIG_TEGRA_SIMULATION_PLATFORM is not set +# CONFIG_TEGRA_FPGA_PLATFORM is not set +CONFIG_TEGRA_DEBUG_UART_NONE=y +CONFIG_TEGRA_SYSTEM_DMA=y +CONFIG_TEGRA_PWM=y +CONFIG_TEGRA_FIQ_DEBUGGER=y +# CONFIG_TEGRA_CARDHU_DSI is not set +CONFIG_TEGRA_EMC_SCALING_ENABLE=y +CONFIG_VOLTAGE_CONTROL=y +CONFIG_CUSTOM_BRIGHTNESS=y +# CONFIG_DEFAULT_DUAL_CORE is not set +CONFIG_GPU_OVERCLOCK=y +# CONFIG_GPU_OC_332 is not set +CONFIG_GPU_OC_446=y +# CONFIG_GPU_OC_484 is not set +# CONFIG_GPU_OC_520 is not set +# CONFIG_GPU_OC_600 is not set +# CONFIG_GPU_OC_666 is not set +# CONFIG_GPU_OC_700 is not set +CONFIG_LP_OVERCLOCK=y +# CONFIG_LP_OC_555 is not set +# CONFIG_LP_OC_620 is not set +CONFIG_LP_OC_666=y +# CONFIG_LP_OC_700 is not set +# CONFIG_LP_OC_740 is not set +# CONFIG_LP_ONLY is not set +CONFIG_AUDIO_MIN_PERFLOCK=y 
+CONFIG_TEGRA_CPU_DVFS=y +CONFIG_TEGRA_CORE_DVFS=y +CONFIG_TEGRA_IOVMM_SMMU=y +# CONFIG_TEGRA_SMMU_BASE_AT_E0000000 is not set +# CONFIG_TEGRA_IOVMM_SMMU_SYSFS is not set +CONFIG_TEGRA_IOVMM=y +CONFIG_TEGRA_AVP_KERNEL_ON_SMMU=y +CONFIG_TEGRA_THERMAL_THROTTLE=y +CONFIG_WIFI_CONTROL_FUNC=y +CONFIG_TEGRA_CLOCK_DEBUG_WRITE=y +CONFIG_TEGRA_CLUSTER_CONTROL=y +CONFIG_TEGRA_AUTO_HOTPLUG=y +CONFIG_TEGRA_MC_EARLY_ACK=y +CONFIG_TEGRA_MC_PROFILE=y +CONFIG_TEGRA_EDP_LIMITS=y +CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 +# CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set +CONFIG_TEGRA_LP1_950=y +CONFIG_TEGRA_RUNNABLE_THREAD=y +CONFIG_TEGRA_VARIANT_INFO=y +CONFIG_USB_HOTPLUG=y +CONFIG_TEGRA_DYNAMIC_PWRDET=y +CONFIG_TEGRA_EDP_EXACT_FREQ=y +# CONFIG_TEGRA_USB_MODEM_POWER is not set +CONFIG_TEGRA_BB_XMM_POWER=y +# CONFIG_TEGRA_BB_XMM_POWER2 is not set +# CONFIG_TEGRA_THERMAL_SYSFS is not set +CONFIG_TEGRA_PLLM_RESTRICTED=y +# CONFIG_TEGRA_WDT_RECOVERY is not set +CONFIG_TEGRA_LP2_ARM_TWD=y +CONFIG_TEGRA_SLOW_CSITE=y +# CONFIG_TEGRA_PREINIT_CLOCKS is not set + +# +# Processor Type +# +CONFIG_CPU_V7=y +CONFIG_CPU_32v6K=y +CONFIG_CPU_32v7=y +CONFIG_CPU_ABRT_EV7=y +CONFIG_CPU_PABRT_V7=y +CONFIG_CPU_CACHE_V7=y +CONFIG_CPU_CACHE_VIPT=y +CONFIG_CPU_COPY_V6=y +CONFIG_CPU_TLB_V7=y +CONFIG_CPU_HAS_ASID=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y + +# +# Processor Features +# +CONFIG_ARM_THUMB=y +# CONFIG_ARM_THUMBEE is not set +CONFIG_SWP_EMULATE=y +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +# CONFIG_CPU_BPREDICT_DISABLE is not set +CONFIG_OUTER_CACHE=y +CONFIG_OUTER_CACHE_SYNC=y +CONFIG_CACHE_L2X0=y +CONFIG_CACHE_PL310=y +CONFIG_ARM_L1_CACHE_SHIFT=5 +CONFIG_ARM_DMA_MEM_BUFFERABLE=y +CONFIG_ARM_SAVE_DEBUG_CONTEXT=y +CONFIG_CPA=y +CONFIG_CPU_HAS_PMU=y +# CONFIG_ARM_ERRATA_430973 is not set +# CONFIG_ARM_ERRATA_458693 is not set +# CONFIG_ARM_ERRATA_460075 is not set +CONFIG_ARM_ERRATA_742230=y +# CONFIG_ARM_ERRATA_742231 is not set +# CONFIG_PL310_ERRATA_588369 is not set +# CONFIG_ARM_ERRATA_720789 is not set +# CONFIG_PL310_ERRATA_727915 is not set +CONFIG_ARM_ERRATA_743622=y +CONFIG_ARM_ERRATA_751472=y +# CONFIG_ARM_ERRATA_753970 is not set +CONFIG_ARM_ERRATA_754322=y +# CONFIG_ARM_ERRATA_754327 is not set +CONFIG_ARM_ERRATA_764369=y +# CONFIG_ARM_ERRATA_720791 is not set +CONFIG_ARM_ERRATA_752520=y +# CONFIG_PL310_ERRATA_769419 is not set +CONFIG_ARM_GIC=y +CONFIG_FIQ_GLUE=y +CONFIG_FIQ_DEBUGGER=y +# CONFIG_FIQ_DEBUGGER_NO_SLEEP is not set +# CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON is not set +CONFIG_FIQ_DEBUGGER_CONSOLE=y +# CONFIG_FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE is not set +CONFIG_GIC_SET_MULTIPLE_CPUS=y + +# +# Bus support +# +CONFIG_PCI=y +CONFIG_PCI_SYSCALL=y +CONFIG_ARCH_SUPPORTS_MSI=y +CONFIG_PCI_MSI=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set +# CONFIG_PCCARD is not set + +# +# Kernel Features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_SMP=y +CONFIG_SMP_ON_UP=y +CONFIG_HAVE_ARM_SCU=y +CONFIG_HAVE_ARM_TWD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_TASK_SIZE_3G_LESS_16M=y +# CONFIG_TASK_SIZE_3G_LESS_24M is not set +CONFIG_TASK_SIZE=0xBF000000 +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_LOCAL_TIMERS=y +CONFIG_ARCH_NR_GPIO=512 +# CONFIG_PREEMPT_NONE is not set +# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_PREEMPT=y +CONFIG_PREEMPT_COUNT=y +CONFIG_HZ=100 +# 
CONFIG_THUMB2_KERNEL is not set +CONFIG_AEABI=y +# CONFIG_OABI_COMPAT is not set +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +CONFIG_HAVE_ARCH_PFN_VALID=y +CONFIG_HIGHMEM=y +# CONFIG_HIGHPTE is not set +CONFIG_HW_PERF_EVENTS=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_HAVE_MEMBLOCK=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_COMPACTION is not set +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +CONFIG_KSM=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +# CONFIG_CLEANCACHE is not set +CONFIG_FORCE_MAX_ZONEORDER=11 +CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set +# CONFIG_SECCOMP is not set +# CONFIG_CC_STACKPROTECTOR is not set +# CONFIG_DEPRECATED_PARAM_STRUCT is not set +CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART=y + +# +# Boot options +# +# CONFIG_USE_OF is not set +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="tegra_wdt.heartbeat=30" +# CONFIG_CMDLINE_FROM_BOOTLOADER is not set +CONFIG_CMDLINE_EXTEND=y +# CONFIG_CMDLINE_FORCE is not set +# CONFIG_XIP_KERNEL is not set +CONFIG_KEXEC=y +CONFIG_ATAGS_PROC=y +CONFIG_KEXEC_HARDBOOT=y +# CONFIG_CRASH_DUMP is not set +# CONFIG_AUTO_ZRELADDR is not set + +# +# CPU Power Management +# + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_TOUCHDEMAND=y +CONFIG_CPU_FREQ_GOV_INTERACTIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +# CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set +# CONFIG_CPU_FREQ_GOV_PEGASUSQ is not set + +# +# ARM CPU frequency scaling drivers +# +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y + +# +# CPUQUIET Framework +# +# CONFIG_CPUQUIET_FRAMEWORK is not set + +# +# Floating point emulation +# + +# +# At least one emulation must be selected +# +CONFIG_VFP=y +CONFIG_VFPv3=y +CONFIG_NEON=y + +# +# Userspace binary formats +# +CONFIG_BINFMT_ELF=y +CONFIG_HAVE_AOUT=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_HAS_WAKELOCK=y +CONFIG_HAS_EARLYSUSPEND=y +CONFIG_WAKELOCK=y +CONFIG_WAKELOCK_STAT=y +CONFIG_USER_WAKELOCK=y +CONFIG_EARLYSUSPEND=y +# CONFIG_NO_USER_SPACE_SCREEN_ACCESS_CONTROL is not set +CONFIG_FB_EARLYSUSPEND=y +CONFIG_PM_SLEEP=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_RUNTIME=y +CONFIG_PM=y +CONFIG_PM_DEBUG=y +# CONFIG_PM_ADVANCED_DEBUG is not set +# CONFIG_PM_TEST_SUSPEND is not set +CONFIG_CAN_PM_TRACE=y +# CONFIG_APM_EMULATION is not set +CONFIG_PM_CLK=y +CONFIG_SUSPEND_TIME=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +CONFIG_XFRM_IPCOMP=y 
+CONFIG_NET_KEY=y +# CONFIG_NET_KEY_MIGRATE is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +# CONFIG_IP_FIB_TRIE_STATS is not set +CONFIG_IP_MULTIPLE_TABLES=y +# CONFIG_IP_ROUTE_MULTIPATH is not set +# CONFIG_IP_ROUTE_VERBOSE is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE_DEMUX is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +CONFIG_INET_ESP=y +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=y +CONFIG_TCP_CONG_HTCP=y +CONFIG_TCP_CONG_HSTCP=y +CONFIG_TCP_CONG_HYBLA=y +CONFIG_TCP_CONG_VEGAS=y +CONFIG_TCP_CONG_SCALABLE=y +CONFIG_TCP_CONG_LP=y +CONFIG_TCP_CONG_VENO=y +CONFIG_TCP_CONG_YEAH=y +CONFIG_TCP_CONG_ILLINOIS=y +# CONFIG_DEFAULT_BIC is not set +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_HYBLA is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_VENO is not set +CONFIG_DEFAULT_WESTWOOD=y +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="westwood" +# CONFIG_TCP_MD5SIG is not set +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +# CONFIG_IPV6_ROUTE_INFO is not set +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_INET6_XFRM_TUNNEL=y +CONFIG_INET6_TUNNEL=y +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_INET6_XFRM_MODE_BEET=y +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +# CONFIG_IPV6_SIT_6RD is not set +CONFIG_IPV6_NDISC_NODETYPE=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_MULTIPLE_TABLES=y +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MROUTE is not set +# CONFIG_NETLABEL is not set +CONFIG_ANDROID_PARANOID_NETWORK=y +CONFIG_NET_ACTIVITY_STATS=y +CONFIG_NETWORK_SECMARK=y +# CONFIG_NETWORK_PHY_TIMESTAMPING is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_ADVANCED=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +# CONFIG_NF_CONNTRACK_TIMESTAMP is not set +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_GRE=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_BROADCAST=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +# CONFIG_NF_CONNTRACK_SNMP is not set +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +# CONFIG_NF_CONNTRACK_SIP is not set +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_TPROXY=y +CONFIG_NETFILTER_XTABLES=y + +# +# Xtables combined modules +# +CONFIG_NETFILTER_XT_MARK=y +CONFIG_NETFILTER_XT_CONNMARK=y + +# +# Xtables targets +# +# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set +# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +# CONFIG_NETFILTER_XT_TARGET_CT is 
not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +# CONFIG_NETFILTER_XT_TARGET_HL is not set +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set +# CONFIG_NETFILTER_XT_TARGET_TEE is not set +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set + +# +# Xtables matches +# +# CONFIG_NETFILTER_XT_MATCH_ADDRTYPE is not set +# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +# CONFIG_NETFILTER_XT_MATCH_CPU is not set +# CONFIG_NETFILTER_XT_MATCH_DCCP is not set +# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_HL=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set +# CONFIG_NETFILTER_XT_MATCH_OSF is not set +# CONFIG_NETFILTER_XT_MATCH_OWNER is not set +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y +# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set +# CONFIG_NETFILTER_XT_MATCH_REALM is not set +# CONFIG_NETFILTER_XT_MATCH_RECENT is not set +# CONFIG_NETFILTER_XT_MATCH_SCTP is not set +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +# CONFIG_IP_SET is not set +# CONFIG_IP_VS is not set + +# +# IP: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV4=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_REJECT_SKERR=y +CONFIG_IP_NF_TARGET_LOG=y +# CONFIG_IP_NF_TARGET_ULOG is not set +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_NF_NAT_PROTO_DCCP=y +CONFIG_NF_NAT_PROTO_GRE=y +CONFIG_NF_NAT_PROTO_UDPLITE=y +CONFIG_NF_NAT_PROTO_SCTP=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +CONFIG_NF_NAT_TFTP=y +CONFIG_NF_NAT_AMANDA=y +CONFIG_NF_NAT_PPTP=y +CONFIG_NF_NAT_H323=y +# CONFIG_NF_NAT_SIP is not set +CONFIG_IP_NF_MANGLE=y +# CONFIG_IP_NF_TARGET_CLUSTERIP is not set +# CONFIG_IP_NF_TARGET_ECN is not set +# CONFIG_IP_NF_TARGET_TTL is not set +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_CONNTRACK_IPV6=y +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=y +# 
CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT_SKERR=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +# CONFIG_IP6_NF_SECURITY is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_L2TP is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +CONFIG_NET_SCH_HTB=y +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFB is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set +# CONFIG_NET_SCH_MQPRIO is not set +# CONFIG_NET_SCH_CHOKE is not set +# CONFIG_NET_SCH_QFQ is not set +CONFIG_NET_SCH_INGRESS=y + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +CONFIG_NET_CLS_U32=y +# CONFIG_CLS_U32_PERF is not set +# CONFIG_CLS_U32_MARK is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_CLS_CGROUP is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +CONFIG_NET_EMATCH_U32=y +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_GACT=y +# CONFIG_GACT_PROB is not set +CONFIG_NET_ACT_MIRRED=y +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +# CONFIG_NET_ACT_SKBEDIT is not set +# CONFIG_NET_ACT_CSUM is not set +# CONFIG_NET_CLS_IND is not set +CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set +CONFIG_DNS_RESOLVER=y +# CONFIG_BATMAN_ADV is not set +CONFIG_RPS=y +CONFIG_RFS_ACCEL=y +CONFIG_XPS=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +CONFIG_BT=y +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y +CONFIG_BT_RFCOMM=y +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=y +# CONFIG_BT_BNEP_MC_FILTER is not set +# CONFIG_BT_BNEP_PROTO_FILTER is not set +CONFIG_BT_HIDP=y + +# +# Bluetooth device drivers +# +# CONFIG_BT_HCIBTUSB is not set +# CONFIG_BT_HCIBTSDIO is not set +CONFIG_BT_HCIUART=y +CONFIG_BT_HCIUART_H4=y +# CONFIG_BT_HCIUART_BCSP is not set +# CONFIG_BT_HCIUART_ATH3K is not set +CONFIG_BT_HCIUART_LL=y +# CONFIG_BT_HCIBCM203X is not set 
+CONFIG_BT_BLUESLEEP=y +# CONFIG_BT_TIBLUESLEEP is not set +# CONFIG_BT_HCIBPA10X is not set +# CONFIG_BT_HCIBFUSB is not set +# CONFIG_BT_HCIVHCI is not set +# CONFIG_BT_MRVL is not set +# CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y +CONFIG_WIRELESS=y +CONFIG_WEXT_CORE=y +CONFIG_WEXT_PROC=y +CONFIG_CFG80211=y +CONFIG_NL80211_TESTMODE=y +# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +# CONFIG_CFG80211_REG_DEBUG is not set +CONFIG_CFG80211_DEFAULT_PS=y +# CONFIG_CFG80211_DEBUGFS is not set +# CONFIG_CFG80211_INTERNAL_REGDB is not set +CONFIG_CFG80211_WEXT=y +CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set +# CONFIG_CFG80211_ALLOW_RECONNECT is not set +# CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set +CONFIG_RFKILL=y +CONFIG_RFKILL_PM=y +# CONFIG_RFKILL_INPUT is not set +# CONFIG_RFKILL_REGULATOR is not set +# CONFIG_RFKILL_GPIO is not set +# CONFIG_NET_9P is not set +CONFIG_CAIF=y +# CONFIG_CAIF_DEBUG is not set +CONFIG_CAIF_NETDEV=y +# CONFIG_CEPH_LIB is not set +CONFIG_NFC=y + +# +# Near Field Communication (NFC) devices +# +CONFIG_PN544_NFC=y +# CONFIG_NFC_PN533 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="" +# CONFIG_DEVTMPFS is not set +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_REGMAP=y +CONFIG_REGMAP_I2C=y +# CONFIG_DMA_SHARED_BUFFER is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 +# CONFIG_BLK_DEV_CRYPTOLOOP is not set + +# +# DRBD disabled because PROC_FS, INET or CONNECTOR not selected +# +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_MG_DISK is not set +# CONFIG_BLK_DEV_RBD is not set +# CONFIG_SENSORS_LIS3LV02D is not set +CONFIG_MISC_DEVICES=y +CONFIG_AD525X_DPOT=y +CONFIG_AD525X_DPOT_I2C=y +# CONFIG_AD525X_DPOT_SPI is not set +# CONFIG_PHANTOM is not set +# CONFIG_INTEL_MID_PTI is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set +CONFIG_APDS9802ALS=y +# CONFIG_ISL29003 is not set +# CONFIG_ISL29020 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_SENSORS_BH1780 is not set +# CONFIG_SENSORS_BH1770 is not set +# CONFIG_SENSORS_APDS990X is not set +# CONFIG_HMC6352 is not set +# CONFIG_SENSORS_AK8975 is not set +CONFIG_SENSORS_NCT1008=y +# CONFIG_DS1682 is not set +# CONFIG_TI_DAC7512 is not set +CONFIG_UID_STAT=y +# CONFIG_BMP085 is not set +# CONFIG_PCH_PHUB is not set +# CONFIG_USB_SWITCH_FSA9480 is not set +# CONFIG_WL127X_RFKILL is not set +# CONFIG_APANIC is not set +# CONFIG_BCM4329_RFKILL is not set +CONFIG_BCM4330_RFKILL=y +CONFIG_TEGRA_CRYPTO_DEV=y +CONFIG_MAX1749_VIBRATOR=y +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +CONFIG_EEPROM_AT24=y +# CONFIG_EEPROM_AT25 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_MAX6875 is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_EEPROM_93XX46 
is not set +# CONFIG_CB710_CORE is not set +# CONFIG_IWMC3200TOP is not set + +# +# Texas Instruments shared transport line discipline +# +# CONFIG_TI_ST is not set +# CONFIG_ST_GPS is not set +# CONFIG_SENSORS_LIS3_SPI is not set +# CONFIG_SENSORS_LIS3_I2C is not set +CONFIG_TEGRA_BB_SUPPORT=y +CONFIG_TEGRA_BB_POWER=y +CONFIG_TEGRA_BB_M7400=y +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_SCSI_MOD=y +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set +CONFIG_SCSI_MULTI_LUN=y +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_ISCSI_BOOT_SYSFS is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set +# CONFIG_SCSI_CXGB4_ISCSI is not set +# CONFIG_SCSI_BNX2_ISCSI is not set +# CONFIG_SCSI_BNX2X_FCOE is not set +# CONFIG_BE2ISCSI is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_HPSA is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_3W_SAS is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_MVSAS is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_MPT2SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set +# CONFIG_FCOE is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_PMCRAID is not set +# CONFIG_SCSI_PM8001 is not set +# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_BFA_FC is not set +# CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set +# CONFIG_ATA is not set +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_DEBUG is not set +CONFIG_DM_CRYPT=y +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_RAID is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set +# CONFIG_DM_DELAY is not set +CONFIG_DM_UEVENT=y +# CONFIG_DM_FLAKEY is not set +# CONFIG_TARGET_CORE is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_FIREWIRE_NOSY is not set +# CONFIG_I2O is not set +CONFIG_NETDEVICES=y +# CONFIG_IFB is not set 
+CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y +# CONFIG_VETH is not set +# CONFIG_ARCNET is not set +CONFIG_MII=y +# CONFIG_PHYLIB is not set +# CONFIG_NET_ETHERNET is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_E1000E is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set +# CONFIG_IGBVF is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +# CONFIG_CNIC is not set +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set +# CONFIG_JME is not set +# CONFIG_STMMAC_ETH is not set +# CONFIG_PCH_GBE is not set +# CONFIG_FTGMAC100 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set +CONFIG_WLAN=y +# CONFIG_ATMEL is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_ATH_COMMON is not set +# CONFIG_BCM4329 is not set +CONFIG_BCMDHD=y +CONFIG_BCMDHD_FW_PATH="/system/vendor/firmware/fw_bcmdhd.bin" +CONFIG_BCMDHD_NVRAM_PATH="/system/etc/nvram.txt" +# CONFIG_DHD_USE_STATIC_BUF is not set +CONFIG_DHD_USE_SCHED_SCAN=y +CONFIG_DHD_ENABLE_P2P=y +# CONFIG_HOSTAP is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_IWM is not set +# CONFIG_LIBERTAS is not set +# CONFIG_HERMES is not set +# CONFIG_MWIFIEX is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +CONFIG_USB_USBNET=y +CONFIG_USB_NET_AX8817X=y +CONFIG_USB_NET_CDCETHER=y +# CONFIG_USB_NET_CDC_EEM is not set +CONFIG_USB_NET_CDC_NCM=y +# CONFIG_USB_NET_DM9601 is not set +# CONFIG_USB_NET_SMSC75XX is not set +CONFIG_USB_NET_SMSC95XX=y +# CONFIG_USB_NET_GL620A is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_PLUSB is not set +# CONFIG_USB_NET_MCS7830 is not set +# CONFIG_USB_NET_RNDIS_HOST is not set +CONFIG_USB_NET_CDC_SUBSET=y +# CONFIG_USB_ALI_M5632 is not set +# CONFIG_USB_AN2720 is not set +# CONFIG_USB_BELKIN is not set +# CONFIG_USB_ARMLINUX is not set +# CONFIG_USB_EPSON2888 is not set +# CONFIG_USB_KC2190 is not set +# CONFIG_USB_NET_ZAURUS is not set +# CONFIG_USB_NET_CX82310_ETH is not set +# CONFIG_USB_NET_KALMIA is not set +# CONFIG_USB_HSO is not set +# CONFIG_USB_NET_INT51X1 is not set +# CONFIG_USB_IPHETH is not set +# CONFIG_USB_SIERRA_NET is not set +# CONFIG_USB_VL600 is not set +CONFIG_USB_NET_RAW_IP=y +# CONFIG_WAN is not set + +# +# CAIF transport drivers +# +# CONFIG_CAIF_TTY is not set +# CONFIG_CAIF_SPI_SLAVE is not set +# CONFIG_CAIF_HSI is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=y +# CONFIG_PPP_MULTILINK is not set +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=y +CONFIG_PPP_SYNC_TTY=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_MPPE=y +# CONFIG_PPPOE is not set +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +# CONFIG_SLIP is not set +CONFIG_SLHC=y +# CONFIG_NET_FC is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_VMXNET3 is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE 
is not set + +# +# Input device support +# +CONFIG_INPUT=y +CONFIG_INPUT_FF_MEMLESS=y +# CONFIG_INPUT_POLLDEV is not set +# CONFIG_INPUT_SPARSEKMAP is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set +CONFIG_INPUT_KEYRESET=y +CONFIG_INPUT_LID=y + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ADP5588 is not set +# CONFIG_KEYBOARD_ADP5589 is not set +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_QT1070 is not set +# CONFIG_KEYBOARD_QT2160 is not set +# CONFIG_KEYBOARD_LKKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_KEYBOARD_TCA6416 is not set +# CONFIG_KEYBOARD_MATRIX is not set +# CONFIG_KEYBOARD_LM8323 is not set +# CONFIG_KEYBOARD_MAX7359 is not set +# CONFIG_KEYBOARD_MCS is not set +# CONFIG_KEYBOARD_MPR121 is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_KEYBOARD_TEGRA=y +# CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_AS5011 is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TABLET_USB_WACOM=y +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_ADS7846 is not set +# CONFIG_TOUCHSCREEN_AD7877 is not set +# CONFIG_TOUCHSCREEN_AD7879 is not set +# CONFIG_TOUCHSCREEN_ATMEL_MXT is not set +# CONFIG_TOUCHSCREEN_BU21013 is not set +# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set +# CONFIG_TOUCHSCREEN_DYNAPRO is not set +# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set +# CONFIG_TOUCHSCREEN_EETI is not set +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set +# CONFIG_TOUCHSCREEN_MAX11801 is not set +# CONFIG_TOUCHSCREEN_MCS5000 is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_INEXIO is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_PANJIT_I2C is not set +# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2005 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set +# CONFIG_TOUCHSCREEN_W90X900 is not set +# CONFIG_TOUCHSCREEN_ST1232 is not set +# CONFIG_TOUCHSCREEN_TPS6507X is not set +CONFIG_TOUCHSCREEN_ELAN_TF_3K=y 
+CONFIG_TOUCHSCREEN_RM31080A=y +CONFIG_TOUCHSCREEN_SYN_RMI4_SPI=y +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_AD714X is not set +# CONFIG_INPUT_MMA8450 is not set +# CONFIG_INPUT_MPU3050 is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +CONFIG_INPUT_KEYCHORD=y +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_KXTJ9 is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_INPUT_PCF8574 is not set +# CONFIG_INPUT_PWM_BEEPER is not set +# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set +# CONFIG_INPUT_ADXL34X is not set +# CONFIG_INPUT_CMA3000 is not set +# CONFIG_INPUT_ALPS_GPIO_SCROLLWHEEL is not set +# CONFIG_INPUT_CAPELLA_CM3217 is not set + +# +# Proximity sensors +# +CONFIG_SENSORS_CAP1106=y + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_SERIO_ALTERA_PS2 is not set +# CONFIG_SERIO_PS2MULT is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set +# CONFIG_N_GSM is not set +# CONFIG_TRACE_SINK is not set +CONFIG_DEVMEM=y +# CONFIG_DEVKMEM is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_TEGRA=y +# CONFIG_SERIAL_MAX3100 is not set +# CONFIG_SERIAL_MAX3107 is not set +# CONFIG_SERIAL_MFD_HSU is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +# CONFIG_SERIAL_TIMBERDALE is not set +# CONFIG_SERIAL_ALTERA_JTAGUART is not set +# CONFIG_SERIAL_ALTERA_UART is not set +# CONFIG_SERIAL_IFX6X60 is not set +# CONFIG_SERIAL_PCH_UART is not set +# CONFIG_SERIAL_XILINX_PS_UART is not set +# CONFIG_TTY_PRINTK is not set +# CONFIG_HVC_DCC is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_DCC_TTY is not set +# CONFIG_RAMOOPS is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_COMPAT is not set +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_MUX=y + +# +# Multiplexer I2C Chip support +# +# CONFIG_I2C_MUX_GPIO is not set +# CONFIG_I2C_MUX_PCA9541 is not set +CONFIG_I2C_MUX_PCA954x=y +# CONFIG_I2C_SLAVE is not set +# CONFIG_I2C_HELPER_AUTO is not set +# CONFIG_I2C_SMBUS is not set + +# +# I2C Algorithms +# +# CONFIG_I2C_ALGOBIT is not set +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# + +# +# PC SMBus host controller drivers +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_ISCH is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# 
CONFIG_I2C_DESIGNWARE is not set +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_INTEL_MID is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_PXA_PCI is not set +# CONFIG_I2C_SIMTEC is not set +CONFIG_I2C_TEGRA=y +# CONFIG_I2C_XILINX is not set +# CONFIG_I2C_EG20T is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_DIOLAN_U2C is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +# CONFIG_SPI_ALTERA is not set +# CONFIG_SPI_BITBANG is not set +# CONFIG_SPI_GPIO is not set +# CONFIG_SPI_OC_TINY is not set +# CONFIG_SPI_PXA2XX_PCI is not set +CONFIG_SPI_TEGRA=y +CONFIG_SPI_SLAVE_TEGRA=y +# CONFIG_SPI_TOPCLIFF_PCH is not set +# CONFIG_SPI_XILINX is not set +# CONFIG_SPI_DESIGNWARE is not set + +# +# SPI Protocol Masters +# +# CONFIG_SPI_SPIDEV is not set +# CONFIG_SPI_TLE62X0 is not set + +# +# PPS support +# +# CONFIG_PPS is not set + +# +# PPS generators support +# + +# +# PTP clock support +# + +# +# Enable Device Drivers -> PPS to see the PTP clock options. +# +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y + +# +# Memory mapped GPIO drivers: +# +# CONFIG_GPIO_GENERIC_PLATFORM is not set +# CONFIG_GPIO_IT8761E is not set +# CONFIG_GPIO_VX855 is not set + +# +# I2C GPIO expanders: +# +# CONFIG_GPIO_MAX7300 is not set +# CONFIG_GPIO_MAX732X is not set +# CONFIG_GPIO_PCA953X_IRQ is not set +# CONFIG_GPIO_PCF857X is not set +# CONFIG_GPIO_SX150X is not set +# CONFIG_GPIO_ADP5588 is not set + +# +# PCI GPIO expanders: +# +# CONFIG_GPIO_BT8XX is not set +# CONFIG_GPIO_ML_IOH is not set +# CONFIG_GPIO_RDC321X is not set + +# +# SPI GPIO expanders: +# +# CONFIG_GPIO_MAX7301 is not set +# CONFIG_GPIO_MCP23S08 is not set +# CONFIG_GPIO_MC33880 is not set +# CONFIG_GPIO_74X164 is not set + +# +# AC97 GPIO expanders: +# + +# +# MODULbus GPIO expanders: +# +CONFIG_GPIO_TPS65910=y +# CONFIG_W1 is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_TEST_POWER is not set +# CONFIG_BATTERY_DS2780 is not set +# CONFIG_BATTERY_DS2782 is not set +# CONFIG_BATTERY_BQ20Z75 is not set +# CONFIG_BATTERY_BQ27x00 is not set +# CONFIG_CHARGER_TPS8003X is not set +# CONFIG_BATTERY_GAUGE_TPS8003X is not set +CONFIG_CHARGER_SMB347=y +# CONFIG_BATTERY_MAX17040 is not set +# CONFIG_BATTERY_MAX17042 is not set +# CONFIG_BATTERY_MAX17048 is not set +# CONFIG_CHARGER_ISP1704 is not set +# CONFIG_CHARGER_MAX8903 is not set +# CONFIG_CHARGER_GPIO is not set +CONFIG_BATTERY_BQ27541=y +# CONFIG_TEGRA_BPC_MGMT is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Native drivers +# +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADCXX is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7411 is not set +# CONFIG_SENSORS_ADT7461 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7475 is not set +# 
CONFIG_SENSORS_ASC7621 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS620 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_G760A is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_GPIO_FAN is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_JC42 is not set +# CONFIG_SENSORS_LINEAGE is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM70 is not set +# CONFIG_SENSORS_LM73 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4151 is not set +# CONFIG_SENSORS_LTC4215 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_LTC4261 is not set +# CONFIG_SENSORS_LM95241 is not set +# CONFIG_SENSORS_LM95245 is not set +# CONFIG_SENSORS_MAX1111 is not set +# CONFIG_SENSORS_MAX16065 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX1668 is not set +# CONFIG_SENSORS_MAX6639 is not set +# CONFIG_SENSORS_MAX6642 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_NTC_THERMISTOR is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_PMBUS is not set +# CONFIG_SENSORS_SHT15 is not set +# CONFIG_SENSORS_SHT21 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMM665 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_EMC1403 is not set +# CONFIG_SENSORS_EMC2103 is not set +# CONFIG_SENSORS_EMC6W201 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SCH56XX_COMMON is not set +# CONFIG_SENSORS_SCH5627 is not set +# CONFIG_SENSORS_SCH5636 is not set +# CONFIG_SENSORS_ADS1015 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_ADS7871 is not set +# CONFIG_SENSORS_AMC6821 is not set +CONFIG_SENSORS_TEGRA_TSENSOR=y +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_TMP102 is not set +# CONFIG_SENSORS_TMP401 is not set +# CONFIG_SENSORS_TMP421 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83795 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +CONFIG_SENSORS_INA219=y +# CONFIG_SENSORS_INA230 is not set +CONFIG_SENSORS_AL3010=y +CONFIG_THERMAL=y +CONFIG_THERMAL_HWMON=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_CORE is not set +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_DW_WATCHDOG is not set +# CONFIG_MPCORE_WATCHDOG is not set +CONFIG_TEGRA_WATCHDOG=y +CONFIG_TEGRA_WATCHDOG_ENABLE_ON_PROBE=y +# CONFIG_MAX63XX_WATCHDOG is not set +# CONFIG_ALIM7101_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# 
USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set +CONFIG_BCMA_POSSIBLE=y + +# +# Broadcom specific AMBA +# +# CONFIG_BCMA is not set +CONFIG_MFD_SUPPORT=y +CONFIG_MFD_CORE=y +# CONFIG_MFD_88PM860X is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_HTC_I2CPLD is not set +# CONFIG_TPS6105X is not set +# CONFIG_TPS65010 is not set +# CONFIG_TPS6507X is not set +CONFIG_MFD_TPS6586X=y +CONFIG_MFD_TPS65910=y +# CONFIG_MFD_TPS65912_I2C is not set +# CONFIG_MFD_TPS65912_SPI is not set +# CONFIG_TWL4030_CORE is not set +# CONFIG_MFD_STMPE is not set +# CONFIG_MFD_TC3589X is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_PMIC_ADP5520 is not set +# CONFIG_MFD_MAX8925 is not set +# CONFIG_MFD_MAX8997 is not set +# CONFIG_MFD_MAX8998 is not set +# CONFIG_MFD_MAX8907C is not set +CONFIG_MFD_MAX77663=y +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM831X_I2C is not set +# CONFIG_MFD_WM831X_SPI is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_WM8994 is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_MFD_MC13XXX is not set +# CONFIG_ABX500_CORE is not set +# CONFIG_EZX_PCAP is not set +# CONFIG_MFD_TIMBERDALE is not set +# CONFIG_LPC_SCH is not set +# CONFIG_MFD_RDC321X is not set +# CONFIG_MFD_JANZ_CMODIO is not set +# CONFIG_MFD_VX855 is not set +# CONFIG_MFD_WL1273_CORE is not set +# CONFIG_MFD_AAT2870_CORE is not set +CONFIG_MFD_TPS6591X=y +# CONFIG_MFD_TPS65090 is not set +# CONFIG_MFD_RC5T583 is not set +CONFIG_MFD_TPS80031=y +CONFIG_GPADC_TPS80031=y +CONFIG_MFD_RICOH583=y +CONFIG_REGULATOR=y +# CONFIG_REGULATOR_DEBUG is not set +# CONFIG_REGULATOR_DUMMY is not set +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_VIRTUAL_CONSUMER=y +# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_GPIO is not set +# CONFIG_REGULATOR_BQ24022 is not set +# CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX8649 is not set +# CONFIG_REGULATOR_MAX8660 is not set +# CONFIG_REGULATOR_MAX8952 is not set +CONFIG_REGULATOR_MAX77663=y +# CONFIG_REGULATOR_LP3971 is not set +# CONFIG_REGULATOR_LP3972 is not set +# CONFIG_REGULATOR_TPS65023 is not set +# CONFIG_REGULATOR_TPS6507X is not set +# CONFIG_REGULATOR_ISL6271A is not set +# CONFIG_REGULATOR_AD5398 is not set +CONFIG_REGULATOR_TPS6586X=y +# CONFIG_REGULATOR_TPS6524X is not set +CONFIG_REGULATOR_TPS65910=y +CONFIG_REGULATOR_TPS62360=y +CONFIG_REGULATOR_TPS6591X=y +CONFIG_REGULATOR_TPS80031=y +CONFIG_REGULATOR_RICOH583=y +# CONFIG_REGULATOR_FAN53555 is not set +CONFIG_MEDIA_SUPPORT=y + +# +# Multimedia core support +# +# CONFIG_MEDIA_CONTROLLER is not set +CONFIG_VIDEO_DEV=y +CONFIG_VIDEO_V4L2_COMMON=y +# CONFIG_DVB_CORE is not set +CONFIG_VIDEO_MEDIA=y + +# +# Multimedia drivers +# +# CONFIG_RC_CORE is not set +CONFIG_MEDIA_TUNER=y +# CONFIG_MEDIA_TUNER_CUSTOMISE is not set +CONFIG_MEDIA_TUNER_SIMPLE=y +CONFIG_MEDIA_TUNER_TDA8290=y +CONFIG_MEDIA_TUNER_TDA827X=y +CONFIG_MEDIA_TUNER_TDA18271=y +CONFIG_MEDIA_TUNER_TDA9887=y +CONFIG_MEDIA_TUNER_TEA5761=y +CONFIG_MEDIA_TUNER_TEA5767=y +CONFIG_MEDIA_TUNER_MT20XX=y +CONFIG_MEDIA_TUNER_XC2028=y +CONFIG_MEDIA_TUNER_XC5000=y +CONFIG_MEDIA_TUNER_XC4000=y +CONFIG_MEDIA_TUNER_MC44S803=y +CONFIG_VIDEO_V4L2=y +CONFIG_VIDEO_CAPTURE_DRIVERS=y +# CONFIG_VIDEO_ADV_DEBUG is 
not set +# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +CONFIG_VIDEO_HELPER_CHIPS_AUTO=y + +# +# Audio decoders, processors and mixers +# + +# +# RDS decoders +# + +# +# Video decoders +# + +# +# Video and audio decoders +# + +# +# MPEG video encoders +# + +# +# Video encoders +# + +# +# Camera sensor devices +# + +# +# Flash devices +# + +# +# Video improvement chips +# + +# +# Miscelaneous helper chips +# +CONFIG_TEGRA_RPC=y +# CONFIG_TEGRA_AVP is not set +# CONFIG_TEGRA_MEDIASERVER is not set +CONFIG_TEGRA_NVAVP=y +CONFIG_TEGRA_CAMERA=y +CONFIG_VIDEO_MI1040=y +CONFIG_TEGRA_DTV=y +# CONFIG_VIDEO_OV5650 is not set +# CONFIG_VIDEO_OV14810 is not set +# CONFIG_VIDEO_OV9726 is not set +# CONFIG_VIDEO_OV2710 is not set +# CONFIG_VIDEO_AR0832 is not set +# CONFIG_VIDEO_SOC380 is not set +# CONFIG_TORCH_SSL3250A is not set +# CONFIG_TORCH_TPS61050 is not set +# CONFIG_VIDEO_SH532U is not set +# CONFIG_VIDEO_AD5820 is not set +# CONFIG_VIDEO_CPIA2 is not set +# CONFIG_VIDEO_SAA7134 is not set +# CONFIG_VIDEO_MXB is not set +# CONFIG_VIDEO_HEXIUM_ORION is not set +# CONFIG_VIDEO_HEXIUM_GEMINI is not set +# CONFIG_VIDEO_CAFE_CCIC is not set +# CONFIG_VIDEO_SR030PC30 is not set +# CONFIG_VIDEO_NOON010PC30 is not set +# CONFIG_SOC_CAMERA is not set +CONFIG_V4L_USB_DRIVERS=y +CONFIG_USB_VIDEO_CLASS=y +CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y +# CONFIG_USB_GSPCA is not set +# CONFIG_VIDEO_PVRUSB2 is not set +# CONFIG_VIDEO_HDPVR is not set +# CONFIG_VIDEO_EM28XX is not set +# CONFIG_VIDEO_USBVISION is not set +# CONFIG_USB_ET61X251 is not set +# CONFIG_USB_SN9C102 is not set +# CONFIG_USB_PWC is not set +# CONFIG_USB_ZR364XX is not set +# CONFIG_USB_STKWEBCAM is not set +# CONFIG_USB_S2255 is not set +# CONFIG_V4L_MEM2MEM_DRIVERS is not set +# CONFIG_RADIO_ADAPTERS is not set + +# +# Graphics support +# +CONFIG_VGA_ARB=y +CONFIG_VGA_ARB_MAX_GPUS=16 +# CONFIG_DRM is not set +# CONFIG_STUB_POULSBO is not set +# CONFIG_ION is not set +# CONFIG_VGASTATE is not set +CONFIG_VIDEO_OUTPUT_CONTROL=y +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_WMT_GE_ROPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_CARMINE is not set +# CONFIG_FB_TMIO is not set +# CONFIG_FB_UDL is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is 
not set +# CONFIG_FB_BROADSHEET is not set + +# +# NVIDIA Tegra Display Driver options +# +CONFIG_TEGRA_GRHOST=y +CONFIG_TEGRA_DC=y +CONFIG_FB_TEGRA=y +CONFIG_TEGRA_DC_EXTENSIONS=y +CONFIG_TEGRA_SD_GEN2=y +CONFIG_TEGRA_NVMAP=y +CONFIG_NVMAP_RECLAIM_UNPINNED_VM=y +CONFIG_NVMAP_ALLOW_SYSMEM=y +# CONFIG_NVMAP_HIGHMEM_ONLY is not set +# CONFIG_NVMAP_CARVEOUT_KILLER is not set +CONFIG_NVMAP_CARVEOUT_COMPACTOR=y +# CONFIG_NVMAP_VPR is not set +CONFIG_TEGRA_DSI=y +CONFIG_NVMAP_CONVERT_CARVEOUT_TO_IOVMM=y +CONFIG_TEGRA_NVHDCP=y +# CONFIG_TEGRA_HDMI_74MHZ_LIMIT is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +# CONFIG_LCD_L4F00242T03 is not set +# CONFIG_LCD_LMS283GF05 is not set +# CONFIG_LCD_LTV350QV is not set +# CONFIG_LCD_TDO24M is not set +# CONFIG_LCD_VGG2432A4 is not set +# CONFIG_LCD_PLATFORM is not set +# CONFIG_LCD_S6E63M0 is not set +# CONFIG_LCD_LD9040 is not set +# CONFIG_LCD_AMS369FG06 is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_GENERIC is not set +CONFIG_BACKLIGHT_PWM=y +CONFIG_BACKLIGHT_TEGRA_PWM=y +# CONFIG_BACKLIGHT_ADP8860 is not set +# CONFIG_BACKLIGHT_ADP8870 is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_LOGO is not set +CONFIG_SOUND=y +# CONFIG_SOUND_OSS_CORE is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_RAWMIDI=y +CONFIG_SND_JACK=y +# CONFIG_SND_SEQUENCER is not set +# CONFIG_SND_MIXER_OSS is not set +# CONFIG_SND_PCM_OSS is not set +# CONFIG_SND_HRTIMER is not set +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y +# CONFIG_SND_RAWMIDI_SEQ is not set +# CONFIG_SND_OPL3_LIB_SEQ is not set +# CONFIG_SND_OPL4_LIB_SEQ is not set +# CONFIG_SND_SBAWE_SEQ is not set +# CONFIG_SND_EMU10K1_SEQ is not set +CONFIG_SND_DRIVERS=y +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_ALOOP is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set +CONFIG_SND_PCI=y +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5535AUDIO is not set +# CONFIG_SND_CTXFI is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_INDIGOIOX is not set +# CONFIG_SND_INDIGODJX is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_PREALLOC_SIZE=64 +# CONFIG_SND_HDA_HWDEP is not set +# CONFIG_SND_HDA_INPUT_BEEP is not set +# CONFIG_SND_HDA_INPUT_JACK is not set +# 
CONFIG_SND_HDA_PATCH_LOADER is not set +CONFIG_SND_HDA_PLATFORM_DRIVER=y +CONFIG_SND_HDA_PLATFORM_NVIDIA_TEGRA=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_ENABLE_REALTEK_QUIRKS=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_HDMI=y +CONFIG_SND_HDA_CODEC_CIRRUS=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CA0110=y +CONFIG_SND_HDA_CODEC_CA0132=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10 +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_LOLA is not set +# CONFIG_SND_LX6464ES is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set +CONFIG_SND_ARM=y +CONFIG_SND_SPI=y +CONFIG_SND_USB=y +CONFIG_SND_USB_AUDIO=y +# CONFIG_SND_USB_UA101 is not set +# CONFIG_SND_USB_CAIAQ is not set +# CONFIG_SND_USB_6FIRE is not set +CONFIG_SND_SOC=y +# CONFIG_SND_SOC_CACHE_LZO is not set +CONFIG_SND_SOC_TEGRA=y +CONFIG_SND_SOC_TEGRA30_AHUB=y +CONFIG_SND_SOC_TEGRA30_DAM=y +CONFIG_SND_SOC_TEGRA30_I2S=y +CONFIG_SND_SOC_TEGRA30_SPDIF=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_WM8903=y +# CONFIG_SND_SOC_TEGRA_WM8903 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_TLV320AIC326X=y +# CONFIG_SND_SOC_TEGRA_TLV320AIC326X is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5639=y +# CONFIG_SND_SOC_TEGRA_RT5639 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5640=y +CONFIG_SND_SOC_TEGRA_RT5640=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_MAX98095=y +# CONFIG_SND_SOC_TEGRA_MAX98095 is not set +CONFIG_HEADSET_FUNCTION=y +CONFIG_SND_SOC_I2C_AND_SPI=y +# CONFIG_SND_SOC_ALL_CODECS is not set +CONFIG_SND_SOC_RT5640=y +CONFIG_SND_SOC_RT5642=y +CONFIG_SND_SOC_SPDIF=y +# CONFIG_SND_SOC_TLV320AIC326X is not set +# CONFIG_SOUND_PRIME is not set +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HIDRAW is not set +CONFIG_UHID=y + +# +# USB Input Devices +# +CONFIG_USB_HID=y +# CONFIG_HID_PID is not set +CONFIG_USB_HIDDEV=y + +# +# Special HID drivers +# +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +# CONFIG_HID_PRODIKEYS is not set +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HOLTEK_FF=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIWII_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +# CONFIG_HID_PICOLCD is not set +# CONFIG_HID_QUANTA is not 
set +# CONFIG_HID_ROCCAT is not set +# CONFIG_HID_SAMSUNG is not set +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_HID_WACOM=y +# CONFIG_HID_WACOM_POWER_SUPPLY is not set +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_ZEROPLUS_FF=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +CONFIG_USB_DEVICE_CLASS=y +# CONFIG_USB_DYNAMIC_MINORS is not set +CONFIG_USB_SUSPEND=y +CONFIG_USB_OTG=y +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set +# CONFIG_USB_MON is not set +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +# CONFIG_USB_XHCI_HCD is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_TT_NEWSCHED=y +CONFIG_USB_EHCI_TEGRA=y +# CONFIG_USB_OXU210HP_HCD is not set +# CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set +# CONFIG_USB_ISP1362_HCD is not set +# CONFIG_USB_OHCI_HCD is not set +# CONFIG_USB_UHCI_HCD is not set +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set +# CONFIG_USB_EHCI_ONOFF_FEATURE is not set +# CONFIG_USB_MUSB_HDRC is not set + +# +# USB Device Class drivers +# +CONFIG_USB_ACM=y +# CONFIG_USB_PRINTER is not set +CONFIG_USB_WDM=y +# CONFIG_USB_TMC is not set + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +# + +# +# also be needed; see USB_STORAGE Help for more info +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_REALTEK is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +# CONFIG_USB_STORAGE_ENE_UB6250 is not set +# CONFIG_USB_UAS is not set +CONFIG_USB_LIBUSUAL=y + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB port drivers +# +CONFIG_USB_SERIAL=y +# CONFIG_USB_SERIAL_CONSOLE is not set +# CONFIG_USB_EZUSB is not set +# CONFIG_USB_SERIAL_GENERIC is not set +# CONFIG_USB_SERIAL_AIRCABLE is not set +# CONFIG_USB_SERIAL_ARK3116 is not set +# CONFIG_USB_SERIAL_BELKIN is not set +# CONFIG_USB_SERIAL_CH341 is not set +# CONFIG_USB_SERIAL_WHITEHEAT is not set +# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set +# CONFIG_USB_SERIAL_CP210X is not set +# CONFIG_USB_SERIAL_CYPRESS_M8 is not set +# CONFIG_USB_SERIAL_EMPEG is not set +# CONFIG_USB_SERIAL_FTDI_SIO is not set +# CONFIG_USB_SERIAL_FUNSOFT is not set +# CONFIG_USB_SERIAL_VISOR is not set +# CONFIG_USB_SERIAL_IPAQ is not set +# CONFIG_USB_SERIAL_IR is not set +# CONFIG_USB_SERIAL_EDGEPORT is not set +# CONFIG_USB_SERIAL_EDGEPORT_TI is not set +# CONFIG_USB_SERIAL_GARMIN is not set +# CONFIG_USB_SERIAL_IPW is not set +# 
CONFIG_USB_SERIAL_IUU is not set +# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set +# CONFIG_USB_SERIAL_KEYSPAN is not set +# CONFIG_USB_SERIAL_KLSI is not set +# CONFIG_USB_SERIAL_KOBIL_SCT is not set +# CONFIG_USB_SERIAL_MCT_U232 is not set +# CONFIG_USB_SERIAL_MOS7720 is not set +# CONFIG_USB_SERIAL_MOS7840 is not set +# CONFIG_USB_SERIAL_MOTOROLA is not set +# CONFIG_USB_SERIAL_NAVMAN is not set +CONFIG_USB_SERIAL_PL2303=y +# CONFIG_USB_SERIAL_OTI6858 is not set +# CONFIG_USB_SERIAL_QCAUX is not set +# CONFIG_USB_SERIAL_QUALCOMM is not set +# CONFIG_USB_SERIAL_SPCP8X5 is not set +# CONFIG_USB_SERIAL_HP4X is not set +# CONFIG_USB_SERIAL_SAFE is not set +# CONFIG_USB_SERIAL_SIEMENS_MPI is not set +# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set +# CONFIG_USB_SERIAL_SYMBOL is not set +# CONFIG_USB_SERIAL_TI is not set +# CONFIG_USB_SERIAL_CYBERJACK is not set +# CONFIG_USB_SERIAL_XIRCOM is not set +CONFIG_USB_SERIAL_WWAN=y +CONFIG_USB_SERIAL_OPTION=y +# CONFIG_USB_SERIAL_OMNINET is not set +# CONFIG_USB_SERIAL_OPTICON is not set +# CONFIG_USB_SERIAL_VIVOPAY_SERIAL is not set +# CONFIG_USB_SERIAL_ZIO is not set +# CONFIG_USB_SERIAL_SSU100 is not set +# CONFIG_USB_SERIAL_DEBUG is not set +CONFIG_USB_SERIAL_BASEBAND=y + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_YUREX is not set +CONFIG_USB_GADGET=y +# CONFIG_USB_GADGET_DEBUG is not set +# CONFIG_USB_GADGET_DEBUG_FILES is not set +# CONFIG_USB_GADGET_DEBUG_FS is not set +CONFIG_USB_GADGET_VBUS_DRAW=500 +CONFIG_USB_FSL_USB2=y +# CONFIG_USB_FUSB300 is not set +# CONFIG_USB_R8A66597 is not set +# CONFIG_USB_M66592 is not set +# CONFIG_USB_AMD5536UDC is not set +# CONFIG_USB_CI13XXX_PCI is not set +# CONFIG_USB_NET2272 is not set +# CONFIG_USB_NET2280 is not set +# CONFIG_USB_GOKU is not set +# CONFIG_USB_LANGWELL is not set +# CONFIG_USB_EG20T is not set +# CONFIG_USB_DUMMY_HCD is not set +CONFIG_USB_GADGET_DUALSPEED=y +# CONFIG_USB_ZERO is not set +# CONFIG_USB_AUDIO is not set +# CONFIG_USB_ETH is not set +# CONFIG_USB_G_NCM is not set +# CONFIG_USB_GADGETFS is not set +# CONFIG_USB_FUNCTIONFS is not set +# CONFIG_USB_FILE_STORAGE is not set +# CONFIG_USB_MASS_STORAGE is not set +# CONFIG_USB_G_SERIAL is not set +# CONFIG_USB_MIDI_GADGET is not set +# CONFIG_USB_G_PRINTER is not set +CONFIG_USB_G_ANDROID=y +# CONFIG_USB_CDC_COMPOSITE is not set +# CONFIG_USB_G_MULTI is not set +# CONFIG_USB_G_HID is not set +# CONFIG_USB_G_DBGP is not set +# CONFIG_USB_G_WEBCAM is not set + +# +# OTG and related infrastructure +# +CONFIG_USB_OTG_UTILS=y +# CONFIG_USB_OTG_WAKELOCK is not set +# CONFIG_USB_GPIO_VBUS is not set +CONFIG_USB_ULPI=y +CONFIG_USB_ULPI_VIEWPORT=y +# CONFIG_NOP_USB_XCEIV is not set +CONFIG_USB_TEGRA_OTG=y +# CONFIG_UWB is not set +CONFIG_MMC=y +# CONFIG_MMC_DEBUG is not set +CONFIG_MMC_UNSAFE_RESUME=y +# CONFIG_MMC_CLKGATE is not set +CONFIG_MMC_EMBEDDED_SDIO=y +# CONFIG_MMC_PARANOID_SD_INIT is not set + +# +# 
MMC/SD/SDIO Card Drivers +# +CONFIG_MMC_BLOCK=y +CONFIG_MMC_BLOCK_MINORS=16 +CONFIG_MMC_BLOCK_BOUNCE=y +CONFIG_MMC_BLOCK_DEFERRED_RESUME=y +# CONFIG_SDIO_UART is not set +CONFIG_MMC_TEST=y + +# +# MMC/SD/SDIO Host Controller Drivers +# +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_IO_ACCESSORS=y +CONFIG_MMC_SDHCI_NATIVE_BLOCKSIZE=y +# CONFIG_MMC_SDHCI_PCI is not set +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_TEGRA=y +# CONFIG_MMC_SDHCI_PXAV3 is not set +# CONFIG_MMC_SDHCI_PXAV2 is not set +# CONFIG_MMC_TIFM_SD is not set +# CONFIG_MMC_CB710 is not set +# CONFIG_MMC_VIA_SDMMC is not set +# CONFIG_MMC_DW is not set +# CONFIG_MMC_VUB300 is not set +# CONFIG_MMC_USHC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y + +# +# LED drivers +# +# CONFIG_LEDS_LM3530 is not set +# CONFIG_LEDS_PCA9532 is not set +CONFIG_LEDS_GPIO=y +# CONFIG_LEDS_LP3944 is not set +# CONFIG_LEDS_LP5521 is not set +# CONFIG_LEDS_LP5523 is not set +# CONFIG_LEDS_PCA955X is not set +# CONFIG_LEDS_DAC124S085 is not set +# CONFIG_LEDS_PWM is not set +# CONFIG_LEDS_REGULATOR is not set +# CONFIG_LEDS_BD2802 is not set +# CONFIG_LEDS_LT3593 is not set +# CONFIG_LEDS_TRIGGERS is not set + +# +# LED Triggers +# +CONFIG_SWITCH=y +# CONFIG_SWITCH_GPIO is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +CONFIG_RTC_INTF_ALARM=y +CONFIG_RTC_INTF_ALARM_DEV=y +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_DS3232 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +CONFIG_RTC_DRV_MAX77663=y +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_ISL12022 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_BQ32K is not set +CONFIG_RTC_DRV_TPS6586X=y +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set +# CONFIG_RTC_DRV_RX8025 is not set +# CONFIG_RTC_DRV_EM3027 is not set +# CONFIG_RTC_DRV_RV3029C2 is not set + +# +# SPI RTC drivers +# +# CONFIG_RTC_DRV_M41T93 is not set +# CONFIG_RTC_DRV_M41T94 is not set +# CONFIG_RTC_DRV_DS1305 is not set +# CONFIG_RTC_DRV_DS1390 is not set +# CONFIG_RTC_DRV_MAX6902 is not set +# CONFIG_RTC_DRV_R9701 is not set +# CONFIG_RTC_DRV_RS5C348 is not set +# CONFIG_RTC_DRV_DS3234 is not set +# CONFIG_RTC_DRV_PCF2123 is not set + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_CMOS is not set +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_MSM6242 is not set +# CONFIG_RTC_DRV_BQ4802 is not set +# CONFIG_RTC_DRV_RP5C01 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +# CONFIG_RTC_DRV_TEGRA is not set +CONFIG_RTC_DRV_TPS6591x=y +CONFIG_RTC_DRV_TPS80031=y +CONFIG_RTC_DRV_RC5T583=y +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set + +# 
+# Virtio drivers +# +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set +CONFIG_STAGING=y +# CONFIG_ET131X is not set +# CONFIG_USBIP_CORE is not set +# CONFIG_PRISM2_USB is not set +# CONFIG_ECHO is not set +# CONFIG_BRCMUTIL is not set +# CONFIG_ASUS_OLED is not set +# CONFIG_R8712U is not set +# CONFIG_RTS_PSTOR is not set +# CONFIG_TRANZPORT is not set + +# +# Android +# +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_LOGGER=y +CONFIG_ANDROID_RAM_CONSOLE=y +CONFIG_ANDROID_RAM_CONSOLE_ENABLE_VERBOSE=y +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION=y +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_DATA_SIZE=128 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_ECC_SIZE=16 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE=8 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_POLYNOMIAL=0x11d +# CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT is not set +CONFIG_ANDROID_TIMED_OUTPUT=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +# CONFIG_POHMELFS is not set +# CONFIG_LINE6_USB is not set +# CONFIG_USB_SERIAL_QUATECH2 is not set +# CONFIG_USB_SERIAL_QUATECH_USB2 is not set +# CONFIG_VME_BUS is not set +# CONFIG_DX_SEP is not set +CONFIG_IIO=y +# CONFIG_IIO_ST_HWMON is not set +CONFIG_IIO_BUFFER=y +# CONFIG_IIO_SW_RING is not set +CONFIG_IIO_KFIFO_BUF=y +CONFIG_IIO_TRIGGER=y +CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 + +# +# Accelerometers +# +# CONFIG_ADIS16201 is not set +# CONFIG_ADIS16203 is not set +# CONFIG_ADIS16204 is not set +# CONFIG_ADIS16209 is not set +# CONFIG_ADIS16220 is not set +# CONFIG_ADIS16240 is not set +# CONFIG_KXSD9 is not set +# CONFIG_LIS3L02DQ is not set + +# +# Analog to digital convertors +# +# CONFIG_AD7150 is not set +# CONFIG_AD7152 is not set +# CONFIG_AD7291 is not set +# CONFIG_AD7298 is not set +# CONFIG_AD7314 is not set +# CONFIG_AD7606 is not set +# CONFIG_AD799X is not set +# CONFIG_AD7476 is not set +# CONFIG_AD7887 is not set +# CONFIG_AD7780 is not set +# CONFIG_AD7793 is not set +# CONFIG_AD7745 is not set +# CONFIG_AD7816 is not set +# CONFIG_ADT75 is not set +# CONFIG_ADT7310 is not set +# CONFIG_ADT7410 is not set +# CONFIG_MAX1363 is not set + +# +# Analog digital bi-direction convertors +# +# CONFIG_ADT7316 is not set + +# +# Digital to analog convertors +# +# CONFIG_AD5624R_SPI is not set +# CONFIG_AD5446 is not set +# CONFIG_AD5504 is not set +# CONFIG_AD5791 is not set +# CONFIG_AD5686 is not set +# CONFIG_MAX517 is not set + +# +# Direct Digital Synthesis +# +# CONFIG_AD5930 is not set +# CONFIG_AD9832 is not set +# CONFIG_AD9834 is not set +# CONFIG_AD9850 is not set +# CONFIG_AD9852 is not set +# CONFIG_AD9910 is not set +# CONFIG_AD9951 is not set + +# +# Digital gyroscope sensors +# +# CONFIG_ADIS16060 is not set +# CONFIG_ADIS16080 is not set +# CONFIG_ADIS16130 is not set +# CONFIG_ADIS16260 is not set +# CONFIG_ADXRS450 is not set + +# +# Inertial measurement units +# +# CONFIG_ADIS16400 is not set +CONFIG_INV_MPU_IIO=y +# CONFIG_INV_IIO_MPU3050_ACCEL_SLAVE_BMA250 is not set + +# +# Light sensors +# +# CONFIG_SENSORS_ISL29018 is not set +CONFIG_SENSORS_ISL29028=y +# CONFIG_SENSORS_TSL2563 is not set +# CONFIG_TSL2583 is not set +CONFIG_SENSORS_LTR558=y + +# +# Magnetometer sensors +# +# CONFIG_SENSORS_HMC5843 is not set +# CONFIG_INV_YAS53X_IIO is not set +CONFIG_INV_AMI306_IIO=y + +# +# Active energy metering IC +# +# CONFIG_ADE7753 is not set +# CONFIG_ADE7754 is not set +# CONFIG_ADE7758 is not set +# CONFIG_ADE7759 is not set +# CONFIG_ADE7854 is not set + +# +# Resolver to digital converters +# +# 
CONFIG_AD2S90 is not set +# CONFIG_AD2S120X is not set +# CONFIG_AD2S1210 is not set + +# +# Triggers - standalone +# +# CONFIG_IIO_PERIODIC_RTC_TRIGGER is not set +# CONFIG_IIO_GPIO_TRIGGER is not set +# CONFIG_IIO_SYSFS_TRIGGER is not set +# CONFIG_IIO_SIMPLE_DUMMY is not set +# CONFIG_XVMALLOC is not set +# CONFIG_ZRAM is not set +# CONFIG_FB_SM7XX is not set +# CONFIG_VIDEO_DT3155 is not set +# CONFIG_CRYSTALHD is not set +# CONFIG_FB_XGI is not set +# CONFIG_EASYCAP is not set +# CONFIG_SOLO6X10 is not set +# CONFIG_ATH6K_LEGACY is not set +# CONFIG_BCM_WIMAX is not set +# CONFIG_FT1000 is not set + +# +# Speakup console speech +# +# CONFIG_TOUCHSCREEN_CLEARPAD_TM1217 is not set +# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4 is not set +# CONFIG_ALTERA_STAPL is not set +# CONFIG_MFD_NVEC is not set +CONFIG_CLKDEV_LOOKUP=y +CONFIG_CLKSRC_MMIO=y +CONFIG_IOMMU_SUPPORT=y +# CONFIG_TEGRA_IOMMU_SMMU is not set +# CONFIG_VIRT_DRIVERS is not set +CONFIG_RIL=y + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_XATTR=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +# CONFIG_EXT4_DEBUG is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=y +# CONFIG_JBD2_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y +# CONFIG_DNOTIFY is not set +CONFIG_INOTIFY_USER=y +# CONFIG_FANOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_QUOTACTL is not set +# CONFIG_AUTOFS4_FS is not set +CONFIG_FUSE_FS=y +# CONFIG_CUSE is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=y +# CONFIG_NTFS_DEBUG is not set +CONFIG_NTFS_RW=y + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_REPORT_PRESENT_CPUS=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_TMPFS_XATTR is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_LOGFS is not set +# CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_PSTORE is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_F2FS_FS=y +CONFIG_F2FS_STAT_FS=y +CONFIG_F2FS_FS_XATTR=y +CONFIG_F2FS_FS_POSIX_ACL=y +CONFIG_F2FS_FS_SECURITY=y +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=y +# CONFIG_NFS_V3 is not set +CONFIG_NFS_V4=y +# CONFIG_NFS_V4_1 is not set +CONFIG_ROOT_NFS=y +# CONFIG_NFS_USE_LEGACY_DNS is not set 
+CONFIG_NFS_USE_KERNEL_DNS=y +# CONFIG_NFS_USE_NEW_IDMAPPER is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +# CONFIG_CEPH_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set +CONFIG_DYNAMIC_FSYNC=y + +# +# Kernel hacking +# +CONFIG_PRINTK_TIME=y +CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4 +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_SECTION_MISMATCH is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_LOCKUP_DETECTOR=y +# CONFIG_HARDLOCKUP_DETECTOR is not set +# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +# CONFIG_DETECT_HUNG_TASK is not set +CONFIG_SCHED_DEBUG=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_KMEMLEAK is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# 
CONFIG_SPARSE_RCU_POINTER is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_ATOMIC_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_INFO_REDUCED is not set +CONFIG_DEBUG_VM=y +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_TEST_LIST_SORT is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_RCU_CPU_STALL_VERBOSE=y +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set +# CONFIG_LKDTM is not set +# CONFIG_CPU_NOTIFIER_ERROR_INJECT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +# CONFIG_DEBUG_PAGEALLOC is not set +CONFIG_NOP_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_C_RECORDMCOUNT=y +CONFIG_RING_BUFFER=y +CONFIG_EVENT_TRACING=y +# CONFIG_EVENT_POWER_TRACING_DEPRECATED is not set +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_RING_BUFFER_ALLOW_SWAP=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y +CONFIG_FTRACE=y +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +# CONFIG_PROFILE_ALL_BRANCHES is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_RING_BUFFER_BENCHMARK is not set +# CONFIG_TRACELEVEL is not set +CONFIG_DYNAMIC_DEBUG=y +# CONFIG_DMA_API_DEBUG is not set +# CONFIG_ATOMIC64_SELFTEST is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_TEST_KSTRTOX is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_ARM_UNWIND=y +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_LL is not set +# CONFIG_OC_ETM is not set + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_NETWORK_XFRM is not set +# CONFIG_SECURITY_PATH is not set +CONFIG_LSM_MMAP_MIN_ADDR=4096 +CONFIG_SECURITY_SELINUX=y +# CONFIG_SECURITY_SELINUX_BOOTPARAM is not set +# CONFIG_SECURITY_SELINUX_DISABLE is not set +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_TOMOYO is not set +# CONFIG_SECURITY_APPARMOR is not set +CONFIG_TRUSTED_FOUNDATIONS=y +# CONFIG_IMA is not set +CONFIG_DEFAULT_SECURITY_SELINUX=y +# CONFIG_DEFAULT_SECURITY_DAC is not set +CONFIG_DEFAULT_SECURITY="selinux" +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP2=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y 
+CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_PCRYPT is not set +CONFIG_CRYPTO_WORKQUEUE=y +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=y +# CONFIG_CRYPTO_GHASH is not set +CONFIG_CRYPTO_MD4=y +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA1_ARM=y +CONFIG_CRYPTO_SHA256=y +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_AES_ARM=y +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +CONFIG_CRYPTO_TWOFISH=y +CONFIG_CRYPTO_TWOFISH_COMMON=y + +# +# Compression +# +CONFIG_CRYPTO_DEFLATE=y +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_LZ4=y +# CONFIG_CRYPTO_LZ4HC is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +# CONFIG_CRYPTO_USER_API_HASH is not set +# CONFIG_CRYPTO_USER_API_SKCIPHER is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_CRYPTO_DEV_TEGRA_AES is not set +CONFIG_CRYPTO_DEV_TEGRA_SE=y +CONFIG_BINARY_PRINTF=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=y +CONFIG_CRC16=y +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=y +# CONFIG_CRC8 is not set +CONFIG_AUDIT_GENERIC=y +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +CONFIG_LZ4_COMPRESS=y +CONFIG_LZ4_DECOMPRESS=y +# CONFIG_XZ_DEC is not set +# CONFIG_XZ_DEC_BCJ is not set +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_LZ4=y +CONFIG_REED_SOLOMON=y +CONFIG_REED_SOLOMON_ENC8=y +CONFIG_REED_SOLOMON_DEC8=y +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=y +CONFIG_TEXTSEARCH_BM=y +CONFIG_TEXTSEARCH_FSM=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_CPU_RMAP=y +CONFIG_NLATTR=y +# CONFIG_AVERAGE is not set +# CONFIG_CORDIC is not set diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig new file mode 100644 index 00000000000..d59826a235e --- /dev/null +++ b/arch/arm/configs/motley_grouper_defconfig @@ -0,0 +1,3353 @@ +CONFIG_ARM=y +CONFIG_HAVE_PWM=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_HAVE_SCHED_CLOCK=y +CONFIG_GENERIC_GPIO=y +# CONFIG_ARCH_USES_GETTIMEOFFSET is not set +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y 
+CONFIG_KTIME_SCALAR=y +CONFIG_HAVE_PROC_CPU=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_LOCKBREAK=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_ARCH_HAS_CPUFREQ=y +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_NEED_DMA_MAP_STATE=y +CONFIG_FIQ=y +CONFIG_ARCH_PROVIDES_UDELAY=y +CONFIG_VECTORS_BASE=0xffff0000 +# CONFIG_ARM_PATCH_PHYS_VIRT is not set +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_HAVE_IRQ_WORK=y +CONFIG_IRQ_WORK=y + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_CROSS_COMPILE="" +CONFIG_LOCALVERSION="-motley" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_HAVE_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_LZMA=y +CONFIG_HAVE_KERNEL_LZO=y +# CONFIG_KERNEL_GZIP is not set +CONFIG_KERNEL_LZMA=y +# CONFIG_KERNEL_LZO is not set +CONFIG_DEFAULT_HOSTNAME="(none)" +CONFIG_SWAP=y +# CONFIG_SYSVIPC is not set +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_FHANDLE is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set +CONFIG_HAVE_GENERIC_HARDIRQS=y + +# +# IRQ subsystem +# +CONFIG_GENERIC_HARDIRQS=y +CONFIG_HAVE_SPARSE_IRQ=y +CONFIG_GENERIC_IRQ_SHOW=y +# CONFIG_SPARSE_IRQ is not set + +# +# RCU Subsystem +# +CONFIG_TREE_PREEMPT_RCU=y +CONFIG_PREEMPT_RCU=y +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_RCU_BOOST is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_FREEZER=y +# CONFIG_CGROUP_DEVICE is not set +# CONFIG_CPUSETS is not set +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# CONFIG_CGROUP_PERF is not set +CONFIG_CGROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +# CONFIG_BLK_CGROUP is not set +# CONFIG_NAMESPACES is not set +CONFIG_SCHED_AUTOGROUP=y +# CONFIG_SYSFS_DEPRECATED is not set +# CONFIG_RELAY is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_PANIC_TIMEOUT=10 +CONFIG_EXPERT=y +CONFIG_UID16=y +# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +# CONFIG_ELF_CORE is not set +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_ASHMEM=y +CONFIG_AIO=y +CONFIG_EMBEDDED=y +CONFIG_HAVE_PERF_EVENTS=y +CONFIG_PERF_USE_VMALLOC=y + +# +# Kernel Performance Events And Counters +# +CONFIG_PERF_EVENTS=y +# CONFIG_PERF_COUNTERS is not set +# CONFIG_DEBUG_PERF_USE_VMALLOC is not set +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y +CONFIG_COMPAT_BRK=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y +CONFIG_OPROFILE=y +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_USE_GENERIC_SMP_HELPERS=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +CONFIG_HAVE_HW_BREAKPOINT=y + +# +# GCOV-based kernel profiling +# +# CONFIG_GCOV_KERNEL is 
not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBDAF=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_BSGLIB is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_IOSCHED_SIO=y +CONFIG_IOSCHED_VR=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_SIO=y +# CONFIG_DEFAULT_VR is not set +CONFIG_DEFAULT_IOSCHED="sio" +# CONFIG_INLINE_SPIN_TRYLOCK is not set +# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK is not set +# CONFIG_INLINE_SPIN_LOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK_IRQ is not set +# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set +# CONFIG_INLINE_SPIN_UNLOCK is not set +# CONFIG_INLINE_SPIN_UNLOCK_BH is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQ is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_READ_TRYLOCK is not set +# CONFIG_INLINE_READ_LOCK is not set +# CONFIG_INLINE_READ_LOCK_BH is not set +# CONFIG_INLINE_READ_LOCK_IRQ is not set +# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set +# CONFIG_INLINE_READ_UNLOCK is not set +# CONFIG_INLINE_READ_UNLOCK_BH is not set +# CONFIG_INLINE_READ_UNLOCK_IRQ is not set +# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_WRITE_TRYLOCK is not set +# CONFIG_INLINE_WRITE_LOCK is not set +# CONFIG_INLINE_WRITE_LOCK_BH is not set +# CONFIG_INLINE_WRITE_LOCK_IRQ is not set +# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set +# CONFIG_INLINE_WRITE_UNLOCK is not set +# CONFIG_INLINE_WRITE_UNLOCK_BH is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQ is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set +CONFIG_MUTEX_SPIN_ON_OWNER=y +CONFIG_FREEZER=y + +# +# System Type +# +CONFIG_MMU=y +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_VEXPRESS is not set +# CONFIG_ARCH_AT91 is not set +# CONFIG_ARCH_BCMRING is not set +# CONFIG_ARCH_CLPS711X is not set +# CONFIG_ARCH_CNS3XXX is not set +# CONFIG_ARCH_GEMINI is not set +# CONFIG_ARCH_PRIMA2 is not set +# CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set +# CONFIG_ARCH_FOOTBRIDGE is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_MXS is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set +# CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_DOVE is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LPC32XX is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_W90X900 is not set +# CONFIG_ARCH_NUC93X is not set +CONFIG_ARCH_TEGRA=y +# CONFIG_ARCH_PNX4008 is not set +# CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_MSM is not set +# CONFIG_ARCH_SHMOBILE is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_SA1100 is not set +# CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_S5P64X0 is not set +# CONFIG_ARCH_S5PC100 is not set +# CONFIG_ARCH_S5PV210 is not set +# CONFIG_ARCH_EXYNOS4 
is not set +# CONFIG_ARCH_SHARK is not set +# CONFIG_ARCH_TCC_926 is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_U8500 is not set +# CONFIG_ARCH_NOMADIK is not set +# CONFIG_ARCH_DAVINCI is not set +# CONFIG_ARCH_OMAP is not set +# CONFIG_PLAT_SPEAR is not set +# CONFIG_ARCH_VT8500 is not set +# CONFIG_ARCH_ZYNQ is not set +CONFIG_GPIO_PCA953X=y +# CONFIG_KEYBOARD_GPIO_POLLED is not set + +# +# System MMU +# + +# +# NVIDIA Tegra options +# +CONFIG_ARCH_TEGRA_3x_SOC=y +CONFIG_ARCH_TEGRA_HAS_DUAL_3D=y +CONFIG_ARCH_TEGRA_HAS_DUAL_CPU_CLUSTERS=y +CONFIG_ARCH_TEGRA_HAS_PCIE=y +CONFIG_ARCH_TEGRA_HAS_SATA=y +CONFIG_TEGRA_PCI=y + +# +# Tegra board type +# +# CONFIG_MACH_TEGRA_DT is not set +# CONFIG_MACH_ARUBA is not set +CONFIG_MACH_CARDHU=y +# CONFIG_MACH_P1852 is not set +CONFIG_MACH_TEGRA_ENTERPRISE=y +# CONFIG_MACH_KAI is not set +CONFIG_MACH_GROUPER=y +CONFIG_TEGRA_SILICON_PLATFORM=y +# CONFIG_TEGRA_SIMULATION_PLATFORM is not set +# CONFIG_TEGRA_FPGA_PLATFORM is not set +CONFIG_TEGRA_DEBUG_UART_NONE=y +CONFIG_TEGRA_SYSTEM_DMA=y +CONFIG_TEGRA_PWM=y +CONFIG_TEGRA_FIQ_DEBUGGER=y +# CONFIG_TEGRA_CARDHU_DSI is not set +CONFIG_TEGRA_EMC_SCALING_ENABLE=y +CONFIG_VOLTAGE_CONTROL=y +CONFIG_GPU_OVERCLOCK=y +# CONFIG_GPU_OC_446 is not set +CONFIG_GPU_OC_484=y +# CONFIG_GPU_OC_520 is not set +CONFIG_TEGRA_CPU_DVFS=y +CONFIG_TEGRA_CORE_DVFS=y +CONFIG_TEGRA_IOVMM_SMMU=y +# CONFIG_TEGRA_SMMU_BASE_AT_E0000000 is not set +# CONFIG_TEGRA_IOVMM_SMMU_SYSFS is not set +CONFIG_TEGRA_IOVMM=y +CONFIG_TEGRA_AVP_KERNEL_ON_SMMU=y +CONFIG_TEGRA_THERMAL_THROTTLE=y +CONFIG_WIFI_CONTROL_FUNC=y +CONFIG_TEGRA_CLOCK_DEBUG_WRITE=y +CONFIG_TEGRA_CLUSTER_CONTROL=y +CONFIG_TEGRA_AUTO_HOTPLUG=y +CONFIG_TEGRA_MC_EARLY_ACK=y +CONFIG_TEGRA_MC_PROFILE=y +CONFIG_TEGRA_EDP_LIMITS=y +CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 +# CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set +CONFIG_USB_HOTPLUG=y +CONFIG_TEGRA_DYNAMIC_PWRDET=y +CONFIG_TEGRA_EDP_EXACT_FREQ=y +# CONFIG_TEGRA_USB_MODEM_POWER is not set +# CONFIG_TEGRA_BB_XMM_POWER is not set +# CONFIG_TEGRA_BB_XMM_POWER2 is not set +# CONFIG_TEGRA_THERMAL_SYSFS is not set +CONFIG_TEGRA_PLLM_RESTRICTED=y +# CONFIG_TEGRA_WDT_RECOVERY is not set +CONFIG_TEGRA_LP2_ARM_TWD=y +CONFIG_TEGRA_SLOW_CSITE=y +# CONFIG_TEGRA_PREINIT_CLOCKS is not set + +# +# Processor Type +# +CONFIG_CPU_V7=y +CONFIG_CPU_32v6K=y +CONFIG_CPU_32v7=y +CONFIG_CPU_ABRT_EV7=y +CONFIG_CPU_PABRT_V7=y +CONFIG_CPU_CACHE_V7=y +CONFIG_CPU_CACHE_VIPT=y +CONFIG_CPU_COPY_V6=y +CONFIG_CPU_TLB_V7=y +CONFIG_CPU_HAS_ASID=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y + +# +# Processor Features +# +CONFIG_ARM_THUMB=y +# CONFIG_ARM_THUMBEE is not set +CONFIG_SWP_EMULATE=y +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +# CONFIG_CPU_BPREDICT_DISABLE is not set +CONFIG_OUTER_CACHE=y +CONFIG_OUTER_CACHE_SYNC=y +CONFIG_CACHE_L2X0=y +CONFIG_CACHE_PL310=y +CONFIG_ARM_L1_CACHE_SHIFT=5 +CONFIG_ARM_DMA_MEM_BUFFERABLE=y +CONFIG_ARM_SAVE_DEBUG_CONTEXT=y +CONFIG_CPA=y +CONFIG_CPU_HAS_PMU=y +# CONFIG_ARM_ERRATA_430973 is not set +# CONFIG_ARM_ERRATA_458693 is not set +# CONFIG_ARM_ERRATA_460075 is not set +CONFIG_ARM_ERRATA_742230=y +# CONFIG_ARM_ERRATA_742231 is not set +# CONFIG_PL310_ERRATA_588369 is not set +# CONFIG_ARM_ERRATA_720789 is not set +# CONFIG_PL310_ERRATA_727915 is not set +CONFIG_ARM_ERRATA_743622=y +CONFIG_ARM_ERRATA_751472=y +# CONFIG_ARM_ERRATA_753970 is not set +CONFIG_ARM_ERRATA_754322=y +# CONFIG_ARM_ERRATA_754327 is not set +# CONFIG_ARM_ERRATA_764369 is not set +# 
CONFIG_ARM_ERRATA_720791 is not set +CONFIG_ARM_ERRATA_752520=y +# CONFIG_PL310_ERRATA_769419 is not set +CONFIG_ARM_GIC=y +CONFIG_FIQ_GLUE=y +CONFIG_FIQ_DEBUGGER=y +# CONFIG_FIQ_DEBUGGER_NO_SLEEP is not set +# CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON is not set +CONFIG_FIQ_DEBUGGER_CONSOLE=y +# CONFIG_FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE is not set +CONFIG_GIC_SET_MULTIPLE_CPUS=y + +# +# Bus support +# +CONFIG_PCI=y +CONFIG_PCI_SYSCALL=y +CONFIG_ARCH_SUPPORTS_MSI=y +CONFIG_PCI_MSI=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set +# CONFIG_PCCARD is not set + +# +# Kernel Features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_SMP=y +CONFIG_SMP_ON_UP=y +CONFIG_HAVE_ARM_SCU=y +CONFIG_HAVE_ARM_TWD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_TASK_SIZE_3G_LESS_16M=y +# CONFIG_TASK_SIZE_3G_LESS_24M is not set +CONFIG_TASK_SIZE=0xBF000000 +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_LOCAL_TIMERS=y +CONFIG_ARCH_NR_GPIO=512 +# CONFIG_PREEMPT_NONE is not set +# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_PREEMPT=y +CONFIG_PREEMPT_COUNT=y +CONFIG_HZ=100 +# CONFIG_THUMB2_KERNEL is not set +CONFIG_AEABI=y +# CONFIG_OABI_COMPAT is not set +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +CONFIG_HAVE_ARCH_PFN_VALID=y +CONFIG_HIGHMEM=y +# CONFIG_HIGHPTE is not set +CONFIG_HW_PERF_EVENTS=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_HAVE_MEMBLOCK=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_COMPACTION is not set +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +CONFIG_KSM=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +# CONFIG_CLEANCACHE is not set +CONFIG_FORCE_MAX_ZONEORDER=11 +CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set +# CONFIG_SECCOMP is not set +# CONFIG_CC_STACKPROTECTOR is not set +# CONFIG_DEPRECATED_PARAM_STRUCT is not set +CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART=y + +# +# Boot options +# +# CONFIG_USE_OF is not set +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="tegra_wdt.heartbeat=30" +# CONFIG_CMDLINE_FROM_BOOTLOADER is not set +CONFIG_CMDLINE_EXTEND=y +# CONFIG_CMDLINE_FORCE is not set +# CONFIG_XIP_KERNEL is not set +CONFIG_KEXEC=y +CONFIG_ATAGS_PROC=y +CONFIG_KEXEC_HARDBOOT=y +# CONFIG_CRASH_DUMP is not set +# CONFIG_AUTO_ZRELADDR is not set + +# +# CPU Power Management +# + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_INTERACTIVE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set +# CONFIG_CPU_FREQ_GOV_PEGASUSQ is not set + +# +# ARM CPU frequency scaling drivers +# +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y + +# +# Floating point emulation +# + +# +# At least one emulation must 
be selected +# +CONFIG_VFP=y +CONFIG_VFPv3=y +CONFIG_NEON=y + +# +# Userspace binary formats +# +CONFIG_BINFMT_ELF=y +CONFIG_HAVE_AOUT=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_HAS_WAKELOCK=y +CONFIG_HAS_EARLYSUSPEND=y +CONFIG_WAKELOCK=y +CONFIG_WAKELOCK_STAT=y +CONFIG_USER_WAKELOCK=y +CONFIG_EARLYSUSPEND=y +# CONFIG_NO_USER_SPACE_SCREEN_ACCESS_CONTROL is not set +CONFIG_FB_EARLYSUSPEND=y +CONFIG_PM_SLEEP=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_RUNTIME=y +CONFIG_PM=y +# CONFIG_PM_DEBUG is not set +# CONFIG_APM_EMULATION is not set +CONFIG_PM_CLK=y +CONFIG_SUSPEND_TIME=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +CONFIG_XFRM_IPCOMP=y +CONFIG_NET_KEY=y +# CONFIG_NET_KEY_MIGRATE is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +# CONFIG_IP_FIB_TRIE_STATS is not set +CONFIG_IP_MULTIPLE_TABLES=y +# CONFIG_IP_ROUTE_MULTIPATH is not set +# CONFIG_IP_ROUTE_VERBOSE is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_NET_IPIP is not set +CONFIG_NET_IPGRE_DEMUX=y +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +CONFIG_INET_AH=y +CONFIG_INET_ESP=y +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +# CONFIG_IPV6_ROUTE_INFO is not set +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_INET6_XFRM_TUNNEL=y +CONFIG_INET6_TUNNEL=y +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_INET6_XFRM_MODE_BEET=y +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +# CONFIG_IPV6_SIT_6RD is not set +CONFIG_IPV6_NDISC_NODETYPE=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_MULTIPLE_TABLES=y +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MROUTE is not set +CONFIG_ANDROID_PARANOID_NETWORK=y +CONFIG_NET_ACTIVITY_STATS=y +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETWORK_PHY_TIMESTAMPING is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_ADVANCED=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +# CONFIG_NF_CONNTRACK_TIMESTAMP is not set +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_GRE=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_BROADCAST=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +# CONFIG_NF_CONNTRACK_SNMP is not set +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +# CONFIG_NF_CONNTRACK_SIP is not set +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_TPROXY=y 
+CONFIG_NETFILTER_XTABLES=y + +# +# Xtables combined modules +# +CONFIG_NETFILTER_XT_MARK=y +CONFIG_NETFILTER_XT_CONNMARK=y + +# +# Xtables targets +# +# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +# CONFIG_NETFILTER_XT_TARGET_CT is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +# CONFIG_NETFILTER_XT_TARGET_HL is not set +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set +# CONFIG_NETFILTER_XT_TARGET_TEE is not set +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set +# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set + +# +# Xtables matches +# +# CONFIG_NETFILTER_XT_MATCH_ADDRTYPE is not set +# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +# CONFIG_NETFILTER_XT_MATCH_CPU is not set +# CONFIG_NETFILTER_XT_MATCH_DCCP is not set +# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_HL=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set +# CONFIG_NETFILTER_XT_MATCH_OSF is not set +# CONFIG_NETFILTER_XT_MATCH_OWNER is not set +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y +# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set +# CONFIG_NETFILTER_XT_MATCH_REALM is not set +# CONFIG_NETFILTER_XT_MATCH_RECENT is not set +# CONFIG_NETFILTER_XT_MATCH_SCTP is not set +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +# CONFIG_IP_SET is not set +# CONFIG_IP_VS is not set + +# +# IP: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV4=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_REJECT_SKERR=y +CONFIG_IP_NF_TARGET_LOG=y +# CONFIG_IP_NF_TARGET_ULOG is not set +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_NF_NAT_PROTO_DCCP=y +CONFIG_NF_NAT_PROTO_GRE=y +CONFIG_NF_NAT_PROTO_UDPLITE=y +CONFIG_NF_NAT_PROTO_SCTP=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +CONFIG_NF_NAT_TFTP=y +CONFIG_NF_NAT_AMANDA=y +CONFIG_NF_NAT_PPTP=y +CONFIG_NF_NAT_H323=y +# CONFIG_NF_NAT_SIP is not set +CONFIG_IP_NF_MANGLE=y +# CONFIG_IP_NF_TARGET_CLUSTERIP is not set +# CONFIG_IP_NF_TARGET_ECN is not set +# CONFIG_IP_NF_TARGET_TTL is not set 
+CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_CONNTRACK_IPV6=y +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT_SKERR=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +CONFIG_L2TP=y +# CONFIG_L2TP_DEBUGFS is not set +# CONFIG_L2TP_V3 is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +CONFIG_NET_SCH_HTB=y +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFB is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set +# CONFIG_NET_SCH_MQPRIO is not set +# CONFIG_NET_SCH_CHOKE is not set +# CONFIG_NET_SCH_QFQ is not set +CONFIG_NET_SCH_INGRESS=y + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +CONFIG_NET_CLS_U32=y +# CONFIG_CLS_U32_PERF is not set +# CONFIG_CLS_U32_MARK is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_CLS_CGROUP is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +CONFIG_NET_EMATCH_U32=y +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_GACT=y +# CONFIG_GACT_PROB is not set +CONFIG_NET_ACT_MIRRED=y +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +# CONFIG_NET_ACT_SKBEDIT is not set +# CONFIG_NET_ACT_CSUM is not set +# CONFIG_NET_CLS_IND is not set +CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set +CONFIG_DNS_RESOLVER=y +# CONFIG_BATMAN_ADV is not set +CONFIG_RPS=y +CONFIG_RFS_ACCEL=y +CONFIG_XPS=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +CONFIG_BT=y +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y +CONFIG_BT_RFCOMM=y +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=y +# CONFIG_BT_BNEP_MC_FILTER is not set +# CONFIG_BT_BNEP_PROTO_FILTER is not set +CONFIG_BT_HIDP=y + +# +# Bluetooth device 
drivers +# +# CONFIG_BT_HCIBTUSB is not set +# CONFIG_BT_HCIBTSDIO is not set +CONFIG_BT_HCIUART=y +CONFIG_BT_HCIUART_H4=y +# CONFIG_BT_HCIUART_BCSP is not set +# CONFIG_BT_HCIUART_ATH3K is not set +CONFIG_BT_HCIUART_LL=y +# CONFIG_BT_HCIBCM203X is not set +CONFIG_BT_BLUESLEEP=y +# CONFIG_BT_TIBLUESLEEP is not set +# CONFIG_BT_HCIBPA10X is not set +# CONFIG_BT_HCIBFUSB is not set +# CONFIG_BT_HCIVHCI is not set +# CONFIG_BT_MRVL is not set +# CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y +CONFIG_WIRELESS=y +CONFIG_WEXT_CORE=y +CONFIG_WEXT_PROC=y +CONFIG_CFG80211=y +CONFIG_NL80211_TESTMODE=y +# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +# CONFIG_CFG80211_REG_DEBUG is not set +CONFIG_CFG80211_DEFAULT_PS=y +# CONFIG_CFG80211_DEBUGFS is not set +# CONFIG_CFG80211_INTERNAL_REGDB is not set +CONFIG_CFG80211_WEXT=y +CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set +# CONFIG_CFG80211_ALLOW_RECONNECT is not set +# CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set +CONFIG_RFKILL=y +CONFIG_RFKILL_PM=y +# CONFIG_RFKILL_INPUT is not set +# CONFIG_RFKILL_REGULATOR is not set +# CONFIG_RFKILL_GPIO is not set +# CONFIG_NET_9P is not set +CONFIG_CAIF=y +# CONFIG_CAIF_DEBUG is not set +CONFIG_CAIF_NETDEV=y +# CONFIG_CEPH_LIB is not set +CONFIG_NFC=y + +# +# Near Field Communication (NFC) devices +# +CONFIG_PN544_NFC=y +# CONFIG_NFC_PN533 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="" +# CONFIG_DEVTMPFS is not set +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_REGMAP=y +CONFIG_REGMAP_I2C=y +# CONFIG_DMA_SHARED_BUFFER is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 +# CONFIG_BLK_DEV_CRYPTOLOOP is not set + +# +# DRBD disabled because PROC_FS, INET or CONNECTOR not selected +# +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_MG_DISK is not set +# CONFIG_BLK_DEV_RBD is not set +# CONFIG_SENSORS_LIS3LV02D is not set +CONFIG_MISC_DEVICES=y +CONFIG_AD525X_DPOT=y +CONFIG_AD525X_DPOT_I2C=y +# CONFIG_AD525X_DPOT_SPI is not set +# CONFIG_PHANTOM is not set +# CONFIG_INTEL_MID_PTI is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set +CONFIG_APDS9802ALS=y +# CONFIG_ISL29003 is not set +# CONFIG_ISL29020 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_SENSORS_BH1780 is not set +# CONFIG_SENSORS_BH1770 is not set +# CONFIG_SENSORS_APDS990X is not set +# CONFIG_HMC6352 is not set +# CONFIG_SENSORS_AK8975 is not set +CONFIG_SENSORS_NCT1008=y +# CONFIG_DS1682 is not set +# CONFIG_TI_DAC7512 is not set +CONFIG_UID_STAT=y +# CONFIG_BMP085 is not set +# CONFIG_PCH_PHUB is not set +# CONFIG_USB_SWITCH_FSA9480 is not set +# CONFIG_WL127X_RFKILL is not set +# CONFIG_APANIC is not set +# CONFIG_BCM4329_RFKILL is not set +CONFIG_BCM4330_RFKILL=y +CONFIG_TEGRA_CRYPTO_DEV=y 
+CONFIG_MAX1749_VIBRATOR=y +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +CONFIG_EEPROM_AT24=y +# CONFIG_EEPROM_AT25 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_MAX6875 is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_EEPROM_93XX46 is not set +# CONFIG_CB710_CORE is not set +# CONFIG_IWMC3200TOP is not set + +# +# Texas Instruments shared transport line discipline +# +# CONFIG_TI_ST is not set +# CONFIG_ST_GPS is not set +# CONFIG_SENSORS_LIS3_SPI is not set +# CONFIG_SENSORS_LIS3_I2C is not set +CONFIG_TEGRA_BB_SUPPORT=y +CONFIG_TEGRA_BB_POWER=y +CONFIG_TEGRA_BB_M7400=y +CONFIG_FSYNC_CONTROL=y +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_SCSI_MOD=y +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set +CONFIG_SCSI_MULTI_LUN=y +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +# CONFIG_SCSI_WAIT_SCAN is not set + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_ISCSI_BOOT_SYSFS is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set +# CONFIG_SCSI_CXGB4_ISCSI is not set +# CONFIG_SCSI_BNX2_ISCSI is not set +# CONFIG_SCSI_BNX2X_FCOE is not set +# CONFIG_BE2ISCSI is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_HPSA is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_3W_SAS is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_MVSAS is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_MPT2SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set +# CONFIG_FCOE is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_PMCRAID is not set +# CONFIG_SCSI_PM8001 is not set +# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_BFA_FC is not set +# CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set +# CONFIG_ATA is not set +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_DEBUG is not set +CONFIG_DM_CRYPT=y +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_RAID is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set +# 
CONFIG_DM_DELAY is not set +CONFIG_DM_UEVENT=y +# CONFIG_DM_FLAKEY is not set +# CONFIG_TARGET_CORE is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_FIREWIRE_NOSY is not set +# CONFIG_I2O is not set +CONFIG_NETDEVICES=y +# CONFIG_IFB is not set +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y +# CONFIG_VETH is not set +# CONFIG_ARCNET is not set +CONFIG_MII=y +# CONFIG_PHYLIB is not set +# CONFIG_NET_ETHERNET is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_E1000E is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set +# CONFIG_IGBVF is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +# CONFIG_CNIC is not set +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set +# CONFIG_JME is not set +# CONFIG_STMMAC_ETH is not set +# CONFIG_PCH_GBE is not set +# CONFIG_FTGMAC100 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set +CONFIG_WLAN=y +# CONFIG_ATMEL is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_ATH_COMMON is not set +# CONFIG_BCM4329 is not set +CONFIG_BCMDHD=y +CONFIG_BCMDHD_FW_PATH="/system/vendor/firmware/fw_bcmdhd.bin" +CONFIG_BCMDHD_NVRAM_PATH="/system/etc/nvram.txt" +# CONFIG_DHD_USE_STATIC_BUF is not set +# CONFIG_DHD_USE_SCHED_SCAN is not set +CONFIG_DHD_ENABLE_P2P=y +CONFIG_BCMDHD_WIFI_PM=y +# CONFIG_HOSTAP is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_IWM is not set +# CONFIG_LIBERTAS is not set +# CONFIG_HERMES is not set +# CONFIG_MWIFIEX is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +CONFIG_USB_USBNET=y +CONFIG_USB_NET_AX8817X=y +CONFIG_USB_NET_CDCETHER=y +# CONFIG_USB_NET_CDC_EEM is not set +CONFIG_USB_NET_CDC_NCM=y +# CONFIG_USB_NET_DM9601 is not set +# CONFIG_USB_NET_SMSC75XX is not set +CONFIG_USB_NET_SMSC95XX=y +# CONFIG_USB_NET_GL620A is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_PLUSB is not set +# CONFIG_USB_NET_MCS7830 is not set +# CONFIG_USB_NET_RNDIS_HOST is not set +CONFIG_USB_NET_CDC_SUBSET=y +# CONFIG_USB_ALI_M5632 is not set +# CONFIG_USB_AN2720 is not set +# CONFIG_USB_BELKIN is not set +# CONFIG_USB_ARMLINUX is not set +# CONFIG_USB_EPSON2888 is not set +# CONFIG_USB_KC2190 is not set +# CONFIG_USB_NET_ZAURUS is not set +# CONFIG_USB_NET_CX82310_ETH is not set +# CONFIG_USB_NET_KALMIA is not set +# CONFIG_USB_HSO is not set +# CONFIG_USB_NET_INT51X1 is not set +# CONFIG_USB_IPHETH is not set +# CONFIG_USB_SIERRA_NET is not set +# CONFIG_USB_VL600 is not set +# CONFIG_USB_NET_RAW_IP is not set +# CONFIG_WAN is not set + +# +# CAIF transport drivers +# +# CONFIG_CAIF_TTY is not set +# CONFIG_CAIF_SPI_SLAVE is not set +# CONFIG_CAIF_HSI is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=y +# CONFIG_PPP_MULTILINK is not set +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=y +CONFIG_PPP_SYNC_TTY=y 
+CONFIG_PPP_DEFLATE=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_MPPE=y +# CONFIG_PPPOE is not set +# CONFIG_PPTP is not set +# CONFIG_PPPOL2TP is not set +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +# CONFIG_SLIP is not set +CONFIG_SLHC=y +# CONFIG_NET_FC is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_VMXNET3 is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +CONFIG_INPUT_FF_MEMLESS=y +# CONFIG_INPUT_POLLDEV is not set +# CONFIG_INPUT_SPARSEKMAP is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set +CONFIG_INPUT_KEYRESET=y +CONFIG_INPUT_LID=y + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ADP5588 is not set +# CONFIG_KEYBOARD_ADP5589 is not set +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_QT1070 is not set +# CONFIG_KEYBOARD_QT2160 is not set +# CONFIG_KEYBOARD_LKKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_KEYBOARD_TCA6416 is not set +# CONFIG_KEYBOARD_MATRIX is not set +# CONFIG_KEYBOARD_LM8323 is not set +# CONFIG_KEYBOARD_MAX7359 is not set +# CONFIG_KEYBOARD_MCS is not set +# CONFIG_KEYBOARD_MPR121 is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_KEYBOARD_TEGRA=y +# CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_AS5011 is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TABLET_USB_WACOM=y +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_ADS7846 is not set +# CONFIG_TOUCHSCREEN_AD7877 is not set +# CONFIG_TOUCHSCREEN_AD7879 is not set +# CONFIG_TOUCHSCREEN_ATMEL_MXT is not set +# CONFIG_TOUCHSCREEN_BU21013 is not set +# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set +# CONFIG_TOUCHSCREEN_DYNAPRO is not set +# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set +# CONFIG_TOUCHSCREEN_EETI is not set +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set +# CONFIG_TOUCHSCREEN_MAX11801 is not set +# CONFIG_TOUCHSCREEN_MCS5000 is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_INEXIO is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_PANJIT_I2C is not set +# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI is not set +# 
CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2005 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set +# CONFIG_TOUCHSCREEN_W90X900 is not set +# CONFIG_TOUCHSCREEN_ST1232 is not set +# CONFIG_TOUCHSCREEN_TPS6507X is not set +CONFIG_TOUCHSCREEN_ELAN_TF_3K=y +CONFIG_TOUCHSCREEN_RM31080A=y +CONFIG_TOUCHSCREEN_SYN_RMI4_SPI=y +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_AD714X is not set +# CONFIG_INPUT_MMA8450 is not set +# CONFIG_INPUT_MPU3050 is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +CONFIG_INPUT_KEYCHORD=y +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_KXTJ9 is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_INPUT_PCF8574 is not set +# CONFIG_INPUT_PWM_BEEPER is not set +# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set +# CONFIG_INPUT_ADXL34X is not set +# CONFIG_INPUT_CMA3000 is not set +# CONFIG_INPUT_ALPS_GPIO_SCROLLWHEEL is not set +# CONFIG_INPUT_CAPELLA_CM3217 is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_SERIO_ALTERA_PS2 is not set +# CONFIG_SERIO_PS2MULT is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set +# CONFIG_N_GSM is not set +# CONFIG_TRACE_SINK is not set +CONFIG_DEVMEM=y +# CONFIG_DEVKMEM is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_TEGRA=y +# CONFIG_SERIAL_MAX3100 is not set +# CONFIG_SERIAL_MAX3107 is not set +# CONFIG_SERIAL_MFD_HSU is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +# CONFIG_SERIAL_TIMBERDALE is not set +# CONFIG_SERIAL_ALTERA_JTAGUART is not set +# CONFIG_SERIAL_ALTERA_UART is not set +# CONFIG_SERIAL_IFX6X60 is not set +# CONFIG_SERIAL_PCH_UART is not set +# CONFIG_SERIAL_XILINX_PS_UART is not set +# CONFIG_TTY_PRINTK is not set +# CONFIG_HVC_DCC is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_DCC_TTY is not set +# CONFIG_RAMOOPS is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_COMPAT is not set +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_MUX=y + +# +# Multiplexer I2C Chip support +# +# CONFIG_I2C_MUX_GPIO is not set +# CONFIG_I2C_MUX_PCA9541 is not set +CONFIG_I2C_MUX_PCA954x=y +# CONFIG_I2C_SLAVE is not set +# CONFIG_I2C_HELPER_AUTO is not set +# CONFIG_I2C_SMBUS is not set + +# +# I2C Algorithms +# +# CONFIG_I2C_ALGOBIT is not set +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# + +# +# PC SMBus host controller drivers +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# 
CONFIG_I2C_I801 is not set +# CONFIG_I2C_ISCH is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_DESIGNWARE is not set +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_INTEL_MID is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_PXA_PCI is not set +# CONFIG_I2C_SIMTEC is not set +CONFIG_I2C_TEGRA=y +# CONFIG_I2C_XILINX is not set +# CONFIG_I2C_EG20T is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_DIOLAN_U2C is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +# CONFIG_SPI_ALTERA is not set +# CONFIG_SPI_BITBANG is not set +# CONFIG_SPI_GPIO is not set +# CONFIG_SPI_OC_TINY is not set +# CONFIG_SPI_PXA2XX_PCI is not set +CONFIG_SPI_TEGRA=y +CONFIG_SPI_SLAVE_TEGRA=y +# CONFIG_SPI_TOPCLIFF_PCH is not set +# CONFIG_SPI_XILINX is not set +# CONFIG_SPI_DESIGNWARE is not set + +# +# SPI Protocol Masters +# +# CONFIG_SPI_SPIDEV is not set +# CONFIG_SPI_TLE62X0 is not set + +# +# PPS support +# +# CONFIG_PPS is not set + +# +# PPS generators support +# + +# +# PTP clock support +# + +# +# Enable Device Drivers -> PPS to see the PTP clock options. +# +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y + +# +# Memory mapped GPIO drivers: +# +# CONFIG_GPIO_GENERIC_PLATFORM is not set +# CONFIG_GPIO_IT8761E is not set +# CONFIG_GPIO_VX855 is not set + +# +# I2C GPIO expanders: +# +# CONFIG_GPIO_MAX7300 is not set +# CONFIG_GPIO_MAX732X is not set +# CONFIG_GPIO_PCA953X_IRQ is not set +# CONFIG_GPIO_PCF857X is not set +# CONFIG_GPIO_SX150X is not set +# CONFIG_GPIO_ADP5588 is not set + +# +# PCI GPIO expanders: +# +# CONFIG_GPIO_BT8XX is not set +# CONFIG_GPIO_ML_IOH is not set +# CONFIG_GPIO_RDC321X is not set + +# +# SPI GPIO expanders: +# +# CONFIG_GPIO_MAX7301 is not set +# CONFIG_GPIO_MCP23S08 is not set +# CONFIG_GPIO_MC33880 is not set +# CONFIG_GPIO_74X164 is not set + +# +# AC97 GPIO expanders: +# + +# +# MODULbus GPIO expanders: +# +CONFIG_GPIO_TPS65910=y +# CONFIG_W1 is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_TEST_POWER is not set +# CONFIG_BATTERY_DS2780 is not set +# CONFIG_BATTERY_DS2782 is not set +# CONFIG_BATTERY_BQ20Z75 is not set +# CONFIG_BATTERY_BQ27x00 is not set +# CONFIG_CHARGER_TPS8003X is not set +# CONFIG_BATTERY_GAUGE_TPS8003X is not set +CONFIG_CHARGER_SMB347=y +# CONFIG_BATTERY_MAX17040 is not set +# CONFIG_BATTERY_MAX17042 is not set +# CONFIG_BATTERY_MAX17048 is not set +# CONFIG_CHARGER_ISP1704 is not set +# CONFIG_CHARGER_MAX8903 is not set +# CONFIG_CHARGER_GPIO is not set +CONFIG_BATTERY_BQ27541=y +# CONFIG_TEGRA_BPC_MGMT is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Native drivers +# +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADCXX is not set +# CONFIG_SENSORS_ADM1021 is not set +# 
CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7411 is not set +# CONFIG_SENSORS_ADT7461 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_ASC7621 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS620 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_G760A is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_GPIO_FAN is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_JC42 is not set +# CONFIG_SENSORS_LINEAGE is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM70 is not set +# CONFIG_SENSORS_LM73 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4151 is not set +# CONFIG_SENSORS_LTC4215 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_LTC4261 is not set +# CONFIG_SENSORS_LM95241 is not set +# CONFIG_SENSORS_LM95245 is not set +# CONFIG_SENSORS_MAX1111 is not set +# CONFIG_SENSORS_MAX16065 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX1668 is not set +# CONFIG_SENSORS_MAX6639 is not set +# CONFIG_SENSORS_MAX6642 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_NTC_THERMISTOR is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_PMBUS is not set +# CONFIG_SENSORS_SHT15 is not set +# CONFIG_SENSORS_SHT21 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMM665 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_EMC1403 is not set +# CONFIG_SENSORS_EMC2103 is not set +# CONFIG_SENSORS_EMC6W201 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SCH56XX_COMMON is not set +# CONFIG_SENSORS_SCH5627 is not set +# CONFIG_SENSORS_SCH5636 is not set +# CONFIG_SENSORS_ADS1015 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_ADS7871 is not set +# CONFIG_SENSORS_AMC6821 is not set +CONFIG_SENSORS_TEGRA_TSENSOR=y +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_TMP102 is not set +# CONFIG_SENSORS_TMP401 is not set +# CONFIG_SENSORS_TMP421 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83795 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +CONFIG_SENSORS_INA219=y +# CONFIG_SENSORS_INA230 is not set +CONFIG_SENSORS_AL3010=y +CONFIG_THERMAL=y +CONFIG_THERMAL_HWMON=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_CORE is not set +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog 
Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_DW_WATCHDOG is not set +# CONFIG_MPCORE_WATCHDOG is not set +CONFIG_TEGRA_WATCHDOG=y +CONFIG_TEGRA_WATCHDOG_ENABLE_ON_PROBE=y +# CONFIG_MAX63XX_WATCHDOG is not set +# CONFIG_ALIM7101_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set +CONFIG_BCMA_POSSIBLE=y + +# +# Broadcom specific AMBA +# +# CONFIG_BCMA is not set +CONFIG_MFD_SUPPORT=y +CONFIG_MFD_CORE=y +# CONFIG_MFD_88PM860X is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_HTC_I2CPLD is not set +# CONFIG_TPS6105X is not set +# CONFIG_TPS65010 is not set +# CONFIG_TPS6507X is not set +CONFIG_MFD_TPS6586X=y +CONFIG_MFD_TPS65910=y +# CONFIG_MFD_TPS65912_I2C is not set +# CONFIG_MFD_TPS65912_SPI is not set +# CONFIG_TWL4030_CORE is not set +# CONFIG_MFD_STMPE is not set +# CONFIG_MFD_TC3589X is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_PMIC_ADP5520 is not set +# CONFIG_MFD_MAX8925 is not set +# CONFIG_MFD_MAX8997 is not set +# CONFIG_MFD_MAX8998 is not set +# CONFIG_MFD_MAX8907C is not set +CONFIG_MFD_MAX77663=y +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM831X_I2C is not set +# CONFIG_MFD_WM831X_SPI is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_WM8994 is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_MFD_MC13XXX is not set +# CONFIG_ABX500_CORE is not set +# CONFIG_EZX_PCAP is not set +# CONFIG_MFD_TIMBERDALE is not set +# CONFIG_LPC_SCH is not set +# CONFIG_MFD_RDC321X is not set +# CONFIG_MFD_JANZ_CMODIO is not set +# CONFIG_MFD_VX855 is not set +# CONFIG_MFD_WL1273_CORE is not set +# CONFIG_MFD_AAT2870_CORE is not set +CONFIG_MFD_TPS6591X=y +# CONFIG_MFD_TPS65090 is not set +# CONFIG_MFD_RC5T583 is not set +CONFIG_MFD_TPS80031=y +CONFIG_GPADC_TPS80031=y +CONFIG_MFD_RICOH583=y +CONFIG_REGULATOR=y +# CONFIG_REGULATOR_DEBUG is not set +# CONFIG_REGULATOR_DUMMY is not set +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_VIRTUAL_CONSUMER=y +# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_GPIO is not set +# CONFIG_REGULATOR_BQ24022 is not set +# CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX8649 is not set +# CONFIG_REGULATOR_MAX8660 is not set +# CONFIG_REGULATOR_MAX8952 is not set +CONFIG_REGULATOR_MAX77663=y +# CONFIG_REGULATOR_LP3971 is not set +# CONFIG_REGULATOR_LP3972 is not set +# CONFIG_REGULATOR_TPS65023 is not set +# CONFIG_REGULATOR_TPS6507X is not set +# CONFIG_REGULATOR_ISL6271A is not set +# CONFIG_REGULATOR_AD5398 is not set +CONFIG_REGULATOR_TPS6586X=y +# CONFIG_REGULATOR_TPS6524X is not set +CONFIG_REGULATOR_TPS65910=y +CONFIG_REGULATOR_TPS62360=y +CONFIG_REGULATOR_TPS6591X=y +CONFIG_REGULATOR_TPS80031=y +CONFIG_REGULATOR_RICOH583=y +# CONFIG_REGULATOR_FAN53555 is not set +CONFIG_MEDIA_SUPPORT=y + +# +# Multimedia core support +# +# CONFIG_MEDIA_CONTROLLER is not set +CONFIG_VIDEO_DEV=y +CONFIG_VIDEO_V4L2_COMMON=y +# CONFIG_DVB_CORE is not set +CONFIG_VIDEO_MEDIA=y + +# +# Multimedia drivers +# +# CONFIG_RC_CORE is not set +# CONFIG_MEDIA_ATTACH is not set +CONFIG_MEDIA_TUNER=y +# CONFIG_MEDIA_TUNER_CUSTOMISE is not set +CONFIG_MEDIA_TUNER_SIMPLE=y 
+CONFIG_MEDIA_TUNER_TDA8290=y +CONFIG_MEDIA_TUNER_TDA827X=y +CONFIG_MEDIA_TUNER_TDA18271=y +CONFIG_MEDIA_TUNER_TDA9887=y +CONFIG_MEDIA_TUNER_TEA5761=y +CONFIG_MEDIA_TUNER_TEA5767=y +CONFIG_MEDIA_TUNER_MT20XX=y +CONFIG_MEDIA_TUNER_XC2028=y +CONFIG_MEDIA_TUNER_XC5000=y +CONFIG_MEDIA_TUNER_XC4000=y +CONFIG_MEDIA_TUNER_MC44S803=y +CONFIG_VIDEO_V4L2=y +CONFIG_VIDEO_CAPTURE_DRIVERS=y +# CONFIG_VIDEO_ADV_DEBUG is not set +# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +CONFIG_VIDEO_HELPER_CHIPS_AUTO=y + +# +# Audio decoders, processors and mixers +# + +# +# RDS decoders +# + +# +# Video decoders +# + +# +# Video and audio decoders +# + +# +# MPEG video encoders +# + +# +# Video encoders +# + +# +# Camera sensor devices +# + +# +# Flash devices +# + +# +# Video improvement chips +# + +# +# Miscelaneous helper chips +# +CONFIG_TEGRA_RPC=y +# CONFIG_TEGRA_AVP is not set +# CONFIG_TEGRA_MEDIASERVER is not set +CONFIG_TEGRA_NVAVP=y +CONFIG_TEGRA_CAMERA=y +CONFIG_VIDEO_MI1040=y +CONFIG_TEGRA_DTV=y +# CONFIG_VIDEO_OV5650 is not set +# CONFIG_VIDEO_OV14810 is not set +# CONFIG_VIDEO_OV9726 is not set +# CONFIG_VIDEO_OV2710 is not set +# CONFIG_VIDEO_AR0832 is not set +# CONFIG_VIDEO_SOC380 is not set +# CONFIG_TORCH_SSL3250A is not set +# CONFIG_TORCH_TPS61050 is not set +# CONFIG_VIDEO_SH532U is not set +# CONFIG_VIDEO_AD5820 is not set +# CONFIG_VIDEO_CPIA2 is not set +# CONFIG_VIDEO_SAA7134 is not set +# CONFIG_VIDEO_MXB is not set +# CONFIG_VIDEO_HEXIUM_ORION is not set +# CONFIG_VIDEO_HEXIUM_GEMINI is not set +# CONFIG_VIDEO_CAFE_CCIC is not set +# CONFIG_VIDEO_SR030PC30 is not set +# CONFIG_VIDEO_NOON010PC30 is not set +# CONFIG_SOC_CAMERA is not set +CONFIG_V4L_USB_DRIVERS=y +CONFIG_USB_VIDEO_CLASS=y +CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y +# CONFIG_USB_GSPCA is not set +# CONFIG_VIDEO_PVRUSB2 is not set +# CONFIG_VIDEO_HDPVR is not set +# CONFIG_VIDEO_EM28XX is not set +# CONFIG_VIDEO_USBVISION is not set +# CONFIG_USB_ET61X251 is not set +# CONFIG_USB_SN9C102 is not set +# CONFIG_USB_PWC is not set +# CONFIG_USB_ZR364XX is not set +# CONFIG_USB_STKWEBCAM is not set +# CONFIG_USB_S2255 is not set +# CONFIG_V4L_MEM2MEM_DRIVERS is not set +# CONFIG_RADIO_ADAPTERS is not set + +# +# Graphics support +# +CONFIG_VGA_ARB=y +CONFIG_VGA_ARB_MAX_GPUS=16 +# CONFIG_DRM is not set +# CONFIG_STUB_POULSBO is not set +# CONFIG_ION is not set +# CONFIG_VGASTATE is not set +CONFIG_VIDEO_OUTPUT_CONTROL=y +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_WMT_GE_ROPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# 
CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_CARMINE is not set +# CONFIG_FB_TMIO is not set +# CONFIG_FB_UDL is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set + +# +# NVIDIA Tegra Display Driver options +# +CONFIG_TEGRA_GRHOST=y +CONFIG_TEGRA_DC=y +CONFIG_FB_TEGRA=y +CONFIG_TEGRA_DC_EXTENSIONS=y +CONFIG_TEGRA_NVMAP=y +CONFIG_NVMAP_RECLAIM_UNPINNED_VM=y +CONFIG_NVMAP_ALLOW_SYSMEM=y +# CONFIG_NVMAP_HIGHMEM_ONLY is not set +# CONFIG_NVMAP_CARVEOUT_KILLER is not set +CONFIG_NVMAP_CARVEOUT_COMPACTOR=y +# CONFIG_NVMAP_VPR is not set +CONFIG_TEGRA_DSI=y +CONFIG_NVMAP_CONVERT_CARVEOUT_TO_IOVMM=y +CONFIG_TEGRA_NVHDCP=y +# CONFIG_TEGRA_HDMI_74MHZ_LIMIT is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +# CONFIG_LCD_L4F00242T03 is not set +# CONFIG_LCD_LMS283GF05 is not set +# CONFIG_LCD_LTV350QV is not set +# CONFIG_LCD_TDO24M is not set +# CONFIG_LCD_VGG2432A4 is not set +# CONFIG_LCD_PLATFORM is not set +# CONFIG_LCD_S6E63M0 is not set +# CONFIG_LCD_LD9040 is not set +# CONFIG_LCD_AMS369FG06 is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_GENERIC is not set +CONFIG_BACKLIGHT_PWM=y +CONFIG_BACKLIGHT_TEGRA_PWM=y +# CONFIG_BACKLIGHT_ADP8860 is not set +# CONFIG_BACKLIGHT_ADP8870 is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_LOGO is not set +CONFIG_SOUND=y +# CONFIG_SOUND_OSS_CORE is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_JACK=y +# CONFIG_SND_SEQUENCER is not set +# CONFIG_SND_MIXER_OSS is not set +# CONFIG_SND_PCM_OSS is not set +# CONFIG_SND_HRTIMER is not set +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y +# CONFIG_SND_RAWMIDI_SEQ is not set +# CONFIG_SND_OPL3_LIB_SEQ is not set +# CONFIG_SND_OPL4_LIB_SEQ is not set +# CONFIG_SND_SBAWE_SEQ is not set +# CONFIG_SND_EMU10K1_SEQ is not set +CONFIG_SND_DRIVERS=y +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_ALOOP is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set +CONFIG_SND_PCI=y +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5535AUDIO is not set +# CONFIG_SND_CTXFI is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_INDIGOIOX is not set +# CONFIG_SND_INDIGODJX is not set +# CONFIG_SND_EMU10K1 is not 
set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_PREALLOC_SIZE=64 +# CONFIG_SND_HDA_HWDEP is not set +# CONFIG_SND_HDA_INPUT_BEEP is not set +# CONFIG_SND_HDA_INPUT_JACK is not set +# CONFIG_SND_HDA_PATCH_LOADER is not set +CONFIG_SND_HDA_PLATFORM_DRIVER=y +CONFIG_SND_HDA_PLATFORM_NVIDIA_TEGRA=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_ENABLE_REALTEK_QUIRKS=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_HDMI=y +CONFIG_SND_HDA_CODEC_CIRRUS=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CA0110=y +CONFIG_SND_HDA_CODEC_CA0132=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10 +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_LOLA is not set +# CONFIG_SND_LX6464ES is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set +CONFIG_SND_ARM=y +CONFIG_SND_SPI=y +CONFIG_SND_USB=y +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_UA101 is not set +# CONFIG_SND_USB_CAIAQ is not set +# CONFIG_SND_USB_6FIRE is not set +CONFIG_SND_SOC=y +# CONFIG_SND_SOC_CACHE_LZO is not set +CONFIG_SND_SOC_TEGRA=y +CONFIG_SND_SOC_TEGRA30_AHUB=y +CONFIG_SND_SOC_TEGRA30_DAM=y +CONFIG_SND_SOC_TEGRA30_I2S=y +CONFIG_SND_SOC_TEGRA30_SPDIF=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_WM8903=y +# CONFIG_SND_SOC_TEGRA_WM8903 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_MAX98088=y +# CONFIG_SND_SOC_TEGRA_MAX98088 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_TLV320AIC326X=y +# CONFIG_SND_SOC_TEGRA_TLV320AIC326X is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5639=y +# CONFIG_SND_SOC_TEGRA_RT5639 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5640=y +CONFIG_SND_SOC_TEGRA_RT5640=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_MAX98095=y +# CONFIG_SND_SOC_TEGRA_MAX98095 is not set +CONFIG_HEADSET_FUNCTION=y +CONFIG_SND_SOC_I2C_AND_SPI=y +# CONFIG_SND_SOC_ALL_CODECS is not set +CONFIG_SND_SOC_RT5640=y +CONFIG_SND_SOC_RT5642=y +CONFIG_SND_SOC_SPDIF=y +# CONFIG_SND_SOC_TLV320AIC326X is not set +# CONFIG_SOUND_PRIME is not set +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HIDRAW is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +# CONFIG_HID_PID is not set +# CONFIG_USB_HIDDEV is not set + +# +# Special HID drivers +# +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +# CONFIG_HID_PRODIKEYS is not set +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HOLTEK_FF=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y 
+CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIWII_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +# CONFIG_HID_PICOLCD is not set +# CONFIG_HID_QUANTA is not set +# CONFIG_HID_ROCCAT is not set +# CONFIG_HID_SAMSUNG is not set +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_HID_WACOM=y +# CONFIG_HID_WACOM_POWER_SUPPLY is not set +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_ZEROPLUS_FF=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +CONFIG_USB_DEVICE_CLASS=y +# CONFIG_USB_DYNAMIC_MINORS is not set +CONFIG_USB_SUSPEND=y +CONFIG_USB_OTG=y +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set +# CONFIG_USB_MON is not set +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +# CONFIG_USB_XHCI_HCD is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_TT_NEWSCHED=y +CONFIG_USB_EHCI_TEGRA=y +# CONFIG_USB_OXU210HP_HCD is not set +# CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set +# CONFIG_USB_ISP1362_HCD is not set +# CONFIG_USB_OHCI_HCD is not set +# CONFIG_USB_UHCI_HCD is not set +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set +# CONFIG_USB_EHCI_ONOFF_FEATURE is not set +# CONFIG_USB_MUSB_HDRC is not set + +# +# USB Device Class drivers +# +CONFIG_USB_ACM=y +# CONFIG_USB_PRINTER is not set +CONFIG_USB_WDM=y +# CONFIG_USB_TMC is not set + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +# + +# +# also be needed; see USB_STORAGE Help for more info +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_REALTEK is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +# CONFIG_USB_STORAGE_ENE_UB6250 is not set +# CONFIG_USB_UAS is not set +CONFIG_USB_LIBUSUAL=y + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB port drivers +# +CONFIG_USB_SERIAL=y +# CONFIG_USB_SERIAL_CONSOLE is not set +# CONFIG_USB_EZUSB is not set +# CONFIG_USB_SERIAL_GENERIC is not set +# CONFIG_USB_SERIAL_AIRCABLE is not set +# CONFIG_USB_SERIAL_ARK3116 is not set +# CONFIG_USB_SERIAL_BELKIN is not set +# CONFIG_USB_SERIAL_CH341 is not set +# CONFIG_USB_SERIAL_WHITEHEAT is not set +# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set +# 
CONFIG_USB_SERIAL_CP210X is not set +# CONFIG_USB_SERIAL_CYPRESS_M8 is not set +# CONFIG_USB_SERIAL_EMPEG is not set +# CONFIG_USB_SERIAL_FTDI_SIO is not set +# CONFIG_USB_SERIAL_FUNSOFT is not set +# CONFIG_USB_SERIAL_VISOR is not set +# CONFIG_USB_SERIAL_IPAQ is not set +# CONFIG_USB_SERIAL_IR is not set +# CONFIG_USB_SERIAL_EDGEPORT is not set +# CONFIG_USB_SERIAL_EDGEPORT_TI is not set +# CONFIG_USB_SERIAL_GARMIN is not set +# CONFIG_USB_SERIAL_IPW is not set +# CONFIG_USB_SERIAL_IUU is not set +# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set +# CONFIG_USB_SERIAL_KEYSPAN is not set +# CONFIG_USB_SERIAL_KLSI is not set +# CONFIG_USB_SERIAL_KOBIL_SCT is not set +# CONFIG_USB_SERIAL_MCT_U232 is not set +# CONFIG_USB_SERIAL_MOS7720 is not set +# CONFIG_USB_SERIAL_MOS7840 is not set +# CONFIG_USB_SERIAL_MOTOROLA is not set +# CONFIG_USB_SERIAL_NAVMAN is not set +CONFIG_USB_SERIAL_PL2303=y +# CONFIG_USB_SERIAL_OTI6858 is not set +# CONFIG_USB_SERIAL_QCAUX is not set +# CONFIG_USB_SERIAL_QUALCOMM is not set +# CONFIG_USB_SERIAL_SPCP8X5 is not set +# CONFIG_USB_SERIAL_HP4X is not set +# CONFIG_USB_SERIAL_SAFE is not set +# CONFIG_USB_SERIAL_SIEMENS_MPI is not set +# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set +# CONFIG_USB_SERIAL_SYMBOL is not set +# CONFIG_USB_SERIAL_TI is not set +# CONFIG_USB_SERIAL_CYBERJACK is not set +# CONFIG_USB_SERIAL_XIRCOM is not set +CONFIG_USB_SERIAL_WWAN=y +CONFIG_USB_SERIAL_OPTION=y +# CONFIG_USB_SERIAL_OMNINET is not set +# CONFIG_USB_SERIAL_OPTICON is not set +# CONFIG_USB_SERIAL_VIVOPAY_SERIAL is not set +# CONFIG_USB_SERIAL_ZIO is not set +# CONFIG_USB_SERIAL_SSU100 is not set +# CONFIG_USB_SERIAL_DEBUG is not set +# CONFIG_USB_SERIAL_BASEBAND is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_YUREX is not set +CONFIG_USB_GADGET=y +# CONFIG_USB_GADGET_DEBUG is not set +# CONFIG_USB_GADGET_DEBUG_FILES is not set +# CONFIG_USB_GADGET_DEBUG_FS is not set +CONFIG_USB_GADGET_VBUS_DRAW=500 +CONFIG_USB_FSL_USB2=y +# CONFIG_USB_FUSB300 is not set +# CONFIG_USB_R8A66597 is not set +# CONFIG_USB_M66592 is not set +# CONFIG_USB_AMD5536UDC is not set +# CONFIG_USB_CI13XXX_PCI is not set +# CONFIG_USB_NET2272 is not set +# CONFIG_USB_NET2280 is not set +# CONFIG_USB_GOKU is not set +# CONFIG_USB_LANGWELL is not set +# CONFIG_USB_EG20T is not set +# CONFIG_USB_DUMMY_HCD is not set +CONFIG_USB_GADGET_DUALSPEED=y +# CONFIG_USB_ZERO is not set +# CONFIG_USB_AUDIO is not set +# CONFIG_USB_ETH is not set +# CONFIG_USB_G_NCM is not set +# CONFIG_USB_GADGETFS is not set +# CONFIG_USB_FUNCTIONFS is not set +# CONFIG_USB_FILE_STORAGE is not set +# CONFIG_USB_MASS_STORAGE is not set +# CONFIG_USB_G_SERIAL is not set +# CONFIG_USB_MIDI_GADGET is not set +# CONFIG_USB_G_PRINTER is not set +CONFIG_USB_G_ANDROID=y +# CONFIG_USB_CDC_COMPOSITE is not set +# CONFIG_USB_G_MULTI is not set +# CONFIG_USB_G_HID is not set +# CONFIG_USB_G_DBGP is not set +# 
CONFIG_USB_G_WEBCAM is not set + +# +# OTG and related infrastructure +# +CONFIG_USB_OTG_UTILS=y +# CONFIG_USB_OTG_WAKELOCK is not set +# CONFIG_USB_GPIO_VBUS is not set +CONFIG_USB_ULPI=y +CONFIG_USB_ULPI_VIEWPORT=y +# CONFIG_NOP_USB_XCEIV is not set +CONFIG_USB_TEGRA_OTG=y +# CONFIG_UWB is not set +CONFIG_MMC=y +# CONFIG_MMC_DEBUG is not set +CONFIG_MMC_UNSAFE_RESUME=y +# CONFIG_MMC_CLKGATE is not set +CONFIG_MMC_EMBEDDED_SDIO=y +# CONFIG_MMC_PARANOID_SD_INIT is not set + +# +# MMC/SD/SDIO Card Drivers +# +CONFIG_MMC_BLOCK=y +CONFIG_MMC_BLOCK_MINORS=16 +CONFIG_MMC_BLOCK_BOUNCE=y +CONFIG_MMC_BLOCK_DEFERRED_RESUME=y +# CONFIG_SDIO_UART is not set +CONFIG_MMC_TEST=y + +# +# MMC/SD/SDIO Host Controller Drivers +# +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_IO_ACCESSORS=y +CONFIG_MMC_SDHCI_NATIVE_BLOCKSIZE=y +# CONFIG_MMC_SDHCI_PCI is not set +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_TEGRA=y +# CONFIG_MMC_SDHCI_PXAV3 is not set +# CONFIG_MMC_SDHCI_PXAV2 is not set +# CONFIG_MMC_TIFM_SD is not set +# CONFIG_MMC_CB710 is not set +# CONFIG_MMC_VIA_SDMMC is not set +# CONFIG_MMC_DW is not set +# CONFIG_MMC_VUB300 is not set +# CONFIG_MMC_USHC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y + +# +# LED drivers +# +# CONFIG_LEDS_LM3530 is not set +# CONFIG_LEDS_PCA9532 is not set +CONFIG_LEDS_GPIO=y +# CONFIG_LEDS_LP3944 is not set +# CONFIG_LEDS_LP5521 is not set +# CONFIG_LEDS_LP5523 is not set +# CONFIG_LEDS_PCA955X is not set +# CONFIG_LEDS_DAC124S085 is not set +# CONFIG_LEDS_PWM is not set +# CONFIG_LEDS_REGULATOR is not set +# CONFIG_LEDS_BD2802 is not set +# CONFIG_LEDS_LT3593 is not set +# CONFIG_LEDS_TRIGGERS is not set + +# +# LED Triggers +# +CONFIG_SWITCH=y +# CONFIG_SWITCH_GPIO is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +CONFIG_RTC_INTF_ALARM=y +CONFIG_RTC_INTF_ALARM_DEV=y +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_DS3232 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +CONFIG_RTC_DRV_MAX77663=y +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_ISL12022 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_BQ32K is not set +CONFIG_RTC_DRV_TPS6586X=y +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set +# CONFIG_RTC_DRV_RX8025 is not set +# CONFIG_RTC_DRV_EM3027 is not set +# CONFIG_RTC_DRV_RV3029C2 is not set + +# +# SPI RTC drivers +# +# CONFIG_RTC_DRV_M41T93 is not set +# CONFIG_RTC_DRV_M41T94 is not set +# CONFIG_RTC_DRV_DS1305 is not set +# CONFIG_RTC_DRV_DS1390 is not set +# CONFIG_RTC_DRV_MAX6902 is not set +# CONFIG_RTC_DRV_R9701 is not set +# CONFIG_RTC_DRV_RS5C348 is not set +# CONFIG_RTC_DRV_DS3234 is not set +# CONFIG_RTC_DRV_PCF2123 is not set + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_CMOS is not set +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# 
CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_MSM6242 is not set +# CONFIG_RTC_DRV_BQ4802 is not set +# CONFIG_RTC_DRV_RP5C01 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +# CONFIG_RTC_DRV_TEGRA is not set +CONFIG_RTC_DRV_TPS6591x=y +CONFIG_RTC_DRV_TPS80031=y +CONFIG_RTC_DRV_RC5T583=y +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set + +# +# Virtio drivers +# +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set +CONFIG_STAGING=y +# CONFIG_ET131X is not set +# CONFIG_USBIP_CORE is not set +# CONFIG_PRISM2_USB is not set +# CONFIG_ECHO is not set +# CONFIG_BRCMUTIL is not set +# CONFIG_ASUS_OLED is not set +# CONFIG_R8187SE is not set +# CONFIG_RTL8192U is not set +# CONFIG_RTL8192E is not set +# CONFIG_R8712U is not set +# CONFIG_RTS_PSTOR is not set +# CONFIG_TRANZPORT is not set + +# +# Android +# +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_LOGGER=y +CONFIG_ANDROID_RAM_CONSOLE=y +CONFIG_ANDROID_RAM_CONSOLE_ENABLE_VERBOSE=y +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION=y +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_DATA_SIZE=128 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_ECC_SIZE=16 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE=8 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_POLYNOMIAL=0x11d +# CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT is not set +CONFIG_ANDROID_TIMED_OUTPUT=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +# CONFIG_POHMELFS is not set +# CONFIG_LINE6_USB is not set +# CONFIG_USB_SERIAL_QUATECH2 is not set +# CONFIG_USB_SERIAL_QUATECH_USB2 is not set +# CONFIG_VT6655 is not set +# CONFIG_VT6656 is not set +# CONFIG_VME_BUS is not set +# CONFIG_DX_SEP is not set +CONFIG_IIO=y +# CONFIG_IIO_ST_HWMON is not set +CONFIG_IIO_BUFFER=y +# CONFIG_IIO_SW_RING is not set +CONFIG_IIO_KFIFO_BUF=y +CONFIG_IIO_TRIGGER=y +CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 + +# +# Accelerometers +# +# CONFIG_ADIS16201 is not set +# CONFIG_ADIS16203 is not set +# CONFIG_ADIS16204 is not set +# CONFIG_ADIS16209 is not set +# CONFIG_ADIS16220 is not set +# CONFIG_ADIS16240 is not set +# CONFIG_KXSD9 is not set +# CONFIG_LIS3L02DQ is not set + +# +# Analog to digital convertors +# +# CONFIG_AD7150 is not set +# CONFIG_AD7152 is not set +# CONFIG_AD7291 is not set +# CONFIG_AD7298 is not set +# CONFIG_AD7314 is not set +# CONFIG_AD7606 is not set +# CONFIG_AD799X is not set +# CONFIG_AD7476 is not set +# CONFIG_AD7887 is not set +# CONFIG_AD7780 is not set +# CONFIG_AD7793 is not set +# CONFIG_AD7745 is not set +# CONFIG_AD7816 is not set +# CONFIG_ADT75 is not set +# CONFIG_ADT7310 is not set +# CONFIG_ADT7410 is not set +# CONFIG_MAX1363 is not set + +# +# Analog digital bi-direction convertors +# +# CONFIG_ADT7316 is not set + +# +# Digital to analog convertors +# +# CONFIG_AD5624R_SPI is not set +# CONFIG_AD5446 is not set +# CONFIG_AD5504 is not set +# CONFIG_AD5791 is not set +# CONFIG_AD5686 is not set +# CONFIG_MAX517 is not set + +# +# Direct Digital Synthesis +# +# CONFIG_AD5930 is not set +# CONFIG_AD9832 is not set +# CONFIG_AD9834 is not set +# CONFIG_AD9850 is not set +# CONFIG_AD9852 is not set +# CONFIG_AD9910 is not set +# CONFIG_AD9951 is not set + +# +# Digital gyroscope sensors +# +# CONFIG_ADIS16060 is not set +# CONFIG_ADIS16080 is not set +# CONFIG_ADIS16130 is not set +# CONFIG_ADIS16260 is not set +# CONFIG_ADXRS450 is not set + +# +# Inertial measurement units +# +# 
CONFIG_ADIS16400 is not set +CONFIG_INV_MPU_IIO=y + +# +# Light sensors +# +# CONFIG_SENSORS_ISL29018 is not set +CONFIG_SENSORS_ISL29028=y +# CONFIG_SENSORS_TSL2563 is not set +# CONFIG_TSL2583 is not set +CONFIG_SENSORS_LTR558=y + +# +# Magnetometer sensors +# +# CONFIG_SENSORS_HMC5843 is not set +CONFIG_AMI306=y + +# +# Active energy metering IC +# +# CONFIG_ADE7753 is not set +# CONFIG_ADE7754 is not set +# CONFIG_ADE7758 is not set +# CONFIG_ADE7759 is not set +# CONFIG_ADE7854 is not set + +# +# Resolver to digital converters +# +# CONFIG_AD2S90 is not set +# CONFIG_AD2S120X is not set +# CONFIG_AD2S1210 is not set + +# +# Triggers - standalone +# +# CONFIG_IIO_PERIODIC_RTC_TRIGGER is not set +# CONFIG_IIO_GPIO_TRIGGER is not set +# CONFIG_IIO_SYSFS_TRIGGER is not set +# CONFIG_IIO_SIMPLE_DUMMY is not set +CONFIG_XVMALLOC=y +CONFIG_ZRAM=y +# CONFIG_ZRAM_DEBUG is not set +# CONFIG_FB_SM7XX is not set +# CONFIG_VIDEO_DT3155 is not set +# CONFIG_CRYSTALHD is not set +# CONFIG_FB_XGI is not set +# CONFIG_EASYCAP is not set +# CONFIG_SOLO6X10 is not set +# CONFIG_ATH6K_LEGACY is not set +# CONFIG_USB_ENESTORAGE is not set +# CONFIG_BCM_WIMAX is not set +# CONFIG_FT1000 is not set + +# +# Speakup console speech +# +# CONFIG_TOUCHSCREEN_CLEARPAD_TM1217 is not set +# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4 is not set +# CONFIG_ALTERA_STAPL is not set +# CONFIG_MFD_NVEC is not set +CONFIG_CLKDEV_LOOKUP=y +CONFIG_CLKSRC_MMIO=y +CONFIG_IOMMU_SUPPORT=y +# CONFIG_TEGRA_IOMMU_SMMU is not set +# CONFIG_VIRT_DRIVERS is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_XATTR=y +CONFIG_EXT4_FS_POSIX_ACL=y +# CONFIG_EXT4_FS_SECURITY is not set +# CONFIG_EXT4_DEBUG is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=y +# CONFIG_JBD2_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y +# CONFIG_DNOTIFY is not set +CONFIG_INOTIFY_USER=y +# CONFIG_FANOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_QUOTACTL is not set +# CONFIG_AUTOFS4_FS is not set +CONFIG_FUSE_FS=y +# CONFIG_CUSE is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +# CONFIG_MSDOS_FS is not set +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=y +# CONFIG_NTFS_DEBUG is not set +CONFIG_NTFS_RW=y + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_REPORT_PRESENT_CPUS=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_TMPFS_XATTR is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_LOGFS is not set +# CONFIG_CRAMFS is not set +# 
CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_PSTORE is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +CONFIG_NFS_V4=y +# CONFIG_NFS_V4_1 is not set +CONFIG_ROOT_NFS=y +# CONFIG_NFS_USE_LEGACY_DNS is not set +CONFIG_NFS_USE_KERNEL_DNS=y +# CONFIG_NFS_USE_NEW_IDMAPPER is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +# CONFIG_CEPH_FS is not set +CONFIG_CIFS=y +CONFIG_CIFS_STATS=y +# CONFIG_CIFS_STATS2 is not set +CONFIG_CIFS_WEAK_PW_HASH=y +# CONFIG_CIFS_UPCALL is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_DFS_UPCALL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=y + +# +# Kernel hacking +# +CONFIG_PRINTK_TIME=y +CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4 +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_SECTION_MISMATCH is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_LOCKUP_DETECTOR=y +# 
CONFIG_HARDLOCKUP_DETECTOR is not set +# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +# CONFIG_DETECT_HUNG_TASK is not set +# CONFIG_SCHED_DEBUG is not set +# CONFIG_SCHEDSTATS is not set +CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_KMEMLEAK is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_SPARSE_RCU_POINTER is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_ATOMIC_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_TEST_LIST_SORT is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_RCU_CPU_STALL_VERBOSE is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set +# CONFIG_LKDTM is not set +# CONFIG_CPU_NOTIFIER_ERROR_INJECT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +# CONFIG_DEBUG_PAGEALLOC is not set +CONFIG_NOP_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_C_RECORDMCOUNT=y +CONFIG_RING_BUFFER=y +CONFIG_EVENT_TRACING=y +# CONFIG_EVENT_POWER_TRACING_DEPRECATED is not set +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_RING_BUFFER_ALLOW_SWAP=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y +CONFIG_FTRACE=y +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +# CONFIG_PROFILE_ALL_BRANCHES is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_RING_BUFFER_BENCHMARK is not set +# CONFIG_TRACELEVEL is not set +CONFIG_DYNAMIC_DEBUG=y +# CONFIG_DMA_API_DEBUG is not set +# CONFIG_ATOMIC64_SELFTEST is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_TEST_KSTRTOX is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_ARM_UNWIND=y +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_LL is not set +# CONFIG_OC_ETM is not set + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_SECURITY_DMESG_RESTRICT is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +CONFIG_TRUSTED_FOUNDATIONS=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEFAULT_SECURITY="" +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y 
+CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP2=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_PCRYPT is not set +CONFIG_CRYPTO_WORKQUEUE=y +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=y +# CONFIG_CRYPTO_GHASH is not set +CONFIG_CRYPTO_MD4=y +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA1_ARM=y +CONFIG_CRYPTO_SHA256=y +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_AES_ARM=y +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +CONFIG_CRYPTO_TWOFISH=y +CONFIG_CRYPTO_TWOFISH_COMMON=y + +# +# Compression +# +CONFIG_CRYPTO_DEFLATE=y +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +# CONFIG_CRYPTO_USER_API_HASH is not set +# CONFIG_CRYPTO_USER_API_SKCIPHER is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_CRYPTO_DEV_TEGRA_AES is not set +CONFIG_CRYPTO_DEV_TEGRA_SE=y +CONFIG_BINARY_PRINTF=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=y +CONFIG_CRC16=y +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=y +# CONFIG_CRC8 is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +# CONFIG_XZ_DEC is not set +# CONFIG_XZ_DEC_BCJ is not set +CONFIG_DECOMPRESS_GZIP=y +CONFIG_REED_SOLOMON=y +CONFIG_REED_SOLOMON_ENC8=y +CONFIG_REED_SOLOMON_DEC8=y +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=y +CONFIG_TEXTSEARCH_BM=y +CONFIG_TEXTSEARCH_FSM=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_CPU_RMAP=y +CONFIG_NLATTR=y +# CONFIG_AVERAGE is not set +# CONFIG_CORDIC is not set diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index b744c47a923..584a9e6b307 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -1,6 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y +CONFIG_IKCONFIG_PROC=n +CONFIG_AUDIT=y CONFIG_CGROUPS=y CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y @@ -17,7 +18,7 @@ CONFIG_EMBEDDED=y 
CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y -CONFIG_MODULES=y +CONFIG_MODULES=n CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -97,6 +98,7 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CT_PROTO_DCCP=y CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y @@ -112,10 +114,13 @@ CONFIG_NF_CT_NETLINK=y CONFIG_NETFILTER_TPROXY=y CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y CONFIG_NETFILTER_XT_TARGET_TPROXY=y CONFIG_NETFILTER_XT_TARGET_TRACE=y CONFIG_NETFILTER_XT_MATCH_COMMENT=y @@ -156,6 +161,7 @@ CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y CONFIG_IP_NF_ARPTABLES=y CONFIG_IP_NF_ARPFILTER=y CONFIG_IP_NF_ARP_MANGLE=y @@ -354,6 +360,7 @@ CONFIG_SND_SOC_TEGRA=y CONFIG_SND_SOC_TEGRA_RT5640=y CONFIG_HEADSET_FUNCTION=y CONFIG_UHID=y +CONFIG_USB_HIDDEV=y CONFIG_HID_A4TECH=y CONFIG_HID_ACRUX=y CONFIG_HID_ACRUX_FF=y @@ -447,7 +454,6 @@ CONFIG_RTC_DRV_RC5T583=y CONFIG_STAGING=y CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y -CONFIG_ANDROID_LOGGER=y CONFIG_ANDROID_RAM_CONSOLE=y CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION=y CONFIG_ANDROID_TIMED_GPIO=y @@ -458,7 +464,7 @@ CONFIG_IIO_KFIFO_BUF=y CONFIG_INV_MPU_IIO=y CONFIG_SENSORS_ISL29028=y CONFIG_SENSORS_LTR558=y -CONFIG_AMI306=y +CONFIG_INV_AMI306_IIO=y CONFIG_RIL=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y @@ -470,6 +476,7 @@ CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=y CONFIG_VFAT_FS=y @@ -494,6 +501,10 @@ CONFIG_DEBUG_VM=y CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_DYNAMIC_DEBUG=y CONFIG_TRUSTED_FOUNDATIONS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_LSM_MMAP_MIN_ADDR=4096 +CONFIG_SECURITY_SELINUX=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_TWOFISH=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile new file mode 100644 index 00000000000..82955273d5e --- /dev/null +++ b/arch/arm/crypto/Makefile @@ -0,0 +1,10 @@ +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o +obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o + +aes-arm-y := aes-armv4.o aes_glue.o +sha1-arm-y := sha1-armv4-large.o sha1_glue.o + diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S new file mode 100644 index 00000000000..e59b1d505d6 --- /dev/null +++ b/arch/arm/crypto/aes-armv4.S @@ -0,0 +1,1112 @@ +#define __ARM_ARCH__ __LINUX_ARM_ARCH__ +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ ==================================================================== + +@ AES for ARMv4 + +@ January 2007. +@ +@ Code uses single 1K S-box and is >2 times faster than code generated +@ by gcc-3.4.1. 
This is thanks to unique feature of ARMv4 ISA, which +@ allows to merge logical or arithmetic operation with shift or rotate +@ in one instruction and emit combined result every cycle. The module +@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit +@ key [on single-issue Xscale PXA250 core]. + +@ May 2007. +@ +@ AES_set_[en|de]crypt_key is added. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 12% improvement on +@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~21.5 cycles per byte. + +@ A little glue here to select the correct code below for the ARM CPU +@ that is being targetted. + +.text +.code 32 + +.type AES_Te,%object +.align 5 +AES_Te: +.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d +.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 +.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d +.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a +.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 +.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b +.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea +.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b +.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a +.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f +.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 +.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f +.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e +.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 +.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d +.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f +.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e +.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb +.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce +.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 +.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c +.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed +.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b +.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a +.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 +.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 +.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 +.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 +.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a +.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 +.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 +.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d +.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f +.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 +.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 +.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 +.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f +.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 +.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c +.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 +.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e +.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 +.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 +.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b +.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 +.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 +.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 +.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 +.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 +.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 +.word 
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 +.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 +.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa +.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 +.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 +.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 +.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 +.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 +.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 +.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a +.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 +.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 +.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 +.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a +@ Te4[256] +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +@ rcon[] +.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 +.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 +.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 +.size AES_Te,.-AES_Te + +@ void AES_encrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.global AES_encrypt +.type AES_encrypt,%function +.align 5 +AES_encrypt: + sub r3,pc,#8 @ AES_encrypt + stmdb sp!,{r1,r4-r12,lr} + mov r12,r0 @ inp + mov r11,r2 + sub r10,r3,#AES_encrypt-AES_Te @ Te +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... 
+ ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_encrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_encrypt,.-AES_encrypt + +.type _armv4_AES_encrypt,%function +.align 2 +_armv4_AES_encrypt: + str lr,[sp,#-4]! 
@ push lr + ldmia r11!,{r4-r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0 + and r8,lr,r0,lsr#8 + and r9,lr,r0,lsr#16 + mov r0,r0,lsr#24 +.Lenc_loop: + ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] + and r8,lr,r1 + ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] + ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] + ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] + eor r0,r0,r7,ror#8 + ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,ror#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r6,r9,ror#8 + and r9,lr,r2 + ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] + eor r1,r1,r4,ror#24 + ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,ror#8 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r6,r9,ror#16 + and r9,lr,r3,lsr#16 @ i2 + ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] + eor r2,r2,r5,ror#16 + ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] + eor r0,r0,r7,ror#24 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] + eor r2,r2,r9,ror#8 + ldr r4,[r11,#-12] + eor r3,r3,r6,ror#8 + + ldr r5,[r11,#-8] + eor r0,r0,r7 + ldr r6,[r11,#-4] + and r7,lr,r0 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0,lsr#16 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Lenc_loop + + add r10,r10,#2 + + ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] + and r8,lr,r1 + ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] + and r9,lr,r1,lsr#8 + ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] + mov r1,r1,lsr#24 + + ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] + ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] + ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] + eor r0,r7,r0,lsl#8 + ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,lsl#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] + eor r1,r4,r1,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] + mov r2,r2,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] + eor r0,r7,r0,lsl#8 + ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r3,lsr#16 @ i2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] + eor r2,r5,r2,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] + mov r3,r3,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] + eor r0,r7,r0,lsl#8 + ldr r7,[r11,#0] + ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r2,r9,lsl#16 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#2 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_encrypt,.-_armv4_AES_encrypt + +.global private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,%function +.align 5 +private_AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: + sub r3,pc,#8 @ AES_set_encrypt_key + teq r0,#0 + moveq r0,#-1 + beq .Labrt + teq r2,#0 + moveq r0,#-1 + beq .Labrt + + teq r1,#128 + beq .Lok + teq r1,#192 + beq .Lok + teq r1,#256 + movne r0,#-1 + bne .Labrt + +.Lok: stmdb sp!,{r4-r12,lr} + sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 + + mov r12,r0 @ inp + mov lr,r1 @ bits + mov r11,r2 @ key + +#if __ARM_ARCH__<7 + ldrb 
r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + str r0,[r11],#16 + orr r3,r3,r5,lsl#16 + str r1,[r11,#-12] + orr r3,r3,r6,lsl#24 + str r2,[r11,#-8] + str r3,[r11,#-4] +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r11],#16 + str r1,[r11,#-12] + str r2,[r11,#-8] + str r3,[r11,#-4] +#endif + + teq lr,#128 + bne .Lnot128 + mov r12,#10 + str r12,[r11,#240-16] + add r6,r10,#256 @ rcon + mov lr,#255 + +.L128_loop: + and r5,lr,r3,lsr#24 + and r7,lr,r3,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r3 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r5,r5,r4 + eor r0,r0,r5 @ rk[4]=rk[0]^... + eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] + str r0,[r11],#16 + eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] + str r1,[r11,#-12] + eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] + str r2,[r11,#-8] + subs r12,r12,#1 + str r3,[r11,#-4] + bne .L128_loop + sub r2,r11,#176 + b .Ldone + +.Lnot128: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#19] + ldrb r4,[r12,#18] + ldrb r5,[r12,#17] + ldrb r6,[r12,#16] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#23] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#22] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#21] + ldrb r6,[r12,#20] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#16] + ldr r9,[r12,#20] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + teq lr,#192 + bne .Lnot192 + mov r12,#12 + str r12,[r11,#240-24] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#8 + +.L192_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[6]=rk[0]^... 
+ eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] + str r0,[r11],#24 + eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] + str r1,[r11,#-20] + eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] + str r2,[r11,#-16] + subs r12,r12,#1 + str r3,[r11,#-12] + subeq r2,r11,#216 + beq .Ldone + + ldr r7,[r11,#-32] + ldr r8,[r11,#-28] + eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] + eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] + str r7,[r11,#-8] + str r9,[r11,#-4] + b .L192_loop + +.Lnot192: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#27] + ldrb r4,[r12,#26] + ldrb r5,[r12,#25] + ldrb r6,[r12,#24] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#31] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#30] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#29] + ldrb r6,[r12,#28] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#24] + ldr r9,[r12,#28] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + mov r12,#14 + str r12,[r11,#240-32] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#7 + +.L256_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[8]=rk[0]^... + eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] + str r0,[r11],#32 + eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] + str r1,[r11,#-28] + eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] + str r2,[r11,#-24] + subs r12,r12,#1 + str r3,[r11,#-20] + subeq r2,r11,#256 + beq .Ldone + + and r5,lr,r3 + and r7,lr,r3,lsr#8 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#16 + ldrb r7,[r10,r7] + and r9,lr,r3,lsr#24 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#8 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r11,#-48] + orr r5,r5,r9,lsl#24 + + ldr r7,[r11,#-44] + ldr r8,[r11,#-40] + eor r4,r4,r5 @ rk[12]=rk[4]^... + ldr r9,[r11,#-36] + eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] + str r4,[r11,#-16] + eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] + str r7,[r11,#-12] + eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] + str r8,[r11,#-8] + str r9,[r11,#-4] + b .L256_loop + +.Ldone: mov r0,#0 + ldmia sp!,{r4-r12,lr} +.Labrt: tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key + +.global private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,%function +.align 5 +private_AES_set_decrypt_key: + str lr,[sp,#-4]! @ push lr +#if 0 + @ kernel does both of these in setkey so optimise this bit out by + @ expecting the key to already have the enc_key work done (see aes_glue.c) + bl _armv4_AES_set_encrypt_key +#else + mov r0,#0 +#endif + teq r0,#0 + ldrne lr,[sp],#4 @ pop lr + bne .Labrt + + stmdb sp!,{r4-r12} + + ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, + mov r11,r2 @ which is AES_KEY *key + mov r7,r2 + add r8,r2,r12,lsl#4 + +.Linv: ldr r0,[r7] + ldr r1,[r7,#4] + ldr r2,[r7,#8] + ldr r3,[r7,#12] + ldr r4,[r8] + ldr r5,[r8,#4] + ldr r6,[r8,#8] + ldr r9,[r8,#12] + str r0,[r8],#-16 + str r1,[r8,#16+4] + str r2,[r8,#16+8] + str r3,[r8,#16+12] + str r4,[r7],#16 + str r5,[r7,#-12] + str r6,[r7,#-8] + str r9,[r7,#-4] + teq r7,r8 + bne .Linv + ldr r0,[r11,#16]! 
@ prefetch tp1 + mov r7,#0x80 + mov r8,#0x1b + orr r7,r7,#0x8000 + orr r8,r8,#0x1b00 + orr r7,r7,r7,lsl#16 + orr r8,r8,r8,lsl#16 + sub r12,r12,#1 + mvn r9,r7 + mov r12,r12,lsl#2 @ (rounds-1)*4 + +.Lmix: and r4,r0,r7 + and r1,r0,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r1,r4,r1,lsl#1 @ tp2 + + and r4,r1,r7 + and r2,r1,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r2,r4,r2,lsl#1 @ tp4 + + and r4,r2,r7 + and r3,r2,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r3,r4,r3,lsl#1 @ tp8 + + eor r4,r1,r2 + eor r5,r0,r3 @ tp9 + eor r4,r4,r3 @ tpe + eor r4,r4,r1,ror#24 + eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) + eor r4,r4,r2,ror#16 + eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) + eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) + + ldr r0,[r11,#4] @ prefetch tp1 + str r4,[r11],#4 + subs r12,r12,#1 + bne .Lmix + + mov r0,#0 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key + +.type AES_Td,%object +.align 5 +AES_Td: +.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 +.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 +.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 +.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f +.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 +.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 +.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da +.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 +.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd +.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 +.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 +.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 +.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 +.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a +.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 +.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c +.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 +.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a +.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 +.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 +.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 +.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff +.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 +.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb +.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 +.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e +.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 +.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a +.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e +.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 +.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d +.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 +.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd +.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 +.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 +.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 +.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d +.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 +.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 +.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef +.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 +.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 +.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 +.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 +.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 
0xcf2512b3 +.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b +.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 +.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 +.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 +.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 +.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 +.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f +.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df +.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f +.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e +.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 +.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 +.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c +.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf +.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 +.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f +.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 +.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 +.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 +@ Td4[256] +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +.size AES_Td,.-AES_Td + +@ void AES_decrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.global AES_decrypt +.type AES_decrypt,%function +.align 5 +AES_decrypt: + sub r3,pc,#8 @ AES_decrypt + stmdb sp!,{r1,r4-r12,lr} + mov r12,r0 @ inp + mov r11,r2 + sub r10,r3,#AES_decrypt-AES_Td @ Td +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... 
+ ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_decrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_decrypt,.-AES_decrypt + +.type _armv4_AES_decrypt,%function +.align 2 +_armv4_AES_decrypt: + str lr,[sp,#-4]! 
@ push lr
+ ldmia r11!,{r4-r7}
+ eor r0,r0,r4
+ ldr r12,[r11,#240-16]
+ eor r1,r1,r5
+ eor r2,r2,r6
+ eor r3,r3,r7
+ sub r12,r12,#1
+ mov lr,#255
+
+ and r7,lr,r0,lsr#16
+ and r8,lr,r0,lsr#8
+ and r9,lr,r0
+ mov r0,r0,lsr#24
+.Ldec_loop:
+ ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16]
+ and r7,lr,r1 @ i0
+ ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8]
+ and r8,lr,r1,lsr#16
+ ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0]
+ and r9,lr,r1,lsr#8
+ ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24]
+ mov r1,r1,lsr#24
+
+ ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0]
+ ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16]
+ ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8]
+ eor r0,r0,r7,ror#24
+ ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r8,r5,ror#8
+ and r8,lr,r2 @ i1
+ eor r6,r9,r6,ror#8
+ and r9,lr,r2,lsr#16
+ ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8]
+ eor r1,r1,r4,ror#8
+ ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0]
+ mov r2,r2,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16]
+ eor r0,r0,r7,ror#16
+ ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24]
+ and r7,lr,r3,lsr#16 @ i0
+ eor r1,r1,r8,ror#24
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r9,r6,ror#8
+ and r9,lr,r3 @ i2
+ ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16]
+ eor r2,r2,r5,ror#8
+ ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8]
+ mov r3,r3,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0]
+ eor r0,r0,r7,ror#8
+ ldr r7,[r11],#16
+ eor r1,r1,r8,ror#16
+ ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24]
+ eor r2,r2,r9,ror#24
+
+ ldr r4,[r11,#-12]
+ eor r0,r0,r7
+ ldr r5,[r11,#-8]
+ eor r3,r3,r6,ror#8
+ ldr r6,[r11,#-4]
+ and r7,lr,r0,lsr#16
+ eor r1,r1,r4
+ and r8,lr,r0,lsr#8
+ eor r2,r2,r5
+ and r9,lr,r0
+ eor r3,r3,r6
+ mov r0,r0,lsr#24
+
+ subs r12,r12,#1
+ bne .Ldec_loop
+
+ add r10,r10,#1024
+
+ ldr r5,[r10,#0] @ prefetch Td4
+ ldr r6,[r10,#32]
+ ldr r4,[r10,#64]
+ ldr r5,[r10,#96]
+ ldr r6,[r10,#128]
+ ldr r4,[r10,#160]
+ ldr r5,[r10,#192]
+ ldr r6,[r10,#224]
+
+ ldrb r0,[r10,r0] @ Td4[s0>>24]
+ ldrb r4,[r10,r7] @ Td4[s0>>16]
+ and r7,lr,r1 @ i0
+ ldrb r5,[r10,r8] @ Td4[s0>>8]
+ and r8,lr,r1,lsr#16
+ ldrb r6,[r10,r9] @ Td4[s0>>0]
+ and r9,lr,r1,lsr#8
+
+ ldrb r7,[r10,r7] @ Td4[s1>>0]
+ ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
+ ldrb r8,[r10,r8] @ Td4[s1>>16]
+ eor r0,r7,r0,lsl#24
+ ldrb r9,[r10,r9] @ Td4[s1>>8]
+ eor r1,r4,r1,lsl#8
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r5,r8,lsl#8
+ and r8,lr,r2 @ i1
+ ldrb r7,[r10,r7] @ Td4[s2>>8]
+ eor r6,r6,r9,lsl#8
+ ldrb r8,[r10,r8] @ Td4[s2>>0]
+ and r9,lr,r2,lsr#16
+
+ ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
+ eor r0,r0,r7,lsl#8
+ ldrb r9,[r10,r9] @ Td4[s2>>16]
+ eor r1,r8,r1,lsl#16
+ and r7,lr,r3,lsr#16 @ i0
+ eor r2,r5,r2,lsl#16
+ and r8,lr,r3,lsr#8 @ i1
+ ldrb r7,[r10,r7] @ Td4[s3>>16]
+ eor r6,r6,r9,lsl#16
+ ldrb r8,[r10,r8] @ Td4[s3>>8]
+ and r9,lr,r3 @ i2
+
+ ldrb r9,[r10,r9] @ Td4[s3>>0]
+ ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
+ eor r0,r0,r7,lsl#16
+ ldr r7,[r11,#0]
+ eor r1,r1,r8,lsl#8
+ ldr r4,[r11,#4]
+ eor r2,r9,r2,lsl#8
+ ldr r5,[r11,#8]
+ eor r3,r6,r3,lsl#24
+ ldr r6,[r11,#12]
+
+ eor r0,r0,r7
+ eor r1,r1,r4
+ eor r2,r2,r5
+ eor r3,r3,r6
+
+ sub r10,r10,#1024
+ ldr pc,[sp],#4 @ pop and return
+.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
+.asciz "AES for ARMv4, CRYPTOGAMS by "
+.align 2
diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c
new file mode 100644
index 00000000000..59f7877ead6
--- /dev/null
+++ b/arch/arm/crypto/aes_glue.c
@@ -0,0 +1,108 @@
+/*
+ * Glue Code for the asm optimized version of the AES Cipher Algorithm
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <crypto/aes.h>
+
+#define AES_MAXNR 14
+
+typedef struct {
+ unsigned int rd_key[4 *(AES_MAXNR + 1)];
+ int rounds;
+} AES_KEY;
+
+struct AES_CTX {
+ AES_KEY
enc_key; + AES_KEY dec_key; +}; + +asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx); +asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx); +asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); +asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct AES_CTX *ctx = crypto_tfm_ctx(tfm); + AES_encrypt(src, dst, &ctx->enc_key); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct AES_CTX *ctx = crypto_tfm_ctx(tfm); + AES_decrypt(src, dst, &ctx->dec_key); +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct AES_CTX *ctx = crypto_tfm_ctx(tfm); + + switch (key_len) { + case AES_KEYSIZE_128: + key_len = 128; + break; + case AES_KEYSIZE_192: + key_len = 192; + break; + case AES_KEYSIZE_256: + key_len = 256; + break; + default: + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + /* private_AES_set_decrypt_key expects an encryption key as input */ + ctx->dec_key = ctx->enc_key; + if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + return 0; +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct AES_CTX), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("aes"); +MODULE_ALIAS("aes-asm"); +MODULE_AUTHOR("David McCullough "); diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S new file mode 100644 index 00000000000..7050ab133b9 --- /dev/null +++ b/arch/arm/crypto/sha1-armv4-large.S @@ -0,0 +1,503 @@ +#define __ARM_ARCH__ __LINUX_ARM_ARCH__ +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ ==================================================================== + +@ sha1_block procedure for ARMv4. +@ +@ January 2007. 
+ +@ Size/performance trade-off +@ ==================================================================== +@ impl size in bytes comp cycles[*] measured performance +@ ==================================================================== +@ thumb 304 3212 4420 +@ armv4-small 392/+29% 1958/+64% 2250/+96% +@ armv4-compact 740/+89% 1552/+26% 1840/+22% +@ armv4-large 1420/+92% 1307/+19% 1370/+34%[***] +@ full unroll ~5100/+260% ~1260/+4% ~1300/+5% +@ ==================================================================== +@ thumb = same as 'small' but in Thumb instructions[**] and +@ with recurring code in two private functions; +@ small = detached Xload/update, loops are folded; +@ compact = detached Xload/update, 5x unroll; +@ large = interleaved Xload/update, 5x unroll; +@ full unroll = interleaved Xload/update, full unroll, estimated[!]; +@ +@ [*] Manually counted instructions in "grand" loop body. Measured +@ performance is affected by prologue and epilogue overhead, +@ i-cache availability, branch penalties, etc. +@ [**] While each Thumb instruction is twice smaller, they are not as +@ diverse as ARM ones: e.g., there are only two arithmetic +@ instructions with 3 arguments, no [fixed] rotate, addressing +@ modes are limited. As result it takes more instructions to do +@ the same job in Thumb, therefore the code is never twice as +@ small and always slower. +@ [***] which is also ~35% better than compiler generated code. Dual- +@ issue Cortex A8 core was measured to process input block in +@ ~990 cycles. + +@ August 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 13% improvement on +@ Cortex A8 core and in absolute terms ~870 cycles per input block +@ [or 13.6 cycles per byte]. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 10% +@ improvement on Cortex A8 core and 12.2 cycles per byte. + +.text + +.global sha1_block_data_order +.type sha1_block_data_order,%function + +.align 2 +sha1_block_data_order: + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + ldmia r0,{r3,r4,r5,r6,r7} +.Lloop: + ldr r8,.LK_00_19 + mov r14,sp + sub sp,sp,#15*4 + mov r5,r5,ror#30 + mov r6,r6,ror#30 + mov r7,r7,ror#30 @ [6] +.L_00_15: +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r6,r8,r6,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 + eor r10,r4,r5 @ F_xx_xx + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r3,r10,ror#2 + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! 
+ add r6,r6,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r5,r8,r5,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 + eor r10,r3,r4 @ F_xx_xx + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r7,r10,ror#2 + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r5,r5,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r4,r8,r4,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 + eor r10,r7,r3 @ F_xx_xx + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r6,r10,ror#2 + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r4,r4,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r3,r8,r3,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 + eor r10,r6,r7 @ F_xx_xx + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r5,r10,ror#2 + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r3,r3,r10 @ E+=F_00_19(B,C,D) + teq r14,sp + bne .L_00_15 @ [((11+4)*5+2)*3] +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + add r6,r6,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ and r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + add r5,r5,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + add r4,r4,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + add r3,r3,r10 @ E+=F_00_19(B,C,D) + + ldr r8,.LK_20_39 @ [+15+16*4] + sub sp,sp,#25*4 + cmn sp,#0 @ [+3], clear carry to denote 20_39 +.L_20_39_or_60_79: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r4,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ eor r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_20_39(B,C,D) + teq r14,sp @ preserve carry + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes + + ldr r8,.LK_40_59 + sub sp,sp,#20*4 @ [+2] +.L_40_59: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r4,r10,ror#2 @ F_xx_xx + and r11,r5,r6 @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_40_59(B,C,D) + add r7,r7,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + and r11,r4,r5 @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_40_59(B,C,D) + add r6,r6,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + and r11,r3,r4 @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_40_59(B,C,D) + add r5,r5,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + and r11,r7,r3 @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_40_59(B,C,D) + add r4,r4,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ and r10,r5,r10,ror#2 @ F_xx_xx + and r11,r6,r7 @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_40_59(B,C,D) + add r3,r3,r11,ror#2 + teq r14,sp + bne .L_40_59 @ [+((12+5)*5+2)*4] + + ldr r8,.LK_60_79 + sub sp,sp,#20*4 + cmp sp,#0 @ set carry to denote 60_79 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes +.L_done: + add sp,sp,#80*4 @ "deallocate" stack frame + ldmia r0,{r8,r9,r10,r11,r12} + add r3,r8,r3 + add r4,r9,r4 + add r5,r10,r5,ror#2 + add r6,r11,r6,ror#2 + add r7,r12,r7,ror#2 + stmia r0,{r3,r4,r5,r6,r7} + teq r1,r2 + bne .Lloop @ [+18], total 1307 + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.align 2 +.LK_00_19: .word 0x5a827999 +.LK_20_39: .word 0x6ed9eba1 +.LK_40_59: .word 0x8f1bbcdc +.LK_60_79: .word 0xca62c1d6 +.size sha1_block_data_order,.-sha1_block_data_order +.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " +.align 2 diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c new file mode 100644 index 00000000000..76cd976230b --- /dev/null +++ b/arch/arm/crypto/sha1_glue.c @@ -0,0 +1,179 @@ +/* + * Cryptographic API. + * Glue code for the SHA1 Secure Hash Algorithm assembler implementation + * + * This file is based on sha1_generic.c and sha1_ssse3_glue.c + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald + * Copyright (c) Jean-Francois Dive + * Copyright (c) Mathias Krause + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct SHA1_CTX { + uint32_t h0,h1,h2,h3,h4; + u64 count; + u8 data[SHA1_BLOCK_SIZE]; +}; + +asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest, + const unsigned char *data, unsigned int rounds); + + +static int sha1_init(struct shash_desc *desc) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + memset(sctx, 0, sizeof(*sctx)); + sctx->h0 = SHA1_H0; + sctx->h1 = SHA1_H1; + sctx->h2 = SHA1_H2; + sctx->h3 = SHA1_H3; + sctx->h4 = SHA1_H4; + return 0; +} + + +static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA1_BLOCK_SIZE - partial; + memcpy(sctx->data + partial, data, done); + sha1_block_data_order(sctx, sctx->data, 1); + } + + if (len - done >= SHA1_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + sha1_block_data_order(sctx, data + done, rounds); + done += rounds * SHA1_BLOCK_SIZE; + } + + memcpy(sctx->data, data + done, len - done); + return 0; +} + + +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA1_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->data + partial, data, len); + return 0; + } + res = __sha1_update(sctx, data, len, partial); + return res; +} + + +/* Add padding and return the message digest. 
*/ +static int sha1_final(struct shash_desc *desc, u8 *out) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA1_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + /* We need to fill a whole block for __sha1_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->data + index, padding, padlen); + } else { + __sha1_update(sctx, padding, padlen, index); + } + __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(((u32 *)sctx)[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + return 0; +} + + +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_init, + .update = sha1_update, + .final = sha1_final, + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct SHA1_CTX), + .statesize = sizeof(struct SHA1_CTX), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + + +static int __init sha1_mod_init(void) +{ + return crypto_register_shash(&alg); +} + + +static void __exit sha1_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + + +module_init(sha1_mod_init); +module_exit(sha1_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)"); +MODULE_ALIAS("sha1"); +MODULE_AUTHOR("David McCullough "); diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 7bb8bf972c0..4b0c4eddbcb 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -231,7 +231,7 @@ */ #ifdef CONFIG_THUMB2_KERNEL - .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T() + .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=TUSER() 9999: .if \inc == 1 \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] @@ -271,7 +271,7 @@ #else /* !CONFIG_THUMB2_KERNEL */ - .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=T() + .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=TUSER() .rept \rept 9999: .if \inc == 1 @@ -307,4 +307,12 @@ .size \name , . 
- \name .endm + .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req +#ifndef CONFIG_CPU_USE_DOMAINS + adds \tmp, \addr, #\size - 1 + sbcccs \tmp, \tmp, \limit + bcs \bad +#endif + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index af18ceaacf5..b5dc173d336 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -83,9 +83,9 @@ * instructions (inline assembly) */ #ifdef CONFIG_CPU_USE_DOMAINS -#define T(instr) #instr "t" +#define TUSER(instr) #instr "t" #else -#define T(instr) #instr +#define TUSER(instr) #instr #endif #else /* __ASSEMBLY__ */ @@ -95,9 +95,9 @@ * instructions */ #ifdef CONFIG_CPU_USE_DOMAINS -#define T(instr) instr ## t +#define TUSER(instr) instr ## t #else -#define T(instr) instr +#define TUSER(instr) instr #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 253cc86318b..7be54690aee 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -75,9 +75,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1, [%3]\n" \ + "1: " TUSER(ldr) " %1, [%3]\n" \ " " insn "\n" \ - "2: " T(str) " %0, [%3]\n" \ + "2: " TUSER(str) " %0, [%3]\n" \ " mov %0, #0\n" \ __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ @@ -95,10 +95,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" - "1: " T(ldr) " %1, [%4]\n" + "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" " it eq @ explicit IT needed for the 2b label\n" - "2: " T(streq) " %3, [%4]\n" + "2: " TUSER(streq) " %3, [%4]\n" __futex_atomic_ex_table("%5") : "+r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index c2b9b4bdec0..564c55b394e 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -17,6 +17,10 @@ #define KEXEC_ARM_ATAGS_OFFSET 0x1000 #define KEXEC_ARM_ZIMAGE_OFFSET 0x8000 +#ifdef CONFIG_KEXEC_HARDBOOT + #define KEXEC_HB_PAGE_MAGIC 0x4a5db007 +#endif + #ifndef __ASSEMBLY__ /** @@ -53,6 +57,10 @@ static inline void crash_setup_regs(struct pt_regs *newregs, /* Function pointer to optional machine-specific reinitialization */ extern void (*kexec_reinit)(void); +#ifdef CONFIG_KEXEC_HARDBOOT +extern void (*kexec_hardboot_hook)(void); +#endif + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_KEXEC */ diff --git a/arch/arm/include/asm/rwsem.h b/arch/arm/include/asm/rwsem.h new file mode 100644 index 00000000000..2066674d8e6 --- /dev/null +++ b/arch/arm/include/asm/rwsem.h @@ -0,0 +1,138 @@ +/* rwsem.h: R/W semaphores implemented using ARM atomic functions. + * + * Copyright (c) 2010, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef _ASM_ARM_RWSEM_H +#define _ASM_ARM_RWSEM_H + +#ifndef _LINUX_RWSEM_H +#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" +#endif + +#ifdef __KERNEL__ +#include +#include + +#define RWSEM_UNLOCKED_VALUE 0x00000000 +#define RWSEM_ACTIVE_BIAS 0x00000001 +#define RWSEM_ACTIVE_MASK 0x0000ffff +#define RWSEM_WAITING_BIAS (-0x00010000) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + if (atomic_inc_return((atomic_t *)(&sem->count)) < 0) + rwsem_down_read_failed(sem); +} + +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + int tmp; + + while ((tmp = sem->count) >= 0) { + if (tmp == cmpxchg(&sem->count, tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { + return 1; + } + } + return 0; +} + +/* + * lock for writing + */ +static inline void __down_write(struct rw_semaphore *sem) +{ + int tmp; + + tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_t *)(&sem->count)); + if (tmp != RWSEM_ACTIVE_WRITE_BIAS) + rwsem_down_write_failed(sem); +} + +static inline int __down_write_trylock(struct rw_semaphore *sem) +{ + int tmp; + + tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + return tmp == RWSEM_UNLOCKED_VALUE; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + int tmp; + + tmp = atomic_dec_return((atomic_t *)(&sem->count)); + if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_t *)(&sem->count)) < 0) + rwsem_wake(sem); +} + +/* + * implement atomic add functionality + */ +static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +{ + atomic_add(delta, (atomic_t *)(&sem->count)); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + int tmp; + + tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count)); + if (tmp < 0) + rwsem_downgrade_wake(sem); +} + +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +{ + __down_write(sem); +} + +/* + * implement exchange and add functionality + */ +static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +{ + return atomic_add_return(delta, (atomic_t *)(&sem->count)); +} + +#endif /* __KERNEL__ */ +#endif /* _ASM_ARM_RWSEM_H */ diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 65fa3c88095..dcca63802a0 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -59,18 +59,13 @@ static inline void dsb_sev(void) } /* - * ARMv6 Spin-locking. + * ARMv6 ticket-based spin-locking. * - * We exclusively read the old value. If it is zero, we may have - * won the lock, so we try exclusively storing it. A memory barrier - * is required after we get a lock, and before we release it, because - * V6 CPUs are assumed to have weakly ordered memory. 
- * - * Unlocked value: 0 - * Locked value: 1 + * A memory barrier is required after we get a lock, and before we + * release it, because V6 CPUs are assumed to have weakly ordered + * memory. */ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -79,31 +74,40 @@ static inline void dsb_sev(void) static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; + u32 newval; + arch_spinlock_t lockval; __asm__ __volatile__( -"1: ldrex %0, [%1]\n" -" teq %0, #0\n" - WFE("ne") -" strexeq %0, %2, [%1]\n" -" teqeq %0, #0\n" +"1: ldrex %0, [%3]\n" +" add %1, %0, %4\n" +" strex %2, %1, [%3]\n" +" teq %2, #0\n" " bne 1b" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); + while (lockval.tickets.next != lockval.tickets.owner) { + wfe(); + lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); + } + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp; + u32 slock; __asm__ __volatile__( -" ldrex %0, [%1]\n" -" teq %0, #0\n" -" strexeq %0, %2, [%1]" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) +" ldrex %0, [%2]\n" +" cmp %0, %0, ror #16\n" +" movne %1, #1\n" +" addeq %0, %0, %3\n" +" strexeq %1, %0, [%2]" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); if (tmp == 0) { @@ -116,17 +120,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { + unsigned long tmp; + u32 slock; + smp_mb(); __asm__ __volatile__( -" str %1, [%0]\n" - : - : "r" (&lock->lock), "r" (0) +" mov %1, #1\n" +"1: ldrex %0, [%2]\n" +" uadd16 %0, %0, %1\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock) : "cc"); dsb_sev(); } +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + return tickets.owner != tickets.next; +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + return (tickets.next - tickets.owner) > 1; +} +#define arch_spin_is_contended arch_spin_is_contended + /* * RWLOCKS * diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index d14d197ae04..b262d2f8b47 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -5,11 +5,24 @@ # error "please don't include this file directly" #endif +#define TICKET_SHIFT 16 + typedef struct { - volatile unsigned int lock; + union { + u32 slock; + struct __raw_tickets { +#ifdef __ARMEB__ + u16 next; + u16 owner; +#else + u16 owner; + u16 next; +#endif + } tickets; + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } typedef struct { volatile unsigned int lock; diff --git a/arch/arm/include/asm/swab.h b/arch/arm/include/asm/swab.h index 9997ad20eff..32ee164a2f6 100644 --- a/arch/arm/include/asm/swab.h +++ b/arch/arm/include/asm/swab.h @@ -24,12 +24,13 @@ #if defined(__KERNEL__) && __LINUX_ARM_ARCH__ >= 6 -static inline __attribute_const__ __u16 __arch_swab16(__u16 x) +static inline __attribute_const__ __u32 __arch_swahb32(__u32 x) { __asm__ ("rev16 %0, %1" : "=r" (x) : "r" (x)); return x; } -#define __arch_swab16 __arch_swab16 +#define __arch_swahb32 __arch_swahb32 +#define 
__arch_swab16(x) ((__u16)__arch_swahb32(x)) static inline __attribute_const__ __u32 __arch_swab32(__u32 x) { diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index b293616a1a1..5b1a81ebfe5 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -101,28 +101,39 @@ extern int __get_user_1(void *); extern int __get_user_2(void *); extern int __get_user_4(void *); -#define __get_user_x(__r2,__p,__e,__s,__i...) \ +#define __GUP_CLOBBER_1 "lr", "cc" +#ifdef CONFIG_CPU_USE_DOMAINS +#define __GUP_CLOBBER_2 "ip", "lr", "cc" +#else +#define __GUP_CLOBBER_2 "lr", "cc" +#endif +#define __GUP_CLOBBER_4 "lr", "cc" + +#define __get_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ __asmeq("%0", "r0") __asmeq("%1", "r2") \ + __asmeq("%3", "r1") \ "bl __get_user_" #__s \ : "=&r" (__e), "=r" (__r2) \ - : "0" (__p) \ - : __i, "cc") + : "0" (__p), "r" (__l) \ + : __GUP_CLOBBER_##__s) #define get_user(x,p) \ ({ \ + unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ register unsigned long __r2 asm("r2"); \ + register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ - __get_user_x(__r2, __p, __e, 1, "lr"); \ - break; \ + __get_user_x(__r2, __p, __e, __l, 1); \ + break; \ case 2: \ - __get_user_x(__r2, __p, __e, 2, "r3", "lr"); \ + __get_user_x(__r2, __p, __e, __l, 2); \ break; \ case 4: \ - __get_user_x(__r2, __p, __e, 4, "lr"); \ + __get_user_x(__r2, __p, __e, __l, 4); \ break; \ default: __e = __get_user_bad(); break; \ } \ @@ -135,31 +146,34 @@ extern int __put_user_2(void *, unsigned int); extern int __put_user_4(void *, unsigned int); extern int __put_user_8(void *, unsigned long long); -#define __put_user_x(__r2,__p,__e,__s) \ +#define __put_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ __asmeq("%0", "r0") __asmeq("%2", "r2") \ + __asmeq("%3", "r1") \ "bl __put_user_" #__s \ : "=&r" (__e) \ - : "0" (__p), "r" (__r2) \ + : "0" (__p), "r" (__r2), "r" (__l) \ : "ip", "lr", "cc") #define put_user(x,p) \ ({ \ + unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __r2 asm("r2") = (x); \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ + register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ - __put_user_x(__r2, __p, __e, 1); \ + __put_user_x(__r2, __p, __e, __l, 1); \ break; \ case 2: \ - __put_user_x(__r2, __p, __e, 2); \ + __put_user_x(__r2, __p, __e, __l, 2); \ break; \ case 4: \ - __put_user_x(__r2, __p, __e, 4); \ + __put_user_x(__r2, __p, __e, __l, 4); \ break; \ case 8: \ - __put_user_x(__r2, __p, __e, 8); \ + __put_user_x(__r2, __p, __e, __l, 8); \ break; \ default: __e = __put_user_bad(); break; \ } \ @@ -227,7 +241,7 @@ do { \ #define __get_user_asm_byte(x,addr,err) \ __asm__ __volatile__( \ - "1: " T(ldrb) " %1,[%2],#0\n" \ + "1: " TUSER(ldrb) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -263,7 +277,7 @@ do { \ #define __get_user_asm_word(x,addr,err) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1,[%2],#0\n" \ + "1: " TUSER(ldr) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -308,7 +322,7 @@ do { \ #define __put_user_asm_byte(x,__pu_addr,err) \ __asm__ __volatile__( \ - "1: " T(strb) " %1,[%2],#0\n" \ + "1: " TUSER(strb) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -341,7 +355,7 @@ do { \ 
#define __put_user_asm_word(x,__pu_addr,err) \ __asm__ __volatile__( \ - "1: " T(str) " %1,[%2],#0\n" \ + "1: " TUSER(str) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -366,10 +380,10 @@ do { \ #define __put_user_asm_dword(x,__pu_addr,err) \ __asm__ __volatile__( \ - ARM( "1: " T(str) " " __reg_oper1 ", [%1], #4\n" ) \ - ARM( "2: " T(str) " " __reg_oper0 ", [%1]\n" ) \ - THUMB( "1: " T(str) " " __reg_oper1 ", [%1]\n" ) \ - THUMB( "2: " T(str) " " __reg_oper0 ", [%1, #4]\n" ) \ + ARM( "1: " TUSER(str) " " __reg_oper1 ", [%1], #4\n" ) \ + ARM( "2: " TUSER(str) " " __reg_oper0 ", [%1]\n" ) \ + THUMB( "1: " TUSER(str) " " __reg_oper1 ", [%1]\n" ) \ + THUMB( "2: " TUSER(str) " " __reg_oper0 ", [%1, #4]\n" ) \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 4f8e30f183b..2be87bf0610 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -208,6 +208,11 @@ ENDPROC(__dabt_svc) .align 5 __irq_svc: svc_entry + +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif + irq_handler #ifdef CONFIG_PREEMPT diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index e59bbd496c3..812c0cbd1e4 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -22,6 +22,10 @@ extern unsigned long kexec_start_address; extern unsigned long kexec_indirection_page; extern unsigned long kexec_mach_type; extern unsigned long kexec_boot_atags; +#ifdef CONFIG_KEXEC_HARDBOOT +extern unsigned long kexec_hardboot; +void (*kexec_hardboot_hook)(void); +#endif static atomic_t waiting_for_crash_ipi; @@ -99,6 +103,9 @@ void machine_kexec(struct kimage *image) kexec_indirection_page = page_list; kexec_mach_type = machine_arch_type; kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET; +#ifdef CONFIG_KEXEC_HARDBOOT + kexec_hardboot = image->hardboot; +#endif /* copy our kernel relocation code to the control code page */ memcpy(reboot_code_buffer, @@ -114,11 +121,23 @@ void machine_kexec(struct kimage *image) local_irq_disable(); local_fiq_disable(); setup_mm_for_reboot(0); /* mode is not used, so just pass 0*/ + +#ifdef CONFIG_KEXEC_HARDBOOT + /* Run any final machine-specific shutdown code. */ + if (image->hardboot && kexec_hardboot_hook) + kexec_hardboot_hook(); +#endif + flush_cache_all(); outer_flush_all(); outer_disable(); cpu_proc_fin(); - outer_inv_all(); - flush_cache_all(); - cpu_reset(reboot_code_buffer_phys); + + // Freezes the tegra 3 + //outer_inv_all(); + //flush_cache_all(); + + /* Must call cpu_reset via physical address since ARMv7 (& v6) stalls the + * pipeline after disabling the MMU. 
*/ + ((typeof(cpu_reset) *)virt_to_phys(cpu_reset))(reboot_code_buffer_phys); } diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index d0cdedf4864..98e0a89782c 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -4,6 +4,13 @@ #include +#ifdef CONFIG_KEXEC_HARDBOOT +#include +#if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC) + #include +#endif +#endif + .globl relocate_new_kernel relocate_new_kernel: @@ -52,6 +59,12 @@ relocate_new_kernel: b 0b 2: +#ifdef CONFIG_KEXEC_HARDBOOT + ldr r0, kexec_hardboot + teq r0, #0 + bne hardboot +#endif + /* Jump to relocated kernel */ mov lr,r1 mov r0,#0 @@ -60,6 +73,34 @@ relocate_new_kernel: ARM( mov pc, lr ) THUMB( bx lr ) +#ifdef CONFIG_KEXEC_HARDBOOT +hardboot: + /* Stash boot arguments in hardboot page: + * 0: KEXEC_HB_PAGE_MAGIC + * 4: kexec_start_address + * 8: kexec_mach_type + * 12: kexec_boot_atags */ + ldr r0, =KEXEC_HB_PAGE_ADDR + str r1, [r0, #4] + ldr r1, kexec_mach_type + str r1, [r0, #8] + ldr r1, kexec_boot_atags + str r1, [r0, #12] + ldr r1, =KEXEC_HB_PAGE_MAGIC + str r1, [r0] + +#if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC) + ldr r0, =TEGRA_PMC_BASE + ldr r1, [r0] + orr r1, r1, #0x10 + str r1, [r0] +loop: b loop +#else +#error "No reboot method defined for hardboot." +#endif + + .ltorg +#endif .align .globl kexec_start_address @@ -79,6 +120,12 @@ kexec_mach_type: kexec_boot_atags: .long 0x0 +#ifdef CONFIG_KEXEC_HARDBOOT + .globl kexec_hardboot +kexec_hardboot: + .long 0x0 +#endif + relocate_new_kernel_end: .globl relocate_new_kernel_size diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index bdfb37c5953..47b09a3ea04 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -215,7 +215,7 @@ void __cpu_die(unsigned int cpu) pr_err("CPU%u: cpu didn't die\n", cpu); return; } - printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); + pr_debug(KERN_NOTICE "CPU%u: shutdown\n", cpu); if (!platform_cpu_kill(cpu)) printk("CPU%u: unable to kill\n", cpu); @@ -280,7 +280,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); - printk("CPU%u: Booted secondary processor\n", cpu); + pr_debug("CPU%u: Booted secondary processor\n", cpu); /* * All kernel threads share the same mm context; grab a diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 1b049cd7a49..9b06bb41fca 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -16,8 +16,9 @@ * __get_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * Outputs: r0 is the error code - * r2, r3 contains the zero-extended value + * r2 contains the zero-extended value * lr corrupted * * No other registers must be altered. (see @@ -27,34 +28,40 @@ * Note also that it is intended that __get_user_bad is not global. 
*/ #include +#include #include #include ENTRY(__get_user_1) -1: T(ldrb) r2, [r0] + check_uaccess r0, 1, r1, r2, __get_user_bad +1: TUSER(ldrb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_1) ENTRY(__get_user_2) -#ifdef CONFIG_THUMB2_KERNEL -2: T(ldrb) r2, [r0] -3: T(ldrb) r3, [r0, #1] + check_uaccess r0, 2, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +2: ldrbt r2, [r0], #1 +3: ldrbt rb, [r0], #0 #else -2: T(ldrb) r2, [r0], #1 -3: T(ldrb) r3, [r0] +rb .req r0 +2: ldrb r2, [r0] +3: ldrb rb, [r0, #1] #endif #ifndef __ARMEB__ - orr r2, r2, r3, lsl #8 + orr r2, r2, rb, lsl #8 #else - orr r2, r3, r2, lsl #8 + orr r2, rb, r2, lsl #8 #endif mov r0, #0 mov pc, lr ENDPROC(__get_user_2) ENTRY(__get_user_4) -4: T(ldr) r2, [r0] + check_uaccess r0, 4, r1, r2, __get_user_bad +4: TUSER(ldr) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_4) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index c023fc11e86..3d73dcb959b 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -16,6 +16,7 @@ * __put_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * r2, r3 contains the value * Outputs: r0 is the error code * lr corrupted @@ -27,32 +28,35 @@ * Note also that it is intended that __put_user_bad is not global. */ #include +#include #include #include ENTRY(__put_user_1) -1: T(strb) r2, [r0] + check_uaccess r0, 1, r1, ip, __put_user_bad +1: TUSER(strb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_1) ENTRY(__put_user_2) + check_uaccess r0, 2, r1, ip, __put_user_bad mov ip, r2, lsr #8 #ifdef CONFIG_THUMB2_KERNEL #ifndef __ARMEB__ -2: T(strb) r2, [r0] -3: T(strb) ip, [r0, #1] +2: TUSER(strb) r2, [r0] +3: TUSER(strb) ip, [r0, #1] #else -2: T(strb) ip, [r0] -3: T(strb) r2, [r0, #1] +2: TUSER(strb) ip, [r0] +3: TUSER(strb) r2, [r0, #1] #endif #else /* !CONFIG_THUMB2_KERNEL */ #ifndef __ARMEB__ -2: T(strb) r2, [r0], #1 -3: T(strb) ip, [r0] +2: TUSER(strb) r2, [r0], #1 +3: TUSER(strb) ip, [r0] #else -2: T(strb) ip, [r0], #1 -3: T(strb) r2, [r0] +2: TUSER(strb) ip, [r0], #1 +3: TUSER(strb) r2, [r0] #endif #endif /* CONFIG_THUMB2_KERNEL */ mov r0, #0 @@ -60,18 +64,20 @@ ENTRY(__put_user_2) ENDPROC(__put_user_2) ENTRY(__put_user_4) -4: T(str) r2, [r0] + check_uaccess r0, 4, r1, ip, __put_user_bad +4: TUSER(str) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_4) ENTRY(__put_user_8) + check_uaccess r0, 8, r1, ip, __put_user_bad #ifdef CONFIG_THUMB2_KERNEL -5: T(str) r2, [r0] -6: T(str) r3, [r0, #4] +5: TUSER(str) r2, [r0] +6: TUSER(str) r3, [r0, #4] #else -5: T(str) r2, [r0], #4 -6: T(str) r3, [r0] +5: TUSER(str) r2, [r0], #4 +6: TUSER(str) r3, [r0] #endif mov r0, #0 mov pc, lr diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index d0ece2aeb70..5c908b1cb8e 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -32,11 +32,11 @@ rsb ip, ip, #4 cmp ip, #2 ldrb r3, [r1], #1 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault sub r2, r2, ip b .Lc2u_dest_aligned @@ -59,7 +59,7 @@ ENTRY(__copy_to_user) addmi ip, r2, #4 bmi .Lc2u_0nowords ldr r3, [r1], #4 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 movs ip, ip, lsr 
#32 - PAGE_SHIFT @@ -88,18 +88,18 @@ USER( T(str) r3, [r0], #4) @ May fault stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 ldrne r3, [r1], #4 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_0fupi .Lc2u_0nowords: teq ip, #0 beq .Lc2u_finished .Lc2u_nowords: cmp ip, #2 ldrb r3, [r1], #1 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_not_enough: @@ -120,7 +120,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #8 ldr r7, [r1], #4 orr r3, r3, r7, push #24 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -155,18 +155,18 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #8 ldrne r7, [r1], #4 orrne r3, r3, r7, push #24 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_1fupi .Lc2u_1nowords: mov r3, r7, get_byte_1 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_2 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault movgt r3, r7, get_byte_3 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_2fupi: subs r2, r2, #4 @@ -175,7 +175,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #16 ldr r7, [r1], #4 orr r3, r3, r7, push #16 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -210,18 +210,18 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #16 ldrne r7, [r1], #4 orrne r3, r3, r7, push #16 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_2fupi .Lc2u_2nowords: mov r3, r7, get_byte_2 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_3 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_3fupi: subs r2, r2, #4 @@ -230,7 +230,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #24 ldr r7, [r1], #4 orr r3, r3, r7, push #8 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -265,18 +265,18 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #24 ldrne r7, [r1], #4 orrne r3, r3, r7, push #8 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_3fupi .Lc2u_3nowords: mov r3, r7, get_byte_3 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( 
TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished ENDPROC(__copy_to_user) @@ -295,11 +295,11 @@ ENDPROC(__copy_to_user) .Lcfu_dest_not_aligned: rsb ip, ip, #4 cmp ip, #2 -USER( T(ldrb) r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 sub r2, r2, ip b .Lcfu_dest_aligned @@ -322,7 +322,7 @@ ENTRY(__copy_from_user) .Lcfu_0fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_0nowords -USER( T(ldr) r3, [r1], #4) +USER( TUSER( ldr) r3, [r1], #4) str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 @@ -351,18 +351,18 @@ USER( T(ldr) r3, [r1], #4) ldmneia r1!, {r3 - r4} @ Shouldnt fault stmneia r0!, {r3 - r4} tst ip, #4 - T(ldrne) r3, [r1], #4 @ Shouldnt fault + TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_0fupi .Lcfu_0nowords: teq ip, #0 beq .Lcfu_finished .Lcfu_nowords: cmp ip, #2 -USER( T(ldrb) r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -375,7 +375,7 @@ USER( T(ldrgtb) r3, [r1], #1) @ May fault .Lcfu_src_not_aligned: bic r1, r1, #3 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault cmp ip, #2 bgt .Lcfu_3fupi beq .Lcfu_2fupi @@ -383,7 +383,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault addmi ip, r2, #4 bmi .Lcfu_1nowords mov r3, r7, pull #8 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -418,7 +418,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #8 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #24 strne r3, [r0], #4 ands ip, ip, #3 @@ -438,7 +438,7 @@ USER( T(ldrne) r7, [r1], #4) @ May fault addmi ip, r2, #4 bmi .Lcfu_2nowords mov r3, r7, pull #16 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -474,7 +474,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #16 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #16 strne r3, [r0], #4 ands ip, ip, #3 @@ -486,7 +486,7 @@ USER( T(ldrne) r7, [r1], #4) @ May fault strb r3, [r0], #1 movge r3, r7, get_byte_3 strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #0) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -494,7 +494,7 @@ USER( T(ldrgtb) r3, [r1], #0) @ May fault addmi ip, r2, #4 bmi .Lcfu_3nowords mov r3, r7, pull #24 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -529,7 +529,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #24 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, 
r3, r7, push #8 strne r3, [r0], #4 ands ip, ip, #3 @@ -539,9 +539,9 @@ USER( T(ldrne) r7, [r1], #4) @ May fault beq .Lcfu_finished cmp ip, #2 strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished ENDPROC(__copy_from_user) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 9ac04ddf8ed..9b328e97a68 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -70,6 +70,14 @@ config TEGRA_PCI help Adds PCIe Host controller driver for tegra based systems +config TEGRA_AHB + bool "Enable AHB driver for NVIDIA Tegra SoCs" + default y + help + Adds AHB configuration functionality for NVIDIA Tegra SoCs, + which controls AHB bus master arbitration and some + perfomance parameters(priority, prefech size). + comment "Tegra board type" config MACH_HARMONY @@ -271,6 +279,108 @@ config TEGRA_EMC_SCALING_ENABLE depends on TEGRA_SILICON_PLATFORM default n +config VOLTAGE_CONTROL + bool "Enable user voltage control on Tegra CPU" + depends on TEGRA_SILICON_PLATFORM + default n + help + User custom voltage control interface + +config CUSTOM_BRIGHTNESS + bool "Custom Brightness Levels" + depends on TEGRA_SILICON_PLATFORM + default n + help + Allow custom min and max brightness + +config DEFAULT_DUAL_CORE + bool "Default to Dual-Core" + depends on TEGRA_SILICON_PLATFORM + default n + help + Set the default maximum number of cores to 2 instead of 4. + +config GPU_OVERCLOCK + bool "Enable GPU overclock for Tegra3" + depends on TEGRA_SILICON_PLATFORM + default n + help + Choose y to overclock the GPU. + If Off, clock speed is 416MHz. + If On, GPU clock speed can be selected. + +choice + + depends on GPU_OVERCLOCK + prompt "Maximum GPU Rate" + default GPU_OC_484 + ---help--- + Select the desired GPU overclock rate. + + If you are not sure what you are doing, leave this + option alone! + config GPU_OC_332 + bool "332 MHz" + config GPU_OC_446 + bool "446 MHz" + config GPU_OC_484 + bool "484 MHz" + config GPU_OC_520 + bool "520 MHz" + config GPU_OC_600 + bool "600 MHz" + config GPU_OC_666 + bool "666 MHz" + config GPU_OC_700 + bool "700 MHz" + +endchoice + +config LP_OVERCLOCK + bool "Enable LP overclock for Tegra3" + depends on TEGRA_SILICON_PLATFORM + default n + ---help--- + Choose y to overclock the LP core. + If Off, maximum clock speed is 475MHz. + If On, LP clock speed can be selected. + +choice + + depends on LP_OVERCLOCK + prompt "Maximum LP Rate" + default LP_OC_666 + ---help--- + Select the desired LP overclock rate. + + If you are not sure what you are doing, leave this + option alone! 
+ config LP_OC_555 + bool "555 MHz" + config LP_OC_620 + bool "620 MHz" + config LP_OC_666 + bool "666 MHz" + config LP_OC_700 + bool "700 MHz" + config LP_OC_740 + bool "740 MHz" + +endchoice + +config LP_ONLY + bool "Only use the Low-Power companion core" + depends on TEGRA_SILICON_PLATFORM + default n + +config AUDIO_MIN_PERFLOCK + bool "Minimum Audio Playback performance lock" + depends on SND_SOC_TEGRA_RT5640 + default n + help + Tegra3 minimum Audio Performance Lock to prevent audio playback + underruns + config TEGRA_CPU_DVFS bool "Enable voltage scaling on Tegra CPU" depends on TEGRA_SILICON_PLATFORM @@ -411,6 +521,27 @@ config TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND help Also will restore to original cpu frequency governor when device is resumed +config TEGRA_LP1_950 + bool "LP1 low core voltage" + default n + depends on ARCH_TEGRA_3x_SOC + help + Enable support for LP1 Core voltage to set to lowest + +config TEGRA_RUNNABLE_THREAD + bool "Tegra3 runnable thread hotplug" + depends on ARCH_TEGRA_3x_SOC + default n + help + Tegra3 Runnable Thread calculations for CPU hotplug + +config TEGRA_VARIANT_INFO + bool "Tegra3 variant info" + depends on ARCH_TEGRA_3x_SOC + default y + help + Tegra3 SOC variant info display via debugfs + config TEGRA_STAT_MON bool "Enable H/W statistics monitor" depends on ARCH_TEGRA_2x_SOC diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index 9fceb2d3511..9ea2aefe87a 100755 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -79,8 +79,12 @@ obj-y += reset.o obj-$(CONFIG_TEGRA_SYSTEM_DMA) += dma.o obj-$(CONFIG_CPU_FREQ) += cpu-tegra.o ifeq ($(CONFIG_TEGRA_AUTO_HOTPLUG),y) +ifeq ($(CONFIG_CPUQUIET_FRAMEWORK),y) +obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += cpuquiet.o +else obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += cpu-tegra3.o endif +endif obj-$(CONFIG_TEGRA_PCI) += pcie.o obj-$(CONFIG_USB_SUPPORT) += usb_phy.o ifeq ($(CONFIG_CPU_IDLE),y) diff --git a/arch/arm/mach-tegra/Makefile.boot b/arch/arm/mach-tegra/Makefile.boot index d8cb9173cdf..e672efe4b82 100644 --- a/arch/arm/mach-tegra/Makefile.boot +++ b/arch/arm/mach-tegra/Makefile.boot @@ -8,3 +8,4 @@ initrd_phys-$(CONFIG_ARCH_TEGRA_3x_SOC) := 0x80800000 dtb-$(CONFIG_MACH_HARMONY) += tegra-harmony.dtb dtb-$(CONFIG_MACH_SEABOARD) += tegra-seaboard.dtb +dtb-$(CONFIG_MACH_GROUPER) += tegra30-grouper.dtb diff --git a/arch/arm/mach-tegra/apbio.c b/arch/arm/mach-tegra/apbio.c index e227331c2f0..6b6e07fd5d3 100644 --- a/arch/arm/mach-tegra/apbio.c +++ b/arch/arm/mach-tegra/apbio.c @@ -164,4 +164,4 @@ static int tegra_init_apb_dma(void) #endif return 0; } -arch_initcall(tegra_init_apb_dma); +subsys_initcall(tegra_init_apb_dma); diff --git a/arch/arm/mach-tegra/baseband-xmm-power.c b/arch/arm/mach-tegra/baseband-xmm-power.c index 11d9b3f309c..12f568d1f9f 100755 --- a/arch/arm/mach-tegra/baseband-xmm-power.c +++ b/arch/arm/mach-tegra/baseband-xmm-power.c @@ -562,6 +562,7 @@ void baseband_xmm_set_power_status(unsigned int status) baseband_xmm_power_driver_handle_resume(data); }*/ pr_info("L0\n"); + baseband_xmm_powerstate = status; value = gpio_get_value(data->modem.xmm.ipc_hsic_active); pr_debug("before L0 ipc_hsic_active=%d\n", value); if (!value) { @@ -580,6 +581,7 @@ void baseband_xmm_set_power_status(unsigned int status) break; case BBXMM_PS_L2: pr_info("L2\n"); + baseband_xmm_powerstate = status; wake_unlock(&wakelock); modem_sleep_flag = true; break; @@ -594,6 +596,7 @@ void baseband_xmm_set_power_status(unsigned int status) } } pr_info("L3\n"); + baseband_xmm_powerstate = status; if 
(wake_lock_active(&wakelock)) { pr_info("%s: releasing wakelock before L3\n", __func__); @@ -613,9 +616,9 @@ void baseband_xmm_set_power_status(unsigned int status) } else goto exit_without_state_change; default: + baseband_xmm_powerstate = status; break; } - baseband_xmm_powerstate = status; pr_debug("BB XMM POWER STATE = %d\n", status); return; @@ -706,6 +709,7 @@ irqreturn_t baseband_xmm_power_ipc_ap_wake_irq(int irq, void *dev_id) wakeup_pending = true; spin_unlock(&xmm_lock); pr_info("CP L3 -> L0\n"); + ril_change_modem_crash_mode(); } } /* save gpio state */ @@ -807,7 +811,7 @@ static void baseband_xmm_power_init2_work(struct work_struct *work) } else pr_err("%s: hsic_register is missing\n", __func__); register_hsic_device = false; - modem_reset_flag == 0; + modem_reset_flag = 0; } } diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index e05433934d7..a8a643b2668 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -39,6 +39,14 @@ #include "gpio-names.h" #include +#include +static bool otf_scaling = 0; +module_param(otf_scaling, bool, 0644); +static unsigned int min_backlight = 10; +module_param(min_backlight, uint, 0644); +static unsigned int max_backlight = 255; +module_param(max_backlight, uint, 0644); + /* grouper default display board pins */ #define grouper_lvds_avdd_en TEGRA_GPIO_PH6 #define grouper_lvds_rst TEGRA_GPIO_PG7 @@ -66,7 +74,7 @@ static struct regulator *grouper_lvds_reg; static struct regulator *grouper_lvds_vdd_panel; static tegra_dc_bl_output grouper_bl_output_measured = { - 0, 13, 13, 13, 13, 13, 13, 13, + 0, 2, 4, 6, 9, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, @@ -151,11 +159,38 @@ static int grouper_backlight_notify(struct device *unused, int brightness) brightness = (brightness * cur_sd_brightness) / 255; /* Apply any backlight response curve */ - if (brightness > 255) + if (brightness > 255) { pr_info("Error: Brightness > 255!\n"); - else - brightness = bl_output[brightness]; - + } else { +#ifdef CONFIG_CUSTOM_BRIGHTNESS + if ((min_backlight == 0) || (max_backlight == 0)) { +#endif + brightness = bl_output[brightness]; +#ifdef CONFIG_CUSTOM_BRIGHTNESS + } else { + if (otf_scaling == 0) { + int min_bl_adj = min_backlight; + /* Ensure that min backlight goes up to at least 10 to prevent auto-min != slider-min */ + if (min_backlight < 11) + min_bl_adj = 11; + if ((brightness > 0) && (brightness < min_bl_adj)) { + brightness = min_backlight; + } else if (brightness > max_backlight) { + brightness = max_backlight; + } else { + brightness = bl_output[brightness]; + } + } else { + if (brightness == 0) { + brightness = 0; + } else { + brightness = min_backlight + + DIV_ROUND_CLOSEST(((max_backlight - min_backlight) * max((brightness - 10),0)),245); + } + } + } +#endif + } return brightness; } @@ -410,7 +445,17 @@ static struct tegra_dc_sd_settings grouper_sd_settings = { .bin_width = -1, .aggressiveness = 1, .phase_in_adjustments = true, - .panel_min_brightness = 13, + .panel_min_brightness = 10, +#ifdef CONFIG_TEGRA_SD_GEN2 + .k_limit_enable = true, + .k_limit = 180, + .sd_window_enable = false, + .soft_clipping_enable = true, + /* Low soft clipping threshold to compensate for aggressive k_limit */ + .soft_clipping_threshold = 128, + .smooth_k_enable = true, + .smooth_k_incr = 4, +#endif .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, @@ -549,7 +594,7 @@ static struct 
tegra_dc_out grouper_disp1_out = { .type = TEGRA_DC_OUT_RGB, .depth = 18, - .dither = TEGRA_DC_ORDERED_DITHER, + .dither = TEGRA_DC_ERRDIFF_DITHER, .modes = grouper_panel_modes, .n_modes = ARRAY_SIZE(grouper_panel_modes), diff --git a/arch/arm/mach-tegra/board-grouper-power.c b/arch/arm/mach-tegra/board-grouper-power.c index 9780d5f555f..2c8ff7c7fc2 100755 --- a/arch/arm/mach-tegra/board-grouper-power.c +++ b/arch/arm/mach-tegra/board-grouper-power.c @@ -615,6 +615,14 @@ static struct tegra_suspend_platform_data grouper_suspend_data = { .cpu_lp2_min_residency = 2000, .board_suspend = grouper_board_suspend, .board_resume = grouper_board_resume, +#ifdef CONFIG_TEGRA_LP1_950 + .lp1_lowvolt_support = true, + .i2c_base_addr = TEGRA_I2C5_BASE, + .pmuslave_addr = 0x24, + .core_reg_addr = 0x5B, + .lp1_core_volt_low = 0x1D, + .lp1_core_volt_high = 0x33, +#endif }; int __init grouper_suspend_init(void) diff --git a/arch/arm/mach-tegra/board-grouper-sdhci.c b/arch/arm/mach-tegra/board-grouper-sdhci.c index 282a19db292..b2b639baa9b 100644 --- a/arch/arm/mach-tegra/board-grouper-sdhci.c +++ b/arch/arm/mach-tegra/board-grouper-sdhci.c @@ -40,16 +40,45 @@ static void (*wifi_status_cb)(int card_present, void *dev_id); static void *wifi_status_cb_devid; -static int grouper_wifi_status_register(void (*callback)(int , void *),void *); +static int grouper_wifi_status_register(void (*callback)(int, void *), void *); static int grouper_wifi_reset(int on); static int grouper_wifi_power(int on); static int grouper_wifi_set_carddetect(int val); +/* Customized Locale table : OPTIONAL feature */ +#define WLC_CNTRY_BUF_SZ 4 +typedef struct cntry_locales_custom { + char iso_abbrev[WLC_CNTRY_BUF_SZ]; + char custom_locale[WLC_CNTRY_BUF_SZ]; + int custom_locale_rev; +} cntry_locales_custom_t; + +static cntry_locales_custom_t grouper_wifi_translate_custom_table[] = { +/* Table should be filled out based on custom platform regulatory requirement */ + {"RU", "XY", 4}, + {"IR", "XY", 4} +}; + +static void *grouper_wifi_get_country_code(char *ccode) +{ + int size = ARRAY_SIZE(grouper_wifi_translate_custom_table); + int i; + + if (!ccode) + return NULL; + + for (i = 0; i < size; i++) + if (strcmp(ccode, grouper_wifi_translate_custom_table[i].iso_abbrev) == 0) + return &grouper_wifi_translate_custom_table[i]; + return NULL; +} + static struct wifi_platform_data grouper_wifi_control = { .set_power = grouper_wifi_power, .set_reset = grouper_wifi_reset, .set_carddetect = grouper_wifi_set_carddetect, + .get_country_code = grouper_wifi_get_country_code, }; static struct resource wifi_resource[] = { diff --git a/arch/arm/mach-tegra/board-grouper-sensors.c b/arch/arm/mach-tegra/board-grouper-sensors.c index 0b3ef7b8772..08141bff9ab 100644 --- a/arch/arm/mach-tegra/board-grouper-sensors.c +++ b/arch/arm/mach-tegra/board-grouper-sensors.c @@ -40,6 +40,9 @@ #define CAM1_LDO_EN_GPIO TEGRA_GPIO_PR6 #define FRONT_YUV_SENSOR_RST_GPIO TEGRA_GPIO_PO0 +#define FRONT_YUV_SENSOR_RST_GPIO_BACH TEGRA_GPIO_PBB0 + +static int front_yuv_sensor_rst_gpio = FRONT_YUV_SENSOR_RST_GPIO; static struct regulator *grouper_1v8_ldo5; static struct regulator *grouper_1v8_cam3; @@ -63,6 +66,11 @@ static const struct i2c_board_info cap1106_i2c1_board_info[] = { static int grouper_camera_init(void) { + u32 project_info = grouper_get_project_id(); + + if (project_info == GROUPER_PROJECT_BACH) + front_yuv_sensor_rst_gpio = FRONT_YUV_SENSOR_RST_GPIO_BACH; + pmic_id = grouper_query_pmic_id(); printk("%s: pmic_id= 0x%X", __FUNCTION__, pmic_id); #if 0 @@ -165,20 +173,6 @@ 
static int yuv_front_sensor_power_on(void) int ret; printk("yuv_front_sensor_power_on+\n"); - /* AVDD_CAM1, 2.85V, controlled by CAM1_LDO_EN */ - pr_info("gpio %d read as %d\n",CAM1_LDO_EN_GPIO, gpio_get_value(CAM1_LDO_EN_GPIO)); - tegra_gpio_enable(CAM1_LDO_EN_GPIO); - ret = gpio_request(CAM1_LDO_EN_GPIO, "cam1_ldo_en"); - if (ret < 0) - pr_err("%s: gpio_request failed for gpio %s, ret= %d\n", - __func__, "CAM1_LDO_EN_GPIO", ret); - pr_info("gpio %d: %d", CAM1_LDO_EN_GPIO, gpio_get_value(CAM1_LDO_EN_GPIO)); - gpio_set_value(CAM1_LDO_EN_GPIO, 1); - gpio_direction_output(CAM1_LDO_EN_GPIO, 1); - pr_info("--> %d\n", gpio_get_value(CAM1_LDO_EN_GPIO)); - - msleep(5); - if (!grouper_1v8_ldo5) { if(pmic_id == GROUPER_PMIC_MAXIM) { grouper_1v8_ldo5 = regulator_get(NULL, "vdd_sensor_1v8"); @@ -186,43 +180,49 @@ static int yuv_front_sensor_power_on(void) grouper_1v8_ldo5 = regulator_get(NULL, "avdd_vdac"); } if (IS_ERR_OR_NULL(grouper_1v8_ldo5)) { + if (grouper_1v8_ldo5) { + regulator_put(grouper_1v8_ldo5); + } grouper_1v8_ldo5 = NULL; - pr_err("Can't get grouper_1v8_ldo5.\n"); - goto fail_to_get_reg; + pr_err("%s-: Can't get grouper_1v8_ldo5.\n", __func__); + return -ENODEV; } regulator_set_voltage(grouper_1v8_ldo5, 1800000, 1800000); regulator_enable(grouper_1v8_ldo5); } + msleep(10); + + /* AVDD_CAM1, 2.85V, controlled by CAM1_LDO_EN */ + pr_info("gpio %d read as %d\n",CAM1_LDO_EN_GPIO, gpio_get_value(CAM1_LDO_EN_GPIO)); + tegra_gpio_enable(CAM1_LDO_EN_GPIO); + ret = gpio_request(CAM1_LDO_EN_GPIO, "cam1_ldo_en"); + if (ret < 0) + pr_err("%s: gpio_request failed for gpio %s, ret= %d\n", + __func__, "CAM1_LDO_EN_GPIO", ret); + pr_info("gpio %d: %d", CAM1_LDO_EN_GPIO, gpio_get_value(CAM1_LDO_EN_GPIO)); + gpio_set_value(CAM1_LDO_EN_GPIO, 1); + gpio_direction_output(CAM1_LDO_EN_GPIO, 1); + pr_info("--> %d\n", gpio_get_value(CAM1_LDO_EN_GPIO)); + tegra_pinmux_set_tristate(TEGRA_PINGROUP_CAM_MCLK, TEGRA_TRI_NORMAL); + msleep(10); + /* yuv_sensor_rst_lo*/ - tegra_gpio_enable(FRONT_YUV_SENSOR_RST_GPIO); - ret = gpio_request(FRONT_YUV_SENSOR_RST_GPIO, "yuv_sensor_rst_lo"); + tegra_gpio_enable(front_yuv_sensor_rst_gpio); + ret = gpio_request(front_yuv_sensor_rst_gpio, "yuv_sensor_rst_lo"); if (ret < 0) pr_err("%s: gpio_request failed for gpio %s, ret= %d\n", __func__, "FRONT_YUV_SENSOR_RST_GPIO", ret); - pr_info("gpio %d: %d", FRONT_YUV_SENSOR_RST_GPIO, gpio_get_value(FRONT_YUV_SENSOR_RST_GPIO)); - gpio_set_value(FRONT_YUV_SENSOR_RST_GPIO, 1); - gpio_direction_output(FRONT_YUV_SENSOR_RST_GPIO, 1); - pr_info("--> %d\n", gpio_get_value(FRONT_YUV_SENSOR_RST_GPIO)); + pr_info("gpio %d: %d", front_yuv_sensor_rst_gpio, gpio_get_value(front_yuv_sensor_rst_gpio)); + gpio_set_value(front_yuv_sensor_rst_gpio, 1); + gpio_direction_output(front_yuv_sensor_rst_gpio, 1); + pr_info("--> %d\n", gpio_get_value(front_yuv_sensor_rst_gpio)); printk("yuv_front_sensor_power_on-\n"); return 0; - -fail_to_get_reg: - if (grouper_1v8_ldo5) { - regulator_put(grouper_1v8_ldo5); - grouper_1v8_ldo5 = NULL; - } - - gpio_set_value(CAM1_LDO_EN_GPIO, 0); - gpio_direction_output(CAM1_LDO_EN_GPIO, 0); - gpio_free(CAM1_LDO_EN_GPIO); - - printk("yuv_front_sensor_power_on- : -ENODEV\n"); - return -ENODEV; } static int yuv_front_sensor_power_off(void) @@ -230,24 +230,26 @@ static int yuv_front_sensor_power_off(void) printk("%s+\n", __FUNCTION__); if((pmic_id == GROUPER_PMIC_MAXIM) || (pmic_id == GROUPER_PMIC_TI)) { - gpio_set_value(FRONT_YUV_SENSOR_RST_GPIO, 0); - gpio_direction_output(FRONT_YUV_SENSOR_RST_GPIO, 0); - 
gpio_free(FRONT_YUV_SENSOR_RST_GPIO); + gpio_set_value(front_yuv_sensor_rst_gpio, 0); + gpio_direction_output(front_yuv_sensor_rst_gpio, 0); + gpio_free(front_yuv_sensor_rst_gpio); + + msleep(10); tegra_pinmux_set_tristate(TEGRA_PINGROUP_CAM_MCLK, TEGRA_TRI_TRISTATE); + gpio_set_value(CAM1_LDO_EN_GPIO, 0); + gpio_direction_output(CAM1_LDO_EN_GPIO, 0); + gpio_free(CAM1_LDO_EN_GPIO); + + msleep(10); + if (grouper_1v8_ldo5) { regulator_disable(grouper_1v8_ldo5); regulator_put(grouper_1v8_ldo5); grouper_1v8_ldo5 = NULL; } - msleep(5); - - gpio_set_value(CAM1_LDO_EN_GPIO, 0); - gpio_direction_output(CAM1_LDO_EN_GPIO, 0); - gpio_free(CAM1_LDO_EN_GPIO); - printk("%s-\n", __FUNCTION__); return 0; } else { diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c index 162063a8529..67ea3c39cbe 100644 --- a/arch/arm/mach-tegra/clock.c +++ b/arch/arm/mach-tegra/clock.c @@ -525,13 +525,11 @@ unsigned long clk_get_rate_all_locked(struct clk *c) return rate; } -long clk_round_rate(struct clk *c, unsigned long rate) +long clk_round_rate_locked(struct clk *c, unsigned long rate) { - unsigned long flags, max_rate; + unsigned long max_rate; long ret; - clk_lock_save(c, &flags); - if (!c->ops || !c->ops->round_rate) { ret = -ENOSYS; goto out; @@ -544,6 +542,16 @@ long clk_round_rate(struct clk *c, unsigned long rate) ret = c->ops->round_rate(c, rate); out: + return ret; +} + +long clk_round_rate(struct clk *c, unsigned long rate) +{ + unsigned long flags; + long ret; + + clk_lock_save(c, &flags); + ret = clk_round_rate_locked(c, rate); clk_unlock_restore(c, &flags); return ret; } @@ -684,7 +692,10 @@ void __init tegra_init_max_rate(struct clk *c, unsigned long max_rate) pr_warning("Lowering %s maximum rate from %lu to %lu\n", c->name, c->max_rate, max_rate); - + if(!strncmp(c->name,"cpu_g",strlen("cpu_g"))){ + pr_warning("Keep max_rate of %s as %lu \n",c->name, c->max_rate); + return; + } c->max_rate = max_rate; list_for_each_entry(shared_bus_user, &c->shared_bus_list, u.shared_bus_user.node) { diff --git a/arch/arm/mach-tegra/clock.h b/arch/arm/mach-tegra/clock.h index dde9e07292a..a9945e3c229 100644 --- a/arch/arm/mach-tegra/clock.h +++ b/arch/arm/mach-tegra/clock.h @@ -240,6 +240,7 @@ unsigned long clk_get_min_rate(struct clk *c); unsigned long clk_get_rate_locked(struct clk *c); int clk_set_rate_locked(struct clk *c, unsigned long rate); int clk_set_parent_locked(struct clk *c, struct clk *parent); +long clk_round_rate_locked(struct clk *c, unsigned long rate); int tegra_clk_shared_bus_update(struct clk *c); void tegra2_sdmmc_tap_delay(struct clk *c, int delay); void tegra3_set_cpu_skipper_delay(int delay); diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index d91ad83fd6a..4eabbdc436d 100755 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -103,7 +103,7 @@ static struct board_info pmu_board_info; static struct board_info display_board_info; static struct board_info camera_board_info; -static int pmu_core_edp = 1200; /* default 1.2V EDP limit */ +static int pmu_core_edp = 1700; /* default 1.2V EDP limit */ static int board_panel_type; static enum power_supply_type pow_supply_type = POWER_SUPPLY_TYPE_MAINS; @@ -172,7 +172,7 @@ static __initdata struct tegra_clk_init_table common_clk_init_table[] = { { "pll_p_out2", "pll_p", 48000000, false }, { "pll_p_out3", "pll_p", 72000000, true }, { "pll_p_out4", "pll_p", 108000000, false }, - { "pll_m", "clk_m", 0, true }, + { "pll_m", "pll_ref", 0, true }, { "pll_m_out1", "pll_m", 120000000, true }, { 
"sclk", "pll_c_out1", 40000000, true }, { "hclk", "sclk", 40000000, true }, @@ -961,13 +961,20 @@ void __init tegra_ram_console_debug_reserve(unsigned long ram_console_size) { struct resource *res; long ret; + unsigned long real_start, real_size; res = platform_get_resource(&ram_console_device, IORESOURCE_MEM, 0); if (!res) goto fail; + res->start = memblock_end_of_DRAM() - ram_console_size; res->end = res->start + ram_console_size - 1; - ret = memblock_remove(res->start, ram_console_size); + + // Register an extra 1M before ramconsole to store kexec stuff + real_start = res->start - SZ_1M; + real_size = ram_console_size + SZ_1M; + + ret = memblock_remove(real_start, real_size); if (ret) goto fail; diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index 871988dc280..0d75fcd5e61 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -56,7 +56,12 @@ static DEFINE_MUTEX(tegra_cpu_lock); static bool is_suspended; static int suspend_index; -static bool force_policy_max; +static bool force_policy_max = 1; +static bool coldstart = 1; + +#define TEGRA3_OVERCLOCK +#define TEGRA3_DYNAMIC_EDP_THRES_TEMP (60) +static bool edp_enable = 1; static int force_policy_max_set(const char *arg, const struct kernel_param *kp) { @@ -220,6 +225,16 @@ int tegra_edp_update_thermal_zone(int temperature) int nlimits = cpu_edp_limits_size; int index; +#ifdef TEGRA3_OVERCLOCK + if(temperature >= TEGRA3_DYNAMIC_EDP_THRES_TEMP) { + edp_enable = 1; + pr_info("%s: Dynamic EDP enabled, temp: %u\n", __func__, temperature); + } else { + edp_enable = 0; + pr_info("%s: Dynamic EDP disabled, temp: %u\n", __func__, temperature); + } +#endif + if (!cpu_edp_limits) return -EINVAL; @@ -324,16 +339,26 @@ static int tegra_cpu_edp_notify( edp_update_limit(); cpu_speed = tegra_getspeed(0); + +#ifdef TEGRA3_OVERCLOCK + if(edp_enable) { + new_speed = edp_governor_speed(new_speed); + } else { + new_speed = cpu_speed; + } +#else new_speed = edp_governor_speed(cpu_speed); +#endif if (new_speed < cpu_speed) { ret = tegra_cpu_set_speed_cap(NULL); if (ret) { cpu_clear(cpu, edp_cpumask); edp_update_limit(); } + if (new_speed > 1000000) + printk(KERN_DEBUG "tegra CPU:%sforce EDP limit %u kHz" + "\n", ret ? " failed to " : " ", new_speed); - printk(KERN_DEBUG "tegra CPU:%sforce EDP limit %u kHz" - "\n", ret ? 
" failed to " : " ", new_speed); } mutex_unlock(&tegra_cpu_lock); break; @@ -468,7 +493,7 @@ unsigned int tegra_getspeed(unsigned int cpu) rate = clk_get_rate(cpu_clk) / 1000; return rate; } -extern bool stress_test_enable; +//extern bool stress_test_enable; int tegra_update_cpu_speed(unsigned long rate) { int ret = 0; @@ -506,10 +531,10 @@ int tegra_update_cpu_speed(unsigned long rate) for_each_online_cpu(freqs.cpu) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if(stress_test_enable) - printk(KERN_DEBUG "cpufreq-tegra: transition: %u --> %u\n", - freqs.old, freqs.new); - +// if(stress_test_enable) +// printk(KERN_DEBUG "cpufreq-tegra: transition: %u --> %u\n", +// freqs.old, freqs.new); +// ret = clk_set_rate(cpu_clk, freqs.new * 1000); if (ret) { pr_err("cpu-tegra: Failed to set cpu frequency to %d kHz\n", @@ -584,7 +609,15 @@ int tegra_cpu_set_speed_cap(unsigned int *speed_cap) return -EBUSY; new_speed = tegra_throttle_governor_speed(new_speed); + +#ifdef TEGRA3_OVERCLOCK + if(edp_enable) { + new_speed = edp_governor_speed(new_speed); + } +#else new_speed = edp_governor_speed(new_speed); +#endif + new_speed = user_cap_speed(new_speed); if (speed_cap) *speed_cap = new_speed; @@ -604,7 +637,14 @@ int tegra_suspended_target(unsigned int target_freq) /* apply only "hard" caps */ new_speed = tegra_throttle_governor_speed(new_speed); +#ifdef TEGRA3_OVERCLOCK + if(edp_enable) { + pr_info("%s : Dynamic EDP is enabled\n", __func__); + new_speed = edp_governor_speed(new_speed); + } +#else new_speed = edp_governor_speed(new_speed); +#endif return tegra_update_cpu_speed(new_speed); } @@ -688,7 +728,7 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) target_cpu_speed[policy->cpu] = policy->cur; /* FIXME: what's the actual transition time? 
*/ - policy->cpuinfo.transition_latency = 300 * 1000; + policy->cpuinfo.transition_latency = 40 * 1000; policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->related_cpus, cpu_possible_mask); @@ -697,6 +737,11 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) register_pm_notifier(&tegra_cpu_pm_notifier); } + if (coldstart == 1) { + policy->max = 1300000; + coldstart = 0; + } + return 0; } diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index cc01ed854a5..2a9119c39b4 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,8 +39,8 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 70 -#define UP2Gn_DELAY_MS 100 +#define UP2G0_DELAY_MS 300 +#define UP2Gn_DELAY_MS 150 #define DOWN_DELAY_MS 2000 static struct mutex *tegra3_cpu_lock; @@ -66,13 +66,17 @@ module_param(idle_bottom_freq, uint, 0644); static int mp_overhead = 10; module_param(mp_overhead, int, 0644); -static int balance_level = 75; +static int balance_level = 60; module_param(balance_level, int, 0644); static struct clk *cpu_clk; static struct clk *cpu_g_clk; static struct clk *cpu_lp_clk; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD +static unsigned long last_change_time; +#endif + static struct { cputime64_t time_up_total; u64 last_update; @@ -147,9 +151,12 @@ static int hp_state_set(const char *arg, const struct kernel_param *kp) if (ret == 0) { if ((hp_state == TEGRA_HP_DISABLED) && - (old_state != TEGRA_HP_DISABLED)) - pr_info("Tegra auto-hotplug disabled\n"); - else if (hp_state != TEGRA_HP_DISABLED) { + (old_state != TEGRA_HP_DISABLED)) { + mutex_unlock(tegra3_cpu_lock); + cancel_delayed_work_sync(&hotplug_work); + mutex_lock(tegra3_cpu_lock); + pr_info("Tegra auto-hotplug disabled\n"); + } else if (hp_state != TEGRA_HP_DISABLED) { if (old_state == TEGRA_HP_DISABLED) { pr_info("Tegra auto-hotplug enabled\n"); hp_init_stats(); @@ -183,6 +190,16 @@ enum { TEGRA_CPU_SPEED_SKEWED, }; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD +#define NR_FSHIFT 2 +static unsigned int nr_run_thresholds[] = { +/* 1, 2, 3, 4 - on-line cpus target */ + 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ +}; +static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ +static unsigned int nr_run_last; +#endif + static noinline int tegra_cpu_speed_balance(void) { unsigned long highest_speed = tegra_cpu_highest_speed(); @@ -191,17 +208,42 @@ static noinline int tegra_cpu_speed_balance(void) unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? 
: 4; unsigned int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + unsigned int avg_nr_run = avg_nr_running(); + unsigned int nr_run; + + /* Evaluate: + * - distribution of freq targets for already on-lined CPUs + * - average number of runnable threads + * - effective MIPS available within EDP frequency limits, + * and return: + * TEGRA_CPU_SPEED_BALANCED to bring one more CPU core on-line + * TEGRA_CPU_SPEED_BIASED to keep CPU core composition unchanged + * TEGRA_CPU_SPEED_SKEWED to remove CPU core off-line + */ + for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { + unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; + if (nr_run_last <= nr_run) + nr_threshold += nr_run_hysteresis; + if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT))) + break; + } + nr_run_last = nr_run; +#endif - /* balanced: freq targets for all CPUs are above 50% of highest speed - biased: freq target for at least one CPU is below 50% threshold - skewed: freq targets for at least 2 CPUs are below 25% threshold */ if (((tegra_count_slow_cpus(skewed_speed) >= 2) || +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + (nr_run < nr_cpus) || +#endif tegra_cpu_edp_favor_down(nr_cpus, mp_overhead) || (highest_speed <= idle_bottom_freq) || (nr_cpus > max_cpus)) && (nr_cpus > min_cpus)) return TEGRA_CPU_SPEED_SKEWED; if (((tegra_count_slow_cpus(balanced_speed) >= 1) || +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + (nr_run <= nr_cpus) || +#endif (!tegra_cpu_edp_favor_up(nr_cpus, mp_overhead)) || (highest_speed <= idle_bottom_freq) || (nr_cpus == max_cpus)) && (nr_cpus >= min_cpus)) @@ -219,7 +261,9 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) bool up = false; unsigned int cpu = nr_cpu_ids; unsigned long now = jiffies; +#ifndef CONFIG_TEGRA_RUNNABLE_THREAD static unsigned long last_change_time; +#endif mutex_lock(tegra3_cpu_lock); @@ -232,7 +276,12 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { up = false; } else if (!is_lp_cluster() && !no_lp && +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS) && + ((now - last_change_time) >= down_delay)) { +#else !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS)) { +#endif if(!clk_set_parent(cpu_clk, cpu_lp_clk)) { hp_stats_update(CONFIG_NR_CPUS, true); hp_stats_update(0, false); @@ -242,11 +291,18 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) } } queue_delayed_work( +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + hotplug_wq, &hotplug_work, up2gn_delay); +#else hotplug_wq, &hotplug_work, down_delay); +#endif break; case TEGRA_HP_UP: if (is_lp_cluster() && !no_lp) { if(!clk_set_parent(cpu_clk, cpu_g_clk)) { +#ifndef CONFIG_TEGRA_RUNNABLE_THREAD + last_change_time = now; +#endif hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); /* catch-up with governor target speed */ @@ -291,13 +347,13 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { if (up){ - printk("cpu_up(%u)+\n",cpu); + pr_debug("cpu_up(%u)+\n",cpu); cpu_up(cpu); - printk("cpu_up(%u)-\n",cpu); + pr_debug("cpu_up(%u)-\n",cpu); }else{ - printk("cpu_down(%u)+\n",cpu); + pr_debug("cpu_down(%u)+\n",cpu); cpu_down(cpu); - printk("cpu_down(%u)-\n",cpu); + pr_debug("cpu_down(%u)-\n",cpu); } } } @@ -313,6 +369,9 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) tegra_update_cpu_speed(speed); if (!clk_set_parent(cpu_clk, cpu_g_clk)) { +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + last_change_time = jiffies; 
+#endif hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } @@ -378,7 +437,11 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) } else if (cpu_freq <= bottom_freq) { hp_state = TEGRA_HP_DOWN; queue_delayed_work( +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + hotplug_wq, &hotplug_work, up2gn_delay); +#else hotplug_wq, &hotplug_work, down_delay); +#endif } break; case TEGRA_HP_DOWN: @@ -394,7 +457,11 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) if (cpu_freq <= bottom_freq) { hp_state = TEGRA_HP_DOWN; queue_delayed_work( +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + hotplug_wq, &hotplug_work, up_delay); +#else hotplug_wq, &hotplug_work, down_delay); +#endif } else if (cpu_freq <= top_freq) { hp_state = TEGRA_HP_IDLE; } @@ -538,7 +605,11 @@ static int __init tegra_auto_hotplug_debug_init(void) pm_qos_add_request(&min_cpu_req, PM_QOS_MIN_ONLINE_CPUS, PM_QOS_DEFAULT_VALUE); pm_qos_add_request(&max_cpu_req, PM_QOS_MAX_ONLINE_CPUS, +#ifdef DEFAULT_DUAL_CORE + (s32)2); +#else PM_QOS_DEFAULT_VALUE); +#endif if (!debugfs_create_file( "min_cpus", S_IRUGO, hp_debugfs_root, NULL, &min_cpus_fops)) diff --git a/arch/arm/mach-tegra/cpuidle.c b/arch/arm/mach-tegra/cpuidle.c index 47d5996e596..0e0ec7f2dbf 100644 --- a/arch/arm/mach-tegra/cpuidle.c +++ b/arch/arm/mach-tegra/cpuidle.c @@ -80,7 +80,7 @@ static int tegra_idle_enter_lp3(struct cpuidle_device *dev, return (int)us; } -static bool lp2_in_idle __read_mostly = false; +static bool lp2_in_idle __read_mostly = true; #ifdef CONFIG_PM_SLEEP static bool lp2_in_idle_modifiable __read_mostly = true; diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c new file mode 100644 index 00000000000..f212e9b632a --- /dev/null +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -0,0 +1,465 @@ +/* + * arch/arm/mach-tegra/cpuquiet.c + * + * Cpuquiet driver for Tegra3 CPUs + * + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pm.h" +#include "cpu-tegra.h" +#include "clock.h" + +#define INITIAL_STATE TEGRA_CPQ_IDLE +#define UP2G_DELAY_MS 300 +#define UP_DELAY_MS 150 +#define DOWN2LP_DELAY_MS 3000 +#define DOWN_DELAY_MS 2000 + +static struct mutex *tegra3_cpu_lock; +static struct workqueue_struct *cpuquiet_wq; +static struct delayed_work cpuquiet_work; +static struct work_struct minmax_work; + +static struct kobject *tegra_auto_sysfs_kobject; + +static bool no_lp; +static bool enable; +static unsigned long up_delay; +static unsigned long up2g_delay; +static unsigned long down_delay; +static unsigned long down2lp_delay; +static int mp_overhead = 10; +static unsigned int idle_top_freq; +static unsigned int idle_bottom_freq; + +static struct clk *cpu_clk; +static struct clk *cpu_g_clk; +static struct clk *cpu_lp_clk; + +static struct cpumask cr_online_requests; + +enum { + TEGRA_CPQ_DISABLED = 0, + TEGRA_CPQ_IDLE, + TEGRA_CPQ_SWITCH_TO_LP, + TEGRA_CPQ_SWITCH_TO_G, +}; + +static int cpq_state; + +static int update_core_config(unsigned int cpunumber, bool up) +{ + int ret = -EINVAL; + unsigned int nr_cpus = num_online_cpus(); + int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; + int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + + if (cpq_state == TEGRA_CPQ_DISABLED || cpunumber >= nr_cpu_ids) + return ret; + + if (up) { + if(is_lp_cluster()) { + cpumask_set_cpu(cpunumber, &cr_online_requests); + ret = -EBUSY; + } else { + if (tegra_cpu_edp_favor_up(nr_cpus, mp_overhead) && + nr_cpus < max_cpus) + ret = cpu_up(cpunumber); + } + } else { + if (is_lp_cluster()) { + ret = -EBUSY; + } else { + if (nr_cpus > min_cpus) + ret = cpu_down(cpunumber); + } + } + + return ret; +} + +static int tegra_quiesence_cpu(unsigned int cpunumber) +{ + return update_core_config(cpunumber, false); +} + +static int tegra_wake_cpu(unsigned int cpunumber) +{ + return update_core_config(cpunumber, true); +} + +static struct cpuquiet_driver tegra_cpuquiet_driver = { + .name = "tegra", + .quiesence_cpu = tegra_quiesence_cpu, + .wake_cpu = tegra_wake_cpu, +}; + +static void apply_core_config(void) +{ + unsigned int cpu; + + if (is_lp_cluster() || cpq_state == TEGRA_CPQ_DISABLED) + return; + + for_each_cpu_mask(cpu, cr_online_requests) { + if (cpu < nr_cpu_ids && !cpu_online(cpu)) + if (!tegra_wake_cpu(cpu)) + cpumask_clear_cpu(cpu, &cr_online_requests); + } +} + +static void tegra_cpuquiet_work_func(struct work_struct *work) +{ + int device_busy = -1; + + mutex_lock(tegra3_cpu_lock); + + switch(cpq_state) { + case TEGRA_CPQ_DISABLED: + case TEGRA_CPQ_IDLE: + break; + case TEGRA_CPQ_SWITCH_TO_G: + if (is_lp_cluster()) { + if(!clk_set_parent(cpu_clk, cpu_g_clk)) { + /*catch-up with governor target speed */ + tegra_cpu_set_speed_cap(NULL); + /* process pending core requests*/ + device_busy = 0; + } + } + break; + case TEGRA_CPQ_SWITCH_TO_LP: + if (!is_lp_cluster() && !no_lp && + num_online_cpus() == 1) { + if (!clk_set_parent(cpu_clk, cpu_lp_clk)) { + /*catch-up with governor target speed*/ + tegra_cpu_set_speed_cap(NULL); + device_busy = 1; + } + } + break; + default: + pr_err("%s: invalid tegra hotplug state %d\n", + __func__, cpq_state); + } + + mutex_unlock(tegra3_cpu_lock); + + if (device_busy == 1) { + cpuquiet_device_busy(); + } else if (!device_busy) { + apply_core_config(); + cpuquiet_device_free(); + } +} + +static void min_max_constraints_workfunc(struct work_struct *work) +{ 
+ int count = -1; + bool up = false; + unsigned int cpu; + + int nr_cpus = num_online_cpus(); + int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; + int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + + if (is_lp_cluster()) + return; + + if (nr_cpus < min_cpus) { + up = true; + count = min_cpus - nr_cpus; + } else if (nr_cpus > max_cpus && max_cpus >= min_cpus) { + count = nr_cpus - max_cpus; + } + + for (;count > 0; count--) { + if (up) { + cpu = cpumask_next_zero(0, cpu_online_mask); + if (cpu < nr_cpu_ids) + cpu_up(cpu); + else + break; + } else { + cpu = cpumask_next(0, cpu_online_mask); + if (cpu < nr_cpu_ids) + cpu_down(cpu); + else + break; + } + } +} + +static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) +{ + bool g_cluster = false; + + mutex_lock(tegra3_cpu_lock); + + if ((n >= 2) && is_lp_cluster()) { + /* make sure cpu rate is within g-mode range before switching */ + unsigned long speed = max((unsigned long)tegra_getspeed(0), + clk_get_min_rate(cpu_g_clk) / 1000); + tegra_update_cpu_speed(speed); + + clk_set_parent(cpu_clk, cpu_g_clk); + g_cluster = true; + } + + tegra_cpu_set_speed_cap(NULL); + mutex_unlock(tegra3_cpu_lock); + + schedule_work(&minmax_work); + + if (g_cluster) + cpuquiet_device_free(); + + return NOTIFY_OK; +} + +static int max_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) +{ + if (n < num_online_cpus()) + schedule_work(&minmax_work); + + return NOTIFY_OK; +} + +void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) +{ + if (!is_g_cluster_present()) + return; + + if (cpq_state == TEGRA_CPQ_DISABLED) + return; + + if (suspend) { + cpq_state = TEGRA_CPQ_IDLE; + + /* Switch to G-mode if suspend rate is high enough */ + if (is_lp_cluster() && (cpu_freq >= idle_bottom_freq)) { + clk_set_parent(cpu_clk, cpu_g_clk); + cpuquiet_device_free(); + } + return; + } + + if (is_lp_cluster() && pm_qos_request(PM_QOS_MIN_ONLINE_CPUS) >= 2) { + if (cpq_state != TEGRA_CPQ_SWITCH_TO_G) { + /* Force switch */ + cpq_state = TEGRA_CPQ_SWITCH_TO_G; + queue_delayed_work( + cpuquiet_wq, &cpuquiet_work, up_delay); + } + return; + } + + if (is_lp_cluster() && (cpu_freq >= idle_top_freq || no_lp)) { + cpq_state = TEGRA_CPQ_SWITCH_TO_G; + queue_delayed_work(cpuquiet_wq, &cpuquiet_work, up2g_delay); + } else if (!is_lp_cluster() && !no_lp && + cpu_freq <= idle_bottom_freq) { + cpq_state = TEGRA_CPQ_SWITCH_TO_LP; + queue_delayed_work(cpuquiet_wq, &cpuquiet_work, down2lp_delay); + } else { + cpq_state = TEGRA_CPQ_IDLE; + } +} + +static struct notifier_block min_cpus_notifier = { + .notifier_call = min_cpus_notify, +}; + +static struct notifier_block max_cpus_notifier = { + .notifier_call = max_cpus_notify, +}; + +static void delay_callback(struct cpuquiet_attribute *attr) +{ + unsigned long val; + + if (attr) { + val = (*((unsigned long *)(attr->param))); + (*((unsigned long *)(attr->param))) = msecs_to_jiffies(val); + } +} + +static void enable_callback(struct cpuquiet_attribute *attr) +{ + int disabled = -1; + + mutex_lock(tegra3_cpu_lock); + + if (!enable && cpq_state != TEGRA_CPQ_DISABLED) { + disabled = 1; + cpq_state = TEGRA_CPQ_DISABLED; + } else if (enable && cpq_state == TEGRA_CPQ_DISABLED) { + disabled = 0; + cpq_state = TEGRA_CPQ_IDLE; + tegra_cpu_set_speed_cap(NULL); + } + + mutex_unlock(tegra3_cpu_lock); + + if (disabled == -1) + return; + + if (disabled == 1) { + cancel_delayed_work_sync(&cpuquiet_work); + pr_info("Tegra cpuquiet clusterswitch disabled\n"); + cpuquiet_device_busy(); + } else if 
(!disabled) { + pr_info("Tegra cpuquiet clusterswitch enabled\n"); + cpuquiet_device_free(); + } +} + +CPQ_BASIC_ATTRIBUTE(no_lp, 0644, bool); +CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint); +CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint); +CPQ_BASIC_ATTRIBUTE(mp_overhead, 0644, int); +CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(up2g_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(down2lp_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(enable, 0644, bool, enable_callback); + +static struct attribute *tegra_auto_attributes[] = { + &no_lp_attr.attr, + &up_delay_attr.attr, + &up2g_delay_attr.attr, + &down_delay_attr.attr, + &down2lp_delay_attr.attr, + &idle_top_freq_attr.attr, + &idle_bottom_freq_attr.attr, + &mp_overhead_attr.attr, + &enable_attr.attr, + NULL, +}; + +static const struct sysfs_ops tegra_auto_sysfs_ops = { + .show = cpuquiet_auto_sysfs_show, + .store = cpuquiet_auto_sysfs_store, +}; + +static struct kobj_type ktype_sysfs = { + .sysfs_ops = &tegra_auto_sysfs_ops, + .default_attrs = tegra_auto_attributes, +}; + +static int tegra_auto_sysfs(void) +{ + int err; + + tegra_auto_sysfs_kobject = kzalloc(sizeof(*tegra_auto_sysfs_kobject), + GFP_KERNEL); + + if (!tegra_auto_sysfs_kobject) + return -ENOMEM; + + err = cpuquiet_kobject_init(tegra_auto_sysfs_kobject, &ktype_sysfs, + "tegra_cpuquiet"); + + if (err) + kfree(tegra_auto_sysfs_kobject); + + return err; +} + +int tegra_auto_hotplug_init(struct mutex *cpu_lock) +{ + int err; + + cpu_clk = clk_get_sys(NULL, "cpu"); + cpu_g_clk = clk_get_sys(NULL, "cpu_g"); + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + + if (IS_ERR(cpu_clk) || IS_ERR(cpu_g_clk) || IS_ERR(cpu_lp_clk)) + return -ENOENT; + + /* + * Not bound to the issuer CPU (=> high-priority), has rescue worker + * task, single-threaded, freezable. + */ + cpuquiet_wq = alloc_workqueue( + "cpuquiet", WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1); + + if (!cpuquiet_wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&cpuquiet_work, tegra_cpuquiet_work_func); + INIT_WORK(&minmax_work, min_max_constraints_workfunc); + + idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + idle_bottom_freq = clk_get_min_rate(cpu_g_clk) / 1000; + + up_delay = msecs_to_jiffies(UP_DELAY_MS); + up2g_delay = msecs_to_jiffies(UP2G_DELAY_MS); + down_delay = msecs_to_jiffies(DOWN_DELAY_MS); + down2lp_delay = msecs_to_jiffies(DOWN2LP_DELAY_MS); + cpumask_clear(&cr_online_requests); + tegra3_cpu_lock = cpu_lock; + + cpq_state = INITIAL_STATE; + enable = cpq_state == TEGRA_CPQ_DISABLED ? false : true; + + + pr_info("Tegra cpuquiet initialized: %s\n", + (cpq_state == TEGRA_CPQ_DISABLED) ? 
"disabled" : "enabled"); + + if (pm_qos_add_notifier(PM_QOS_MIN_ONLINE_CPUS, &min_cpus_notifier)) + pr_err("%s: Failed to register min cpus PM QoS notifier\n", + __func__); + if (pm_qos_add_notifier(PM_QOS_MAX_ONLINE_CPUS, &max_cpus_notifier)) + pr_err("%s: Failed to register max cpus PM QoS notifier\n", + __func__); + + err = cpuquiet_register_driver(&tegra_cpuquiet_driver); + if (err) { + destroy_workqueue(cpuquiet_wq); + return err; + } + + err = tegra_auto_sysfs(); + if (err) { + cpuquiet_unregister_driver(&tegra_cpuquiet_driver); + destroy_workqueue(cpuquiet_wq); + } + + return err; +} + +void tegra_auto_hotplug_exit(void) +{ + destroy_workqueue(cpuquiet_wq); + cpuquiet_unregister_driver(&tegra_cpuquiet_driver); + kobject_put(tegra_auto_sysfs_kobject); +} diff --git a/arch/arm/mach-tegra/dvfs.c b/arch/arm/mach-tegra/dvfs.c index 8723e6fa60d..21069e1c1e9 100644 --- a/arch/arm/mach-tegra/dvfs.c +++ b/arch/arm/mach-tegra/dvfs.c @@ -336,11 +336,11 @@ __tegra_dvfs_set_rate(struct dvfs *d, unsigned long rate) if (freqs == NULL || d->millivolts == NULL) return -ENODEV; - if (rate > freqs[d->num_freqs - 1]) { - pr_warn("tegra_dvfs: rate %lu too high for dvfs on %s\n", rate, - d->clk_name); - return -EINVAL; - } +// if (rate > freqs[d->num_freqs - 1]) { +// pr_warn("tegra_dvfs: rate %lu too high for dvfs on %s\n", rate, +// d->clk_name); +// return -EINVAL; +// } if (rate == 0) { d->cur_millivolts = 0; @@ -348,12 +348,12 @@ __tegra_dvfs_set_rate(struct dvfs *d, unsigned long rate) while (i < d->num_freqs && rate > freqs[i]) i++; - if ((d->max_millivolts) && - (d->millivolts[i] > d->max_millivolts)) { - pr_warn("tegra_dvfs: voltage %d too high for dvfs on" - " %s\n", d->millivolts[i], d->clk_name); - return -EINVAL; - } +// if ((d->max_millivolts) && +// (d->millivolts[i] > d->max_millivolts)) { +// pr_warn("tegra_dvfs: voltage %d too high for dvfs on" +// " %s\n", d->millivolts[i], d->clk_name); +// return -EINVAL; +// } d->cur_millivolts = d->millivolts[i]; } diff --git a/arch/arm/mach-tegra/dvfs.h b/arch/arm/mach-tegra/dvfs.h index eaecf425fe8..50b6dfe8230 100644 --- a/arch/arm/mach-tegra/dvfs.h +++ b/arch/arm/mach-tegra/dvfs.h @@ -89,7 +89,7 @@ struct dvfs { int freqs_mult; unsigned long freqs[MAX_DVFS_FREQS]; unsigned long alt_freqs[MAX_DVFS_FREQS]; - const int *millivolts; + int *millivolts; struct dvfs_rail *dvfs_rail; bool auto_dvfs; enum dvfs_alt_freqs alt_freqs_state; diff --git a/arch/arm/mach-tegra/edp.c b/arch/arm/mach-tegra/edp.c index a4be48fed4d..e7e96a22b64 100644 --- a/arch/arm/mach-tegra/edp.c +++ b/arch/arm/mach-tegra/edp.c @@ -362,10 +362,17 @@ void __init tegra_init_cpu_edp_limits(unsigned int regulator_mA) for (j = 0; j < edp_limits_size; j++) { e[j].temperature = (int)t[i+j].temperature; - e[j].freq_limits[0] = (unsigned int)t[i+j].freq_limits[0] * 10000; - e[j].freq_limits[1] = (unsigned int)t[i+j].freq_limits[1] * 10000; - e[j].freq_limits[2] = (unsigned int)t[i+j].freq_limits[2] * 10000; - e[j].freq_limits[3] = (unsigned int)t[i+j].freq_limits[3] * 10000; + if (j == 0) { + e[j].freq_limits[0] = (unsigned int)(t[i+j].freq_limits[0]-15) * 10000; + e[j].freq_limits[1] = (unsigned int)(t[i+j].freq_limits[1]-5) * 10000; + e[j].freq_limits[2] = (unsigned int)(t[i+j].freq_limits[2]-5) * 10000; + e[j].freq_limits[3] = (unsigned int)(t[i+j].freq_limits[3]-5) * 10000; + } else { + e[j].freq_limits[0] = (unsigned int)(t[i+j].freq_limits[0]-30) * 10000; + e[j].freq_limits[1] = (unsigned int)(t[i+j].freq_limits[1]-20) * 10000; + e[j].freq_limits[2] = (unsigned 
int)(t[i+j].freq_limits[2]-20) * 10000; + e[j].freq_limits[3] = (unsigned int)(t[i+j].freq_limits[3]-20) * 10000; + } } if (edp_limits != edp_default_limits) @@ -436,6 +443,28 @@ void tegra_get_system_edp_limits(const unsigned int **limits) #ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_TEGRA_VARIANT_INFO +extern int orig_cpu_process_id; +extern int orig_core_process_id; +extern int orig_cpu_speedo_id; +extern int orig_soc_speedo_id; + +static int t3_variant_debugfs_show(struct seq_file *s, void *data) +{ + int cpu_speedo_id = orig_cpu_speedo_id; + int soc_speedo_id = orig_soc_speedo_id; + int cpu_process_id = orig_cpu_process_id; + int core_process_id = orig_core_process_id; + + seq_printf(s, "cpu_speedo_id => %d\n", cpu_speedo_id); + seq_printf(s, "soc_speedo_id => %d\n", soc_speedo_id); + seq_printf(s, "cpu_process_id => %d\n", cpu_process_id); + seq_printf(s, "core_process_id => %d\n", core_process_id); + + return 0; +} +#endif + static int edp_limit_debugfs_show(struct seq_file *s, void *data) { seq_printf(s, "%u\n", tegra_get_edp_limit()); @@ -470,6 +499,12 @@ static int edp_debugfs_show(struct seq_file *s, void *data) return 0; } +#ifdef CONFIG_TEGRA_VARIANT_INFO +static int t3_variant_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, t3_variant_debugfs_show, inode->i_private); +} +#endif static int edp_debugfs_open(struct inode *inode, struct file *file) { @@ -481,6 +516,14 @@ static int edp_limit_debugfs_open(struct inode *inode, struct file *file) return single_open(file, edp_limit_debugfs_show, inode->i_private); } +#ifdef CONFIG_TEGRA_VARIANT_INFO +static const struct file_operations t3_variant_debugfs_fops = { + .open = t3_variant_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif static const struct file_operations edp_debugfs_fops = { .open = edp_debugfs_open, @@ -500,6 +543,11 @@ static int __init tegra_edp_debugfs_init(void) { struct dentry *d; +#ifdef CONFIG_TEGRA_VARIANT_INFO + d = debugfs_create_file("t3_variant", S_IRUGO, NULL, NULL, + &t3_variant_debugfs_fops); +#endif + d = debugfs_create_file("edp", S_IRUGO, NULL, NULL, &edp_debugfs_fops); if (!d) diff --git a/arch/arm/mach-tegra/include/mach/dc.h b/arch/arm/mach-tegra/include/mach/dc.h index a1e17cad96c..e19344dac15 100644 --- a/arch/arm/mach-tegra/include/mach/dc.h +++ b/arch/arm/mach-tegra/include/mach/dc.h @@ -269,6 +269,13 @@ struct tegra_dc_sd_agg_priorities { u8 agg[4]; }; +struct tegra_dc_sd_window { + u16 h_position; + u16 v_position; + u16 h_size; + u16 v_size; +}; + struct tegra_dc_sd_settings { unsigned enable; bool use_auto_pwm; @@ -290,6 +297,22 @@ struct tegra_dc_sd_settings { bool use_vid_luma; struct tegra_dc_sd_rgb coeff; + bool k_limit_enable; + u16 k_limit; + + bool sd_window_enable; + struct tegra_dc_sd_window sd_window; + + bool soft_clipping_enable; + u8 soft_clipping_threshold; + + bool smooth_k_enable; + u16 smooth_k_incr; + + bool sd_proc_control; + bool soft_clipping_correction; + bool use_vpulse2; + struct tegra_dc_sd_fc fc; struct tegra_dc_sd_blp blp; u8 bltf[4][4][4]; diff --git a/arch/arm/mach-tegra/include/mach/memory.h b/arch/arm/mach-tegra/include/mach/memory.h index 5f51066482e..84dd44ebaa9 100644 --- a/arch/arm/mach-tegra/include/mach/memory.h +++ b/arch/arm/mach-tegra/include/mach/memory.h @@ -29,6 +29,18 @@ #define PLAT_PHYS_OFFSET UL(0x80000000) #endif +#if defined(CONFIG_MACH_GROUPER) +#define END_MEM UL(0xBEA00000) +#endif + +#if defined(CONFIG_KEXEC_HARDBOOT) +#if defined(CONFIG_MACH_GROUPER) +#define 
KEXEC_HB_PAGE_ADDR UL(0xBEA00000) +#else +#error "Adress for kexec hardboot page not defined" +#endif +#endif + /* * Unaligned DMA causes tegra dma to place data on 4-byte boundary after * expected address. Call to skb_reserve(skb, NET_IP_ALIGN) was causing skb diff --git a/arch/arm/mach-tegra/include/mach/sdhci.h b/arch/arm/mach-tegra/include/mach/sdhci.h index b48a9288707..5dc8cd2ddf7 100644 --- a/arch/arm/mach-tegra/include/mach/sdhci.h +++ b/arch/arm/mach-tegra/include/mach/sdhci.h @@ -28,6 +28,7 @@ struct tegra_sdhci_platform_data { int pm_flags; int pm_caps; unsigned int max_clk_limit; + unsigned int ddr_clk_limit; unsigned int tap_delay; struct mmc_platform_data mmc_data; }; diff --git a/arch/arm/mach-tegra/include/mach/tegra-ahb.h b/arch/arm/mach-tegra/include/mach/tegra-ahb.h new file mode 100644 index 00000000000..e0f8c84b1d8 --- /dev/null +++ b/arch/arm/mach-tegra/include/mach/tegra-ahb.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __MACH_TEGRA_AHB_H__ +#define __MACH_TEGRA_AHB_H__ + +extern int tegra_ahb_enable_smmu(struct device_node *ahb); + +#endif /* __MACH_TEGRA_AHB_H__ */ diff --git a/arch/arm/mach-tegra/pm-irq.c b/arch/arm/mach-tegra/pm-irq.c index 57d21361ca1..a6ca3380311 100644 --- a/arch/arm/mach-tegra/pm-irq.c +++ b/arch/arm/mach-tegra/pm-irq.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -217,7 +218,7 @@ static void tegra_pm_irq_syscore_resume_helper( (wake + 32 * index)); continue; } - + log_wakeup_reason(irq); desc = irq_to_desc(irq); if (!desc || !desc->action || !desc->action->name) { pr_info("Resume caused by WAKE%d, irq %d\n", diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c index e53effb9b66..dfe13e28745 100644 --- a/arch/arm/mach-tegra/pm.c +++ b/arch/arm/mach-tegra/pm.c @@ -138,6 +138,11 @@ struct suspend_context tegra_sctx; #define PMC_CPUPWROFF_TIMER 0xcc #define PMC_COREPWROFF_TIMER PMC_WAKE_DELAY +#define PMC_PWRGATE_TOGGLE 0x30 +#define PWRGATE_TOGGLE_START (1 << 8) +#define UN_PWRGATE_CPU \ + (PWRGATE_TOGGLE_START | TEGRA_CPU_POWERGATE_ID(TEGRA_POWERGATE_CPU)) + #ifdef CONFIG_TEGRA_CLUSTER_CONTROL #define PMC_SCRATCH4_WAKE_CLUSTER_MASK (1<<31) #endif @@ -527,7 +532,6 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) mode |= TEGRA_POWER_PWRREQ_OE; mode &= ~TEGRA_POWER_EFFECT_LP0; pmc_32kwritel(mode, PMC_CTRL); - mode |= flags; tegra_cluster_switch_time(flags, tegra_cluster_switch_time_id_start); @@ -539,7 +543,17 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) trace_cpu_cluster(POWER_CPU_CLUSTER_START); set_power_timers(pdata->cpu_timer, 0, clk_get_rate_all_locked(tegra_pclk)); - tegra_cluster_switch_prolog(mode); + if (flags & TEGRA_POWER_CLUSTER_G) { + /* + * To reduce the vdd_cpu up latency when LP->G + * transition. Before the transition, enable + * the vdd_cpu rail. 
+ */ + if (is_lp_cluster()) + writel(UN_PWRGATE_CPU, + pmc + PMC_PWRGATE_TOGGLE); + } + tegra_cluster_switch_prolog(flags); } else { set_power_timers(pdata->cpu_timer, pdata->cpu_off_timer, clk_get_rate_all_locked(tegra_pclk)); @@ -549,7 +563,7 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) tegra_lp2_set_trigger(sleep_time); cpu_complex_pm_enter(); - suspend_cpu_complex(mode); + suspend_cpu_complex(flags); tegra_cluster_switch_time(flags, tegra_cluster_switch_time_id_prolog); flush_cache_all(); /* @@ -566,7 +580,7 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) tegra_init_cache(false); tegra_cluster_switch_time(flags, tegra_cluster_switch_time_id_switch); - restore_cpu_complex(mode); + restore_cpu_complex(flags); cpu_complex_pm_exit(); remain = tegra_lp2_timer_remain(); @@ -574,7 +588,7 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) tegra_lp2_set_trigger(0); if (flags & TEGRA_POWER_CLUSTER_MASK) { - tegra_cluster_switch_epilog(mode); + tegra_cluster_switch_epilog(flags); trace_cpu_cluster(POWER_CPU_CLUSTER_DONE); } tegra_cluster_switch_time(flags, tegra_cluster_switch_time_id_epilog); @@ -1092,6 +1106,21 @@ void __init tegra_init_suspend(struct tegra_suspend_platform_data *plat) plat->suspend_mode = TEGRA_SUSPEND_LP2; } +#ifdef CONFIG_TEGRA_LP1_950 + if (pdata->lp1_lowvolt_support) { + u32 lp1_core_lowvolt, lp1_core_highvolt; + memcpy(tegra_lp1_register_pmuslave_addr(), &pdata->pmuslave_addr, 4); + memcpy(tegra_lp1_register_i2c_base_addr(), &pdata->i2c_base_addr, 4); + + lp1_core_lowvolt = 0; + lp1_core_lowvolt = (pdata->lp1_core_volt_low << 8) | pdata->core_reg_addr; + memcpy(tegra_lp1_register_core_lowvolt(), &lp1_core_lowvolt, 4); + + lp1_core_highvolt = 0; + lp1_core_highvolt = (pdata->lp1_core_volt_high << 8) | pdata->core_reg_addr; + memcpy(tegra_lp1_register_core_highvolt(), &lp1_core_highvolt, 4); + } +#endif /* !!!FIXME!!! THIS IS TEGRA2 ONLY */ /* Initialize scratch registers used for CPU LP2 synchronization */ writel(0, pmc + PMC_SCRATCH37); diff --git a/arch/arm/mach-tegra/pm.h b/arch/arm/mach-tegra/pm.h index 421b21ac934..8a90ac993fa 100644 --- a/arch/arm/mach-tegra/pm.h +++ b/arch/arm/mach-tegra/pm.h @@ -65,6 +65,14 @@ struct tegra_suspend_platform_data { /* lp_state = 0 for LP0 state, 1 for LP1 state, 2 for LP2 state */ void (*board_resume)(int lp_state, enum resume_stage stg); unsigned int cpu_resume_boost; /* CPU frequency resume boost in kHz */ +#ifdef CONFIG_TEGRA_LP1_950 + bool lp1_lowvolt_support; + unsigned int i2c_base_addr; + unsigned int pmuslave_addr; + unsigned int core_reg_addr; + unsigned int lp1_core_volt_low; + unsigned int lp1_core_volt_high; +#endif }; /* Tegra io dpd entry - for each supported driver */ diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 3ab2c132d62..d91ba95eaff 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -27,6 +27,10 @@ #include "sleep.h" #include "pm.h" +#ifdef CONFIG_KEXEC_HARDBOOT +#include +#endif + static bool is_enabled; static void tegra_cpu_reset_handler_enable(void) @@ -88,6 +92,21 @@ void tegra_cpu_reset_handler_restore(void) } #endif +#ifdef CONFIG_KEXEC_HARDBOOT +#define RECOVERY_MODE BIT(31) +void tegra_kexec_hardboot(void) +{ + /* Reboot with the recovery kernel since the boot kernel decompressor may + * not support the hardboot jump. 
*/ + + void __iomem *reset = IO_ADDRESS(TEGRA_PMC_BASE + 0x00); + + u32 reg = readl_relaxed(reset + PMC_SCRATCH0); + reg |= RECOVERY_MODE; + writel_relaxed(reg, reset + PMC_SCRATCH0); +} +#endif + void __init tegra_cpu_reset_handler_init(void) { #ifdef CONFIG_SMP @@ -112,4 +131,8 @@ void __init tegra_cpu_reset_handler_init(void) __pa(&__tegra_cpu_reset_handler_data[TEGRA_RESET_DATA_SIZE])); tegra_cpu_reset_handler_enable(); + +#ifdef CONFIG_KEXEC_HARDBOOT + kexec_hardboot_hook = tegra_kexec_hardboot; +#endif } diff --git a/arch/arm/mach-tegra/sleep-t3.S b/arch/arm/mach-tegra/sleep-t3.S index caabeb75139..ab4bfc999cd 100644 --- a/arch/arm/mach-tegra/sleep-t3.S +++ b/arch/arm/mach-tegra/sleep-t3.S @@ -94,6 +94,16 @@ #define PMC_PLLP_WB0_OVERRIDE 0xf8 #define CLK_RESET_CLK_SOURCE_MSELECT 0x3b4 +#define CLK_RESET_CLK_ENB_H_SET 0x328 +#define CLK_RESET_CLK_ENB_H_CLR 0x32c +#define CLK_RESET_CLK_RST_DEV_H_SET 0x308 +#define CLK_RESET_CLK_RST_DEV_H_CLR 0x30c + +#define I2C_CNFG 0x0 +#define I2C_ADDR0 0x4 +#define I2C_DATA1 0xc +#define I2C_DATA2 0x10 +#define I2C_STATUS 0x1c #define MSELECT_CLKM (0x3 << 30) @@ -357,6 +367,66 @@ ENTRY(tegra3_lp1_reset) mov32 r4, ((1<<28) | (8)) @ burst policy is PLLX str r4, [r0, #CLK_RESET_CCLK_BURST] +#ifdef CONFIG_TEGRA_LP1_950 +lp1_voltset: + /* Restore the Core voltage to high on LP1 resume */ + /* Reset(Enable/Disable) the DVC-I2C Controller*/ + mov r1, #(1 << 15) + str r1, [r0, #CLK_RESET_CLK_RST_DEV_H_SET] + + /* Wait for 2us */ + mov32 r7, TEGRA_TMRUS_BASE + wait_for_us r1, r7, r9 + add r1, r1, #2 + wait_until r1, r7, r9 + + mov r1, #(1 << 15) + str r1, [r0, #CLK_RESET_CLK_RST_DEV_H_CLR] + + /* Enable the DVC-I2C Controller */ + mov r1, #(1 << 15) + str r1, [r0, #CLK_RESET_CLK_ENB_H_SET] + + + /* Same I2C transaction protocol as suspend */ + ldr r1, lp1_register_pmuslave_addr + cmp r1, #0 + beq lp1_voltskip_resume + + ldr r4, lp1_register_i2c_base_addr + str r1, [r4, #I2C_ADDR0] + + mov32 r1, 0x2 + str r1, [r4, #I2C_CNFG] + + ldr r1, lp1_register_core_highvolt + str r1, [r4, #I2C_DATA1] + + mov32 r1, 0 + str r1, [r4, #I2C_DATA2] + + mov32 r1, 0xA02 + str r1, [r4, #I2C_CNFG] + + wait_for_us r1, r7, r9 + mov32 r3, 0x7D0 /* Wait for 2ms and try transaction again */ + add r0, r1, r3 +loop_i2c_status_resume: + add r1, r1, #0xFA /* Check status every 250us */ + wait_until r1, r7, r9 + cmp r0, r1 + beq lp1_voltset + + ldr r3, [r4, #I2C_STATUS] + cmp r3, #0 + bne loop_i2c_status_resume + +lp1_voltskip_resume: + /* Disable the DVC-I2C Controller */ + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_ENB_H_CLR] +#endif + #if defined (CONFIG_CACHE_L2X0) /* power up L2 */ ldr r0, [r2, #PMC_PWRGATE_STATUS] @@ -501,6 +571,21 @@ tegra3_sdram_pad_address: tegra3_sdram_pad_size: .word tegra3_sdram_pad_address - tegra3_sdram_pad_save +#ifdef CONFIG_TEGRA_LP1_950 + .globl lp1_register_pmuslave_addr + .globl lp1_register_i2c_base_addr + .globl lp1_register_core_lowvolt + .globl lp1_register_core_highvolt +lp1_register_pmuslave_addr: + .word 0 +lp1_register_i2c_base_addr: + .word 0 +lp1_register_core_lowvolt: + .word 0 +lp1_register_core_highvolt: + .word 0 +#endif + /* * tegra3_tear_down_core * @@ -533,9 +618,72 @@ tegra3_cpu_clk32k: str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] mov pc, lr +lp1_clocks_prepare: + /* Prepare to set the Core to the lowest voltage if supported. 
+ * Start by setting the I2C clocks to make the I2C transfer */ +#ifdef CONFIG_TEGRA_LP1_950 + /* Set up the PWR I2C GPIOs with the right masks*/ + + /* Reset(Set/Clr) the DVC-I2C Controller*/ + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_RST_DEV_H_SET] + + /* Wait for 2us */ + wait_for_us r1, r7, r9 + mov32 r0, 0x7D0 + add r1, r1, r0 + wait_until r1, r7, r9 + + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_RST_DEV_H_CLR] + + /* Enable the DVC-I2C Controller */ + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_ENB_H_SET] + + /* I2C transfer protocol: + * 4 packets: Slaveaddr + WriteConfigure + Data1 + Data2 */ + ldr r0, lp1_register_pmuslave_addr + cmp r0, #0 + beq lp1_volt_skip + ldr r1, lp1_register_i2c_base_addr + str r0, [r1, #I2C_ADDR0] + + mov32 r0, 0x2 + str r0, [r1, #I2C_CNFG] + + ldr r0, lp1_register_core_lowvolt + str r0, [r1, #I2C_DATA1] + + mov32 r0, 0 + str r0, [r1, #I2C_DATA2] + + /* Send I2C transaction */ + mov32 r0, 0xA02 + str r0, [r1, #I2C_CNFG] + + /* Check the transaction status before proceeding */ + wait_for_us r2, r7, r9 + mov32 r3, 0x7D0 /* Wait for 2ms for I2C transaction */ + add r3, r2, r3 +loop_i2c_status_suspend: + add r2, r2, #0xFA /* Check status every 250us */ + cmp r3, r2 + beq lp1_volt_skip /* Waited for 2ms, I2C transaction didn't take place */ + wait_until r2, r7, r9 + + ldr r0, [r1, #I2C_STATUS] + cmp r0, #0 + bne loop_i2c_status_suspend +lp1_volt_skip: + + /* Disable the DVC-I2C Controller */ + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_ENB_H_CLR] + +#endif /* start by jumping to clkm to safely disable PLLs, then jump * to clks */ -lp1_clocks_prepare: mov r0, #(1 << 28) str r0, [r5, #CLK_RESET_SCLK_BURST] str r0, [r5, #CLK_RESET_CCLK_BURST] diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S index 18b8799ea32..4e898b81f36 100644 --- a/arch/arm/mach-tegra/sleep.S +++ b/arch/arm/mach-tegra/sleep.S @@ -49,6 +49,15 @@ #define TEGRA_PMC_VIRT (TEGRA_PMC_BASE - IO_APB_PHYS + IO_APB_VIRT) #define TEGRA_CLK_RESET_VIRT (TEGRA_CLK_RESET_BASE - IO_PPSB_PHYS + IO_PPSB_VIRT) +/* + * ARM security extensions are required when compiling TRUSTED_FOUNDATIONS code, + * and this explicit arch_extension line fixes the build on compilers where the + * as-instr Makefile function fails. The Android cross-compiler is an example. 
+ */ +#ifdef CONFIG_TRUSTED_FOUNDATIONS +.arch_extension sec +#endif + /* * tegra_pen_lock * diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h index 59298f1efbe..ba290fdeb35 100644 --- a/arch/arm/mach-tegra/sleep.h +++ b/arch/arm/mach-tegra/sleep.h @@ -162,6 +162,10 @@ void tegra2_sleep_wfi(unsigned long v2p); #else extern void tegra3_iram_start; extern void tegra3_iram_end; +extern unsigned int lp1_register_pmuslave_addr; +extern unsigned int lp1_register_i2c_base_addr; +extern unsigned int lp1_register_core_lowvolt; +extern unsigned int lp1_register_core_highvolt; int tegra3_sleep_core_finish(unsigned long int); int tegra3_sleep_cpu_secondary_finish(unsigned long int); void tegra3_hotplug_shutdown(void); @@ -184,5 +188,41 @@ static inline void *tegra_iram_end(void) return &tegra3_iram_end; #endif } + +static inline void *tegra_lp1_register_pmuslave_addr(void) +{ +#ifdef CONFIG_ARCH_TEGRA_2x_SOC + return NULL; +#else + return &lp1_register_pmuslave_addr; +#endif +} + +static inline void *tegra_lp1_register_i2c_base_addr(void) +{ +#ifdef CONFIG_ARCH_TEGRA_2x_SOC + return NULL; +#else + return &lp1_register_i2c_base_addr; +#endif +} + +static inline void *tegra_lp1_register_core_lowvolt(void) +{ +#ifdef CONFIG_ARCH_TEGRA_2x_SOC + return NULL; +#else + return &lp1_register_core_lowvolt; +#endif +} + +static inline void *tegra_lp1_register_core_highvolt(void) +{ +#ifdef CONFIG_ARCH_TEGRA_2x_SOC + return NULL; +#else + return &lp1_register_core_highvolt; +#endif +} #endif #endif diff --git a/arch/arm/mach-tegra/tegra3_actmon.c b/arch/arm/mach-tegra/tegra3_actmon.c index 5df6ed1fc47..a76d0a963d9 100644 --- a/arch/arm/mach-tegra/tegra3_actmon.c +++ b/arch/arm/mach-tegra/tegra3_actmon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, NVIDIA Corporation. + * Copyright (c) 2012, NVIDIA CORPORATION. 
All rights reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -532,7 +532,7 @@ static struct actmon_dev actmon_dev_avp = { .boost_freq_step = 8000, .boost_up_coef = 200, .boost_down_coef = 50, - .boost_up_threshold = 75, + .boost_up_threshold = 85, .boost_down_threshold = 50, .up_wmark_window = 1, diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 3650b2268ec..3392bc7d155 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -839,7 +839,7 @@ static int tegra3_cpu_clk_set_rate(struct clk *c, unsigned long rate) return -ENOSYS; else if ((!c->dvfs->dvfs_rail->reg) && (clk_get_rate_locked(c) < rate)) { - WARN(1, "Increasing CPU rate while regulator is not" + pr_debug("Increasing CPU rate while regulator is not" " ready may overclock CPU\n"); return -ENOSYS; } @@ -2946,11 +2946,11 @@ static noinline int shared_bus_set_rate(struct clk *bus, unsigned long rate, mv = tegra_dvfs_predict_millivolts(bus, rate); old_mv = tegra_dvfs_predict_millivolts(bus, old_rate); - if (IS_ERR_VALUE(mv) || IS_ERR_VALUE(old_mv)) { - pr_err("%s: Failed to predict %s voltage for %lu => %lu\n", - __func__, bus->name, old_rate, rate); - return -EINVAL; - } +// if (IS_ERR_VALUE(mv) || IS_ERR_VALUE(old_mv)) { +// pr_err("%s: Failed to predict %s voltage for %lu => %lu\n", +// __func__, bus->name, old_rate, rate); +// return -EINVAL; +// } /* emc bus: set bridge rate as intermediate step when crossing * bridge threshold in any direction @@ -3012,7 +3012,8 @@ static int tegra3_clk_shared_bus_update(struct clk *bus) if (c->u.shared_bus_user.enabled) { switch (c->u.shared_bus_user.mode) { case SHARED_BW: - bw += c->u.shared_bus_user.rate; + if (bw < bus->max_rate) + bw += c->u.shared_bus_user.rate; break; case SHARED_CEILING: ceiling = min(c->u.shared_bus_user.rate, @@ -3025,6 +3026,16 @@ static int tegra3_clk_shared_bus_update(struct clk *bus) } } } + + if (bw) { + if (bus->flags & PERIPH_EMC_ENB) { + bw = tegra_emc_bw_efficiency ? + (bw / tegra_emc_bw_efficiency) : bus->max_rate; + bw = (bw < bus->max_rate / 100) ? 
+ (bw * 100) : bus->max_rate; + } + bw = clk_round_rate_locked(bus, bw); + } rate = min(max(rate, bw), ceiling); old_rate = clk_get_rate_locked(bus); @@ -3073,6 +3084,10 @@ static long tegra_clk_shared_bus_round_rate(struct clk *c, unsigned long rate) if (c->u.shared_bus_user.mode == SHARED_AUTO) rate = 0; + /* BW users should not be rounded until aggregated */ + if (c->u.shared_bus_user.mode == SHARED_BW) + return rate; + return clk_round_rate(c->parent, rate); } @@ -3196,6 +3211,19 @@ static struct clk tegra_pll_ref = { }; static struct clk_pll_freq_table tegra_pll_c_freq_table[] = { + + { 12000000, 1400000000, 700, 6, 1, 8}, + { 13000000, 1400000000, 700, 13, 2, 8}, /* custom: 1400 MHz for 700Mhz GPU */ + { 16800000, 1400000000, 666, 8, 1, 8}, + { 19200000, 1400000000, 656, 9, 1, 8}, + { 26000000, 1400000000, 700, 13, 1, 8}, + + { 12000000, 1332000000, 666, 6, 1, 8}, + { 13000000, 1332000000, 666, 13, 2, 8}, /* custom: 1332 MHz for 666Mhz GPU */ + { 16800000, 1332000000, 555, 7, 1, 8}, + { 19200000, 1332000000, 555, 8, 1, 8}, + { 26000000, 1332000000, 666, 13, 1, 8}, + { 12000000, 1200000000, 600, 6, 1, 8}, { 13000000, 1200000000, 923, 10, 1, 8}, /* actual: 1199.9 MHz */ { 16800000, 1200000000, 500, 7, 1, 8}, @@ -3524,6 +3552,20 @@ static struct clk tegra_pll_u = { }; static struct clk_pll_freq_table tegra_pll_x_freq_table[] = { + /* 1.9 GHz */ + { 12000000, 1900000000, 850, 6, 1, 8}, + { 13000000, 1900000000, 915, 7, 1, 8}, + { 16800000, 1900000000, 708, 7, 1, 8}, + { 19200000, 1900000000, 989, 10, 1, 8}, /* actual: 1898.8 MHz */ + { 26000000, 1900000000, 950, 13, 1, 8}, + + /* 1.8 GHz */ + { 12000000, 1800000000, 900, 6, 1, 8}, + { 13000000, 1800000000, 969, 7, 1, 8}, /* actual: 1799.6 MHz */ + { 16800000, 1800000000, 750, 7, 1, 8}, + { 19200000, 1800000000, 750, 8, 1, 8}, + { 26000000, 1800000000, 900, 13, 1, 8}, + /* 1.7 GHz */ { 12000000, 1700000000, 850, 6, 1, 8}, { 13000000, 1700000000, 915, 7, 1, 8}, /* actual: 1699.2 MHz */ @@ -3580,6 +3622,13 @@ static struct clk_pll_freq_table tegra_pll_x_freq_table[] = { { 19200000, 1000000000, 625, 12, 1, 8}, { 26000000, 1000000000, 1000, 26, 1, 8}, + /* 666 MHz */ + { 12000000, 666000000, 555, 10, 1, 8}, + { 13000000, 666000000, 666, 13, 1, 8}, + { 16800000, 666000000, 555, 14, 1, 8}, /* actual: 999.6 MHz */ + { 19200000, 666000000, 555, 16, 1, 8}, + { 26000000, 666000000, 333, 13, 1, 8}, + { 0, 0, 0, 0, 0, 0 }, }; @@ -3589,14 +3638,14 @@ static struct clk tegra_pll_x = { .ops = &tegra_pll_ops, .reg = 0xe0, .parent = &tegra_pll_ref, - .max_rate = 1700000000, + .max_rate = 1900000000, .u.pll = { .input_min = 2000000, .input_max = 31000000, .cf_min = 1000000, .cf_max = 6000000, .vco_min = 20000000, - .vco_max = 1700000000, + .vco_max = 1900000000, .freq_table = tegra_pll_x_freq_table, .lock_delay = 300, }, @@ -3607,7 +3656,7 @@ static struct clk tegra_pll_x_out0 = { .ops = &tegra_pll_div_ops, .flags = DIV_2 | PLLX, .parent = &tegra_pll_x, - .max_rate = 850000000, + .max_rate = 950000000, }; @@ -3874,7 +3923,7 @@ static struct clk tegra_clk_cclk_g = { .inputs = mux_cclk_g, .reg = 0x368, .ops = &tegra_super_ops, - .max_rate = 1700000000, + .max_rate = 1900000000, }; static struct clk tegra_clk_cclk_lp = { @@ -3883,7 +3932,7 @@ static struct clk tegra_clk_cclk_lp = { .inputs = mux_cclk_lp, .reg = 0x370, .ops = &tegra_super_ops, - .max_rate = 620000000, + .max_rate = 740000000, }; static struct clk tegra_clk_sclk = { @@ -3899,7 +3948,7 @@ static struct clk tegra_clk_virtual_cpu_g = { .name = "cpu_g", .parent = &tegra_clk_cclk_g, .ops = 
&tegra_cpu_ops, - .max_rate = 1700000000, + .max_rate = 1900000000, .u.cpu = { .main = &tegra_pll_x, .backup = &tegra_pll_p, @@ -3911,7 +3960,7 @@ static struct clk tegra_clk_virtual_cpu_lp = { .name = "cpu_lp", .parent = &tegra_clk_cclk_lp, .ops = &tegra_cpu_ops, - .max_rate = 620000000, + .max_rate = 740000000, .u.cpu = { .main = &tegra_pll_x, .backup = &tegra_pll_p, @@ -3929,7 +3978,7 @@ static struct clk tegra_clk_cpu_cmplx = { .name = "cpu", .inputs = mux_cpu_cmplx, .ops = &tegra_cpu_cmplx_ops, - .max_rate = 1700000000, + .max_rate = 1900000000, }; static struct clk tegra_clk_cop = { @@ -4234,7 +4283,7 @@ struct clk tegra_list_clks[] = { PERIPH_CLK("vcp", "tegra-avp", "vcp", 29, 0, 250000000, mux_clk_m, 0), PERIPH_CLK("bsea", "tegra-avp", "bsea", 62, 0, 250000000, mux_clk_m, 0), PERIPH_CLK("bsev", "tegra-aes", "bsev", 63, 0, 250000000, mux_clk_m, 0), - PERIPH_CLK("vde", "vde", NULL, 61, 0x1c8, 600000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | DIV_U71_INT), + PERIPH_CLK("vde", "vde", NULL, 61, 0x1c8, 700000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | DIV_U71_INT), PERIPH_CLK("csite", "csite", NULL, 73, 0x1d4, 144000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71), /* max rate ??? */ PERIPH_CLK("la", "la", NULL, 76, 0x1f8, 26000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71), PERIPH_CLK("owr", "tegra_w1", NULL, 71, 0x1cc, 26000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | PERIPH_ON_APB), @@ -4262,19 +4311,19 @@ struct clk tegra_list_clks[] = { PERIPH_CLK("uarte_dbg", "serial8250.0", "uarte", 66, 0x1c4, 900000000, mux_pllp_clkm, MUX | DIV_U151 | DIV_U151_UART | PERIPH_ON_APB), PERIPH_CLK_EX("vi", "tegra_camera", "vi", 20, 0x148, 470000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT, &tegra_vi_clk_ops), PERIPH_CLK("vi_sensor", "tegra_camera", "vi_sensor", 20, 0x1a8, 150000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | PERIPH_NO_RESET), - PERIPH_CLK("3d", "3d", NULL, 24, 0x158, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET), - PERIPH_CLK("3d2", "3d2", NULL, 98, 0x3b0, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET), - PERIPH_CLK("2d", "2d", NULL, 21, 0x15c, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE), - PERIPH_CLK("epp", "epp", NULL, 19, 0x16c, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), - PERIPH_CLK("mpe", "mpe", NULL, 60, 0x170, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), + PERIPH_CLK("3d", "3d", NULL, 24, 0x158, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET), + PERIPH_CLK("3d2", "3d2", NULL, 98, 0x3b0, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET), + PERIPH_CLK("2d", "2d", NULL, 21, 0x15c, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE), + PERIPH_CLK("epp", "epp", NULL, 19, 0x16c, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), + PERIPH_CLK("mpe", "mpe", NULL, 60, 0x170, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), PERIPH_CLK("host1x", "host1x", NULL, 28, 0x180, 300000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), PERIPH_CLK("cve", "cve", NULL, 49, 0x140, 250000000, mux_pllp_plld_pllc_clkm, MUX | DIV_U71), /* requires min voltage */ PERIPH_CLK("tvo", "tvo", NULL, 49, 0x188, 250000000, mux_pllp_plld_pllc_clkm, MUX | DIV_U71), /* requires min voltage */ PERIPH_CLK_EX("dtv", "dtv", NULL, 79, 0x1dc, 
250000000, mux_clk_m, 0, &tegra_dtv_clk_ops), PERIPH_CLK("hdmi", "hdmi", NULL, 51, 0x18c, 148500000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8 | DIV_U71), PERIPH_CLK("tvdac", "tvdac", NULL, 53, 0x194, 220000000, mux_pllp_plld_pllc_clkm, MUX | DIV_U71), /* requires min voltage */ - PERIPH_CLK("disp1", "tegradc.0", NULL, 27, 0x138, 600000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8), - PERIPH_CLK("disp2", "tegradc.1", NULL, 26, 0x13c, 600000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8), + PERIPH_CLK("disp1", "tegradc.0", NULL, 27, 0x138, 700000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8), + PERIPH_CLK("disp2", "tegradc.1", NULL, 26, 0x13c, 700000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8), PERIPH_CLK("usbd", "fsl-tegra-udc", NULL, 22, 0, 480000000, mux_clk_m, 0), /* requires min voltage */ PERIPH_CLK("usb2", "tegra-ehci.1", NULL, 58, 0, 480000000, mux_clk_m, 0), /* requires min voltage */ PERIPH_CLK("usb3", "tegra-ehci.2", NULL, 59, 0, 480000000, mux_clk_m, 0), /* requires min voltage */ @@ -4292,7 +4341,7 @@ struct clk tegra_list_clks[] = { PERIPH_CLK("i2cslow", "i2cslow", NULL, 81, 0x3fc, 26000000, mux_pllp_pllc_clk32_clkm, MUX | DIV_U71 | PERIPH_ON_APB), PERIPH_CLK("pcie", "tegra-pcie", "pcie", 70, 0, 250000000, mux_clk_m, 0), PERIPH_CLK("afi", "tegra-pcie", "afi", 72, 0, 250000000, mux_clk_m, 0), - PERIPH_CLK("se", "se", NULL, 127, 0x42c, 625000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | DIV_U71_INT), + PERIPH_CLK("se", "se", NULL, 127, 0x42c, 700000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | DIV_U71_INT), PERIPH_CLK("mselect", "mselect", NULL, 99, 0x3b4, 108000000, mux_pllp_clkm, MUX | DIV_U71), SHARED_CLK("avp.sclk", "tegra-avp", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), @@ -4328,7 +4377,8 @@ struct clk tegra_list_clks[] = { SHARED_CLK("3d.emc", "tegra_gr3d", "emc", &tegra_clk_emc, NULL, 0, 0), SHARED_CLK("2d.emc", "tegra_gr2d", "emc", &tegra_clk_emc, NULL, 0, 0), SHARED_CLK("mpe.emc", "tegra_mpe", "emc", &tegra_clk_emc, NULL, 0, 0), - SHARED_CLK("camera.emc", "tegra_camera", "emc", &tegra_clk_emc, NULL, 0, 0), + SHARED_CLK("camera.emc", "tegra_camera", "emc", &tegra_clk_emc, NULL, 0, SHARED_BW), + SHARED_CLK("sdmmc4.emc", "sdhci-tegra.3", "emc", &tegra_clk_emc, NULL, 0, 0), SHARED_CLK("floor.emc", "floor.emc", NULL, &tegra_clk_emc, NULL, 0, 0), SHARED_CLK("host1x.cbus", "tegra_host1x", "host1x", &tegra_clk_cbus, "host1x", 2, SHARED_AUTO), @@ -4553,11 +4603,11 @@ static struct cpufreq_frequency_table freq_table_1p0GHz[] = { { 0, 51000 }, { 1, 102000 }, { 2, 204000 }, - { 3, 312000 }, - { 4, 456000 }, - { 5, 608000 }, + { 3, 340000 }, + { 4, 475000 }, + { 5, 620000 }, { 6, 760000 }, - { 7, 816000 }, + { 7, 860000 }, { 8, 912000 }, { 9, 1000000 }, {10, CPUFREQ_TABLE_END }, @@ -4569,7 +4619,7 @@ static struct cpufreq_frequency_table freq_table_1p3GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, - { 5, 640000 }, + { 5, 620000 }, { 6, 760000 }, { 7, 860000 }, { 8, 1000000 }, @@ -4602,7 +4652,7 @@ static struct cpufreq_frequency_table freq_table_1p5GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, - { 5, 640000 }, + { 5, 620000 }, { 6, 760000 }, { 7, 860000 }, { 8, 1000000 }, @@ -4614,31 +4664,151 @@ static struct cpufreq_frequency_table freq_table_1p5GHz[] = { {14, CPUFREQ_TABLE_END }, }; -static struct cpufreq_frequency_table freq_table_1p7GHz[] = { +static struct cpufreq_frequency_table freq_table_1p6GHz[] = { { 0, 51000 }, { 1, 102000 }, { 2, 204000 }, - { 3, 370000 }, + { 3, 340000 }, { 4, 475000 }, +#ifdef 
CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_740 + { 5, 740000 }, +#endif +#ifdef CONFIG_LP_OC_700 + { 5, 700000 }, +#endif +#ifdef CONFIG_LP_OC_666 + { 5, 666000 }, +#endif +#ifdef CONFIG_LP_OC_620 { 5, 620000 }, - { 6, 760000 }, - { 7, 910000 }, - { 8, 1150000 }, - { 9, 1300000 }, - {10, 1400000 }, - {11, 1500000 }, - {12, 1600000 }, - {13, 1700000 }, +#endif +#ifdef CONFIG_LP_OC_555 + { 5, 555000 }, +#endif +#else + { 5, 620000 }, + +#endif + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, {14, CPUFREQ_TABLE_END }, }; +static struct cpufreq_frequency_table freq_table_1p7GHz[] = { + { 0, 51000 }, + { 1, 102000 }, + { 2, 204000 }, + { 3, 340000 }, + { 4, 475000 }, +#ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_740 + { 5, 740000 }, +#endif +#ifdef CONFIG_LP_OC_700 + { 5, 700000 }, +#endif +#ifdef CONFIG_LP_OC_666 + { 5, 666000 }, +#endif +#ifdef CONFIG_LP_OC_620 + { 5, 620000 }, +#endif +#ifdef CONFIG_LP_OC_555 + { 5, 555000 }, +#endif +#else + { 5, 620000 }, + +#endif + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, 1700000 }, + {15, CPUFREQ_TABLE_END }, +}; + +static struct cpufreq_frequency_table freq_table_1p8GHz[] = { + { 0, 51000 }, + { 1, 102000 }, + { 2, 204000 }, + { 3, 340000 }, + { 4, 475000 }, +#ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_740 + { 5, 740000 }, +#endif +#ifdef CONFIG_LP_OC_700 + { 5, 700000 }, +#endif +#ifdef CONFIG_LP_OC_666 + { 5, 666000 }, +#endif +#ifdef CONFIG_LP_OC_620 + { 5, 620000 }, +#endif +#ifdef CONFIG_LP_OC_555 + { 5, 555000 }, +#endif +#else + { 5, 620000 }, + +#endif + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, 1700000 }, + {15, 1800000 }, + {16, CPUFREQ_TABLE_END }, +}; + +static struct cpufreq_frequency_table freq_table_1p9GHz[] = { + { 0, 51000 }, + { 1, 102000 }, + { 2, 204000 }, + { 3, 340000 }, + { 4, 475000 }, + { 5, 666000 }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, 1700000 }, + {15, 1900000 }, + {16, CPUFREQ_TABLE_END }, +}; + static struct tegra_cpufreq_table_data cpufreq_tables[] = { { freq_table_300MHz, 0, 1 }, { freq_table_1p0GHz, 2, 8 }, { freq_table_1p3GHz, 2, 10 }, { freq_table_1p4GHz, 2, 11 }, { freq_table_1p5GHz, 2, 12 }, - { freq_table_1p7GHz, 2, 12 }, + { freq_table_1p6GHz, 2, 13 }, + { freq_table_1p7GHz, 2, 13 }, + { freq_table_1p7GHz, 2, 13 }, + { freq_table_1p8GHz, 2, 13 }, + { freq_table_1p9GHz, 2, 13 }, }; static int clip_cpu_rate_limits( @@ -4674,7 +4844,11 @@ static int clip_cpu_rate_limits( cpu_clk_lp->max_rate, ret ? 
"outside" : "at the bottom"); return ret; } + + /* force idx for max LP*/ + idx=5; cpu_clk_lp->max_rate = freq_table[idx].frequency * 1000; + idx=4; cpu_clk_g->min_rate = freq_table[idx-1].frequency * 1000; data->suspend_index = idx; return 0; @@ -4706,6 +4880,7 @@ struct tegra_cpufreq_table_data *tegra_cpufreq_table_get(void) ret = clip_cpu_rate_limits( &cpufreq_tables[i], &policy, cpu_clk_g, cpu_clk_lp); + printk("tegra3_clocks: clip_cpu_rate_limits return code: %u\n", ret); if (!ret) return &cpufreq_tables[i]; } @@ -4730,10 +4905,10 @@ unsigned long tegra_emc_to_cpu_ratio(unsigned long cpu_rate) /* Vote on memory bus frequency based on cpu frequency; cpu rate is in kHz, emc rate is in Hz */ - if (cpu_rate >= 750000) - return emc_max_rate; /* cpu >= 750 MHz, emc max */ + if (cpu_rate >= 850000) + return emc_max_rate; /* cpu >= 850 MHz, emc max */ else if (cpu_rate >= 450000) - return emc_max_rate/2; /* cpu >= 500 MHz, emc max/2 */ + return emc_max_rate/2; /* cpu >= 450 MHz, emc max/2 */ else if (cpu_rate >= 250000) return 100000000; /* cpu >= 250 MHz, emc 100 MHz */ else diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 48c4384b1aa..1d37c4c4a20 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -28,18 +28,32 @@ #include "board.h" #include "tegra3_emc.h" +#ifdef CONFIG_VOLTAGE_CONTROL +int user_mv_table[MAX_DVFS_FREQS] = { + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; +#endif + static bool tegra_dvfs_cpu_disabled; static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { - 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50}; + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; static const int core_millivolts[MAX_DVFS_FREQS] = { - 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350}; + 950, 1000, 1050, 1100, 1150, 1200, 1200, 1200, 1200}; + +int avp_millivolts[MAX_DVFS_FREQS] = { + 950, 1000, 1050, 1100, 1150, 1200, 1200, 1200, 1200}; + +int lp_cpu_millivolts[MAX_DVFS_FREQS] = { + 950, 1000, 1050, 1100, 1150, 1200, 1200, 1200, 1200}; + +int emc_millivolts[MAX_DVFS_FREQS] = { + 950, 1000, 1050, 1100, 1150, 1200, 1200, 1200, 1200}; #define KHZ 1000 #define MHZ 1000000 @@ -53,7 +67,7 @@ static int cpu_below_core = VDD_CPU_BELOW_VDD_CORE; static struct dvfs_rail tegra3_dvfs_rail_vdd_cpu = { .reg_id = "vdd_cpu", - .max_millivolts = 1250, + .max_millivolts = 1500, .min_millivolts = 800, .step = VDD_SAFE_STEP, .jmp_to_zero = true, @@ -61,7 +75,7 @@ static struct dvfs_rail tegra3_dvfs_rail_vdd_cpu = { static struct dvfs_rail tegra3_dvfs_rail_vdd_core = { .reg_id = "vdd_core", - .max_millivolts = 1350, + .max_millivolts = 1500, .min_millivolts = 950, .step = VDD_SAFE_STEP, }; @@ -79,12 +93,9 @@ static int tegra3_get_core_floor_mv(int cpu_mv) return 1000; if (cpu_mv < 1000) return 1100; - if ((tegra_cpu_speedo_id() < 2) || - (tegra_cpu_speedo_id() == 4) || - (tegra_cpu_speedo_id() == 7) || - (tegra_cpu_speedo_id() == 8)) - return 1200; - if (cpu_mv < 1100) + if (cpu_mv < 1050) + return 1150; + if ((cpu_mv < 1100) || (tegra_cpu_speedo_id() == 4)) return 1200; if (cpu_mv <= 1250) return 1300; @@ 
-141,7 +152,7 @@ static struct dvfs_relationship tegra3_dvfs_relationships[] = { } static struct dvfs cpu_dvfs_table[] = { - /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ + /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ CPU_DVFS("cpu_g", 0, 0, MHZ, 1, 1, 684, 684, 817, 817, 1026, 1102, 1149, 1187, 1225, 1282, 1300), CPU_DVFS("cpu_g", 0, 1, MHZ, 1, 1, 807, 807, 948, 948, 1117, 1171, 1206, 1300), CPU_DVFS("cpu_g", 0, 2, MHZ, 1, 1, 883, 883, 1039, 1039, 1178, 1206, 1300), @@ -162,7 +173,10 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1240, 1280, 1320, 1360, 1360, 1500), CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), - CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500), + + /* Nexus 7 - faking speedo id = 4, process id =2*/ + /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ + CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700), CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), CPU_DVFS("cpu_g", 5, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), @@ -171,12 +185,12 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 6, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), CPU_DVFS("cpu_g", 6, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1240, 1280, 1360, 1390, 1470, 1500, 1520, 1520, 1590, 1700), - CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), - CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), - CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1300), - CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1300), - CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1300), - + CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 480, 550, 570, 680, 700, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 500, 650, 670, 780, 800, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 750, 770, 910, 940, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 750, 770, 940, 940, 1160, 1280, 1300, 1330, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 8, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), CPU_DVFS("cpu_g", 8, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), CPU_DVFS("cpu_g", 8, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1300), @@ -200,135 +214,292 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", -1, -1, MHZ, 1, 1, 216, 216, 300), }; -#define CORE_DVFS(_clk_name, _speedo_id, _auto, _mult, _freqs...) 
\ +#define CORE_DVFS(_clk_name, _millivolts, _speedo_id, _auto, _mult, _freqs...) \ { \ .clk_name = _clk_name, \ .speedo_id = _speedo_id, \ .process_id = -1, \ .freqs = {_freqs}, \ .freqs_mult = _mult, \ - .millivolts = core_millivolts, \ + .millivolts = _millivolts, \ .auto_dvfs = _auto, \ .dvfs_rail = &tegra3_dvfs_rail_vdd_core, \ } static struct dvfs core_dvfs_table[] = { - /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ + /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ /* Clock limits for internal blocks, PLLs */ - CORE_DVFS("cpu_lp", 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), - CORE_DVFS("cpu_lp", 2, 1, KHZ, 204000, 295000, 370000, 428000, 475000, 513000, 579000, 620000, 620000), - CORE_DVFS("cpu_lp", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 450000, 450000, 450000), - - CORE_DVFS("emc", 0, 1, KHZ, 1, 266500, 266500, 266500, 266500, 533000, 533000, 533000, 533000), - CORE_DVFS("emc", 1, 1, KHZ, 102000, 408000, 408000, 408000, 408000, 667000, 667000, 667000, 667000), - CORE_DVFS("emc", 2, 1, KHZ, 102000, 408000, 408000, 408000, 408000, 667000, 667000, 800000, 900000), - CORE_DVFS("emc", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), - - CORE_DVFS("sbus", 0, 1, KHZ, 1, 136000, 164000, 191000, 216000, 216000, 216000, 216000, 216000), - CORE_DVFS("sbus", 1, 1, KHZ, 51000, 205000, 205000, 227000, 227000, 267000, 267000, 267000, 267000), - CORE_DVFS("sbus", 2, 1, KHZ, 51000, 205000, 205000, 227000, 227000, 267000, 334000, 334000, 334000), - CORE_DVFS("sbus", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 378000, 378000, 378000), - - CORE_DVFS("vi", 0, 1, KHZ, 1, 216000, 285000, 300000, 300000, 300000, 300000, 300000, 300000), - CORE_DVFS("vi", 1, 1, KHZ, 1, 216000, 267000, 300000, 371000, 409000, 409000, 409000, 409000), - CORE_DVFS("vi", 2, 1, KHZ, 1, 219000, 267000, 300000, 371000, 409000, 425000, 425000, 425000), - CORE_DVFS("vi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), - - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - - CORE_DVFS("vde", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 
520000, 600000), - CORE_DVFS("mpe", 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("2d", 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("epp", 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("3d", 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("3d2", 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("se", 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - - CORE_DVFS("vde", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("mpe", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("2d", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("epp", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("3d", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("3d2", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("se", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), - - CORE_DVFS("host1x", 0, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), - CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), - CORE_DVFS("host1x", 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), - CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), - - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), - CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), - - CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1200000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), +#ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_555 + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 555000, 555000, 555000, 555000, 555000), +#endif +#ifdef CONFIG_LP_OC_620 + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), +#endif +#ifdef CONFIG_LP_OC_666 + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 342000, 475000, 555000, 666000, 666000, 666000, 666000, 666000), +#endif +#ifdef CONFIG_LP_OC_700 + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 342000, 475000, 620000, 700000, 700000, 700000, 700000, 700000), +#endif +#ifdef CONFIG_LP_OC_740 + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 740000), +#endif +#else + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), +#endif + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 2, 1, KHZ, 204000, 295000, 370000, 428000, 475000, 513000, 579000, 620000, 620000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 450000, 450000, 450000), + + CORE_DVFS("emc", emc_millivolts, 0, 1, KHZ, 1, 266500, 266500, 266500, 266500, 533000, 533000, 533000, 533000), + CORE_DVFS("emc", emc_millivolts, 1, 1, KHZ, 102000, 408000, 408000, 408000, 408000, 667000, 667000, 667000, 667000), + CORE_DVFS("emc", 
emc_millivolts, 2, 1, KHZ, 102000, 408000, 408000, 408000, 408000, 667000, 667000, 800000, 900000), + CORE_DVFS("emc", emc_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), + + CORE_DVFS("sbus", core_millivolts, 0, 1, KHZ, 1, 136000, 164000, 191000, 216000, 216000, 216000, 216000, 216000), + CORE_DVFS("sbus", core_millivolts, 1, 1, KHZ, 205000, 205000, 205000, 227000, 227000, 267000, 267000, 267000, 267000), + CORE_DVFS("sbus", core_millivolts, 2, 1, KHZ, 205000, 205000, 205000, 227000, 227000, 267000, 334000, 334000, 334000), + CORE_DVFS("sbus", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 378000, 378000, 378000), + + CORE_DVFS("vi", core_millivolts, 0, 1, KHZ, 1, 216000, 285000, 300000, 300000, 300000, 300000, 300000, 300000), + CORE_DVFS("vi", core_millivolts, 1, 1, KHZ, 1, 216000, 267000, 300000, 371000, 409000, 409000, 409000, 409000), + CORE_DVFS("vi", core_millivolts, 2, 1, KHZ, 1, 219000, 267000, 300000, 371000, 409000, 425000, 425000, 425000), + CORE_DVFS("vi", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), + +/* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ + + CORE_DVFS("vde", avp_millivolts, 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("mpe", avp_millivolts, 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("2d", avp_millivolts, 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("epp", avp_millivolts, 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d", avp_millivolts, 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d2", avp_millivolts, 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("se", avp_millivolts, 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + +#ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_332 + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 332000, 332000, 332000, 332000), +#endif +#ifdef CONFIG_GPU_OC_446 + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("se", avp_millivolts, 
1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), +#endif +#ifdef CONFIG_GPU_OC_484 + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), +#endif +#ifdef CONFIG_GPU_OC_520 + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), +#endif +#ifdef CONFIG_GPU_OC_600 + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), +#endif +#ifdef CONFIG_GPU_OC_666 + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), +#endif +#ifdef CONFIG_GPU_OC_700 + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), + 
CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), +#endif +#else + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), +#endif + CORE_DVFS("vde", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("mpe", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("2d", avp_millivolts, 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("epp", avp_millivolts, 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("3d", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("3d2", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("se", avp_millivolts, 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + + CORE_DVFS("vde", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("mpe", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("2d", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("epp", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("3d", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("3d2", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("se", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), + + CORE_DVFS("host1x", avp_millivolts, 0, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), +#ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_332 + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 167000, 167000, 167000, 167000, 167000, 167000, 167000, 167000), +#endif +#ifdef CONFIG_GPU_OC_446 + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 223000, 223000, 223000, 223000, 223000, 223000), +#endif +#ifdef CONFIG_GPU_OC_484 + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 242000, 242000, 242000, 242000, 242000, 242000), +#endif +#ifdef CONFIG_GPU_OC_520 + 
CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 260000, 260000, 260000, 260000, 260000), +#endif +#ifdef CONFIG_GPU_OC_600 + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 300000, 300000, 300000, 300000), +#endif +#ifdef CONFIG_GPU_OC_666 + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 333000, 333000, 333000, 333000), +#endif +#ifdef CONFIG_GPU_OC_700 + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 350000, 350000, 350000, 350000), +#endif +#else + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), +#endif + CORE_DVFS("host1x", avp_millivolts, 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), + CORE_DVFS("host1x", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), + + CORE_DVFS("cbus", avp_millivolts, 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), +#ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_332 + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 332000, 332000, 332000, 332000, 332000), +#endif +#ifdef CONFIG_GPU_OC_446 + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), +#endif +#ifdef CONFIG_GPU_OC_484 + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), +#endif +#ifdef CONFIG_GPU_OC_520 + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), +#endif +#ifdef CONFIG_GPU_OC_600 + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), +#endif +#ifdef CONFIG_GPU_OC_666 + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), +#endif +#ifdef CONFIG_GPU_OC_700 + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), +#endif +#else + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), +#endif + CORE_DVFS("cbus", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("cbus", avp_millivolts, 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), + +#ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_332 + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 667000, 667000, 667000, 667000, 667000, 667000), +#endif +#ifdef CONFIG_GPU_OC_446 + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 892000, 892000, 892000, 892000, 892000, 892000), +#endif +#ifdef CONFIG_GPU_OC_484 + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 968000, 968000, 968000, 968000, 968000, 968000), +#endif +#ifdef CONFIG_GPU_OC_520 + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 1040000, 1040000, 1040000, 1040000, 1040000), +#endif +#ifdef CONFIG_GPU_OC_600 + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1200000, 1200000, 1200000, 1200000), +#endif +#ifdef CONFIG_GPU_OC_666 + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1332000, 1332000, 1332000, 1332000), +#endif +#ifdef CONFIG_GPU_OC_700 + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 
800000, 1400000, 1400000, 1400000, 1400000), +#endif +#else + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1200000), +#endif /* * PLLM dvfs is common across all speedo IDs with one special exception * for T30 and T33, rev A02+, provided PLLM usage is restricted. Both * common and restricted table are included, and table selection is * handled by is_pllm_dvfs() below. */ - CORE_DVFS("pll_m", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1066000), + CORE_DVFS("pll_m", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1066000), #ifdef CONFIG_TEGRA_PLLM_RESTRICTED - CORE_DVFS("pll_m", 2, 1, KHZ, 533000, 800000, 800000, 800000, 800000, 1066000, 1066000, 1066000, 1066000), + CORE_DVFS("pll_m", core_millivolts, 2, 1, KHZ, 533000, 800000, 800000, 800000, 800000, 1066000, 1066000, 1066000, 1066000), #endif /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ /* Clock limits for I/O peripherals */ - CORE_DVFS("mipi", 0, 1, KHZ, 1, 1, 1, 1, 1, 1, 1, 1, 1), - CORE_DVFS("mipi", 1, 1, KHZ, 1, 1, 1, 1, 1, 60000, 60000, 60000, 60000), - CORE_DVFS("mipi", 2, 1, KHZ, 1, 1, 1, 1, 1, 60000, 60000, 60000, 60000), - CORE_DVFS("mipi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 1, 1, 1), - - CORE_DVFS("fuse_burn", -1, 1, KHZ, 1, 1, 1, 1, 26000, 26000, 26000, 26000, 26000), - CORE_DVFS("sdmmc1", -1, 1, KHZ, 104000, 104000, 104000, 104000, 104000, 208000, 208000, 208000, 208000), - CORE_DVFS("sdmmc3", -1, 1, KHZ, 104000, 104000, 104000, 104000, 104000, 208000, 208000, 208000, 208000), - CORE_DVFS("ndflash", -1, 1, KHZ, 1, 120000, 120000, 120000, 200000, 200000, 200000, 200000, 200000), - - CORE_DVFS("nor", 0, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), - CORE_DVFS("nor", 1, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), - CORE_DVFS("nor", 2, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), - CORE_DVFS("nor", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 108000, 108000, 108000), - - CORE_DVFS("sbc1", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc2", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc3", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc4", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc5", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc6", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - - CORE_DVFS("usbd", -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), - CORE_DVFS("usb2", -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), - CORE_DVFS("usb3", -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), - - CORE_DVFS("sata", -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), - CORE_DVFS("sata_oob", -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), - CORE_DVFS("pcie", -1, 1, KHZ, 1, 250000, 250000, 250000, 250000, 250000, 250000, 250000, 250000), - CORE_DVFS("afi", -1, 1, KHZ, 1, 250000, 250000, 250000, 250000, 250000, 250000, 250000, 250000), - CORE_DVFS("pll_e", -1, 1, KHZ, 1, 100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000), - - CORE_DVFS("tvdac", -1, 1, KHZ, 1, 220000, 220000, 
220000, 220000, 220000, 220000, 220000, 220000), - CORE_DVFS("tvo", -1, 1, KHZ, 1, 1, 297000, 297000, 297000, 297000, 297000, 297000, 297000), - CORE_DVFS("cve", -1, 1, KHZ, 1, 1, 297000, 297000, 297000, 297000, 297000, 297000, 297000), - CORE_DVFS("dsia", -1, 1, KHZ, 1, 275000, 275000, 275000, 275000, 275000, 275000, 275000, 275000), - CORE_DVFS("dsib", -1, 1, KHZ, 1, 275000, 275000, 275000, 275000, 275000, 275000, 275000, 275000), - CORE_DVFS("hdmi", -1, 1, KHZ, 1, 148500, 148500, 148500, 148500, 148500, 148500, 148500, 148500), + CORE_DVFS("mipi", core_millivolts, 0, 1, KHZ, 1, 1, 1, 1, 1, 1, 1, 1, 1), + CORE_DVFS("mipi", core_millivolts, 1, 1, KHZ, 1, 1, 1, 1, 1, 60000, 60000, 60000, 60000), + CORE_DVFS("mipi", core_millivolts, 2, 1, KHZ, 1, 1, 1, 1, 1, 60000, 60000, 60000, 60000), + CORE_DVFS("mipi", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 1, 1, 1), + + CORE_DVFS("fuse_burn", core_millivolts, -1, 1, KHZ, 1, 1, 1, 1, 26000, 26000, 26000, 26000, 26000), + CORE_DVFS("sdmmc1", core_millivolts, -1, 1, KHZ, 104000, 104000, 104000, 104000, 104000, 208000, 208000, 208000, 208000), + CORE_DVFS("sdmmc3", core_millivolts, -1, 1, KHZ, 104000, 104000, 104000, 104000, 104000, 208000, 208000, 208000, 208000), + CORE_DVFS("ndflash", core_millivolts, -1, 1, KHZ, 1, 120000, 120000, 120000, 200000, 200000, 200000, 200000, 200000), + + CORE_DVFS("nor", core_millivolts, 0, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), + CORE_DVFS("nor", core_millivolts, 1, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), + CORE_DVFS("nor", core_millivolts, 2, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), + CORE_DVFS("nor", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 108000, 108000, 108000), + + CORE_DVFS("sbc1", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc2", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc3", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc4", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc5", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc6", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + + CORE_DVFS("usbd", core_millivolts, -1, 1, KHZ, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + CORE_DVFS("usb2", core_millivolts, -1, 1, KHZ, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + CORE_DVFS("usb3", core_millivolts, -1, 1, KHZ, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + + CORE_DVFS("sata", core_millivolts, -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), + CORE_DVFS("sata_oob", core_millivolts, -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), + CORE_DVFS("pcie", core_millivolts, -1, 1, KHZ, 1, 250000, 250000, 250000, 250000, 250000, 250000, 250000, 250000), + CORE_DVFS("afi", core_millivolts, -1, 1, KHZ, 1, 250000, 250000, 250000, 250000, 250000, 250000, 250000, 250000), + CORE_DVFS("pll_e", core_millivolts, -1, 1, KHZ, 1, 100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000), + + CORE_DVFS("tvdac", core_millivolts, -1, 1, KHZ, 1, 220000, 220000, 220000, 220000, 220000, 220000, 220000, 220000), + 
CORE_DVFS("tvo", core_millivolts, -1, 1, KHZ, 1, 1, 297000, 297000, 297000, 297000, 297000, 297000, 297000), + CORE_DVFS("cve", core_millivolts, -1, 1, KHZ, 1, 1, 297000, 297000, 297000, 297000, 297000, 297000, 297000), + CORE_DVFS("dsia", core_millivolts, -1, 1, KHZ, 1, 275000, 275000, 275000, 275000, 275000, 275000, 275000, 275000), + CORE_DVFS("dsib", core_millivolts, -1, 1, KHZ, 1, 275000, 275000, 275000, 275000, 275000, 275000, 275000, 275000), + CORE_DVFS("hdmi", core_millivolts, -1, 1, KHZ, 1, 148500, 148500, 148500, 148500, 148500, 148500, 148500, 148500), /* * The clock rate for the display controllers that determines the @@ -336,18 +507,18 @@ static struct dvfs core_dvfs_table[] = { * to the display block. Disable auto-dvfs on the display clocks, * and let the display driver call tegra_dvfs_set_rate manually */ - CORE_DVFS("disp1", 0, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), - CORE_DVFS("disp1", 1, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), - CORE_DVFS("disp1", 2, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), - CORE_DVFS("disp1", 3, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), - - CORE_DVFS("disp2", 0, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), - CORE_DVFS("disp2", 1, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), - CORE_DVFS("disp2", 2, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), - CORE_DVFS("disp2", 3, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), - - CORE_DVFS("pwm", -1, 1, KHZ, 1, 408000, 408000, 408000, 408000, 408000, 408000, 408000, 408000), - CORE_DVFS("spdif_out", -1, 1, KHZ, 1, 26000, 26000, 26000, 26000, 26000, 26000, 26000, 26000), + CORE_DVFS("disp1", core_millivolts, 0, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), + CORE_DVFS("disp1", core_millivolts, 1, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), + CORE_DVFS("disp1", core_millivolts, 2, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), + CORE_DVFS("disp1", core_millivolts, 3, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), + + CORE_DVFS("disp2", core_millivolts, 0, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), + CORE_DVFS("disp2", core_millivolts, 1, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), + CORE_DVFS("disp2", core_millivolts, 2, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), + CORE_DVFS("disp2", core_millivolts, 3, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), + + CORE_DVFS("pwm", core_millivolts, -1, 1, KHZ, 1, 408000, 408000, 408000, 408000, 408000, 408000, 408000, 408000), + CORE_DVFS("spdif_out", core_millivolts, -1, 1, KHZ, 1, 26000, 26000, 26000, 26000, 26000, 26000, 26000, 26000), }; @@ -446,7 +617,7 @@ static void __init init_dvfs_one(struct dvfs *d, int nominal_mv_index) tegra_init_max_rate( c, d->freqs[nominal_mv_index] * d->freqs_mult); } - d->max_millivolts = d->dvfs_rail->nominal_millivolts; +// d->max_millivolts = d->dvfs_rail->nominal_millivolts; /* * Check if we may skip enabling dvfs on PLLM. PLLM is a special case, @@ -511,6 +682,7 @@ static int __init get_cpu_nominal_mv_index( * result to the nominal cpu level for the chips with this speedo_id. 
*/ mv = tegra3_dvfs_rail_vdd_core.nominal_millivolts; + pr_info("tegra3_dvfs: %s: tegra3_dvfs_rail_vdd_core.nominal_millivolts mV for cpu_speedo_id: %u is %umV\n",__func__,speedo_id,mv); for (i = 0; i < MAX_DVFS_FREQS; i++) { if ((cpu_millivolts[i] == 0) || tegra3_get_core_floor_mv(cpu_millivolts[i]) > mv) @@ -518,8 +690,10 @@ static int __init get_cpu_nominal_mv_index( } BUG_ON(i == 0); mv = cpu_millivolts[i - 1]; + pr_info("tegra3_dvfs: %s: cpu mv: %i\n", __func__, mv); BUG_ON(mv < tegra3_dvfs_rail_vdd_cpu.min_millivolts); mv = min(mv, tegra_cpu_speedo_mv()); + pr_info("tegra3_dvfs: %s: nominal mV for cpu_speedo_id:%u is %umV\n",__func__,speedo_id,mv); /* * Find matching cpu dvfs entry, and use it to determine index to the @@ -558,6 +732,7 @@ static int __init get_cpu_nominal_mv_index( speedo_id, process_id, d->freqs[i-1] * d->freqs_mult); *cpu_dvfs = d; + pr_info("tegra3_dvfs: %s: cpu_nominal_mv_index: %i\n",__func__, i - 1); return (i - 1); } @@ -966,3 +1141,4 @@ static int __init tegra_dvfs_init_core_cap(void) return 0; } late_initcall(tegra_dvfs_init_core_cap); + diff --git a/arch/arm/mach-tegra/tegra3_emc.c b/arch/arm/mach-tegra/tegra3_emc.c index 3c81495feda..63a7887bcd8 100755 --- a/arch/arm/mach-tegra/tegra3_emc.c +++ b/arch/arm/mach-tegra/tegra3_emc.c @@ -45,6 +45,8 @@ static bool emc_enable; #endif module_param(emc_enable, bool, 0644); +u8 tegra_emc_bw_efficiency = 35; + #define EMC_MIN_RATE_DDR3 25500000 #define EMC_STATUS_UPDATE_TIMEOUT 100 #define TEGRA_EMC_TABLE_MAX_SIZE 16 @@ -888,8 +890,8 @@ static bool is_emc_bridge(void) return false; mv = tegra_dvfs_predict_millivolts(emc, rate); - if (IS_ERR_VALUE(mv) || (mv > TEGRA_EMC_BRIDGE_MVOLTS_MIN)) - return false; +// if (IS_ERR_VALUE(mv) || (mv > TEGRA_EMC_BRIDGE_MVOLTS_MIN)) +// return false; if (clk_set_rate(bridge, rate)) return false; @@ -1028,13 +1030,13 @@ void tegra_init_emc(const struct tegra_emc_table *table, int table_size) adjust_emc_dvfs_table(tegra_emc_table, tegra_emc_table_size); mv = tegra_dvfs_predict_millivolts(emc, max_rate * 1000); - if ((mv <= 0) || (mv > emc->dvfs->max_millivolts)) { - tegra_emc_table = NULL; - pr_err("tegra: invalid EMC DFS table: maximum rate %lu kHz does" - " not match nominal voltage %d\n", - max_rate, emc->dvfs->max_millivolts); - return; - } +// if ((mv <= 0) || (mv > emc->dvfs->max_millivolts)) { +// tegra_emc_table = NULL; +// pr_err("tegra: invalid EMC DFS table: maximum rate %lu kHz does" +// " not match nominal voltage %d\n", +// max_rate, emc->dvfs->max_millivolts); +// return; +// } if (!is_emc_bridge()) { tegra_emc_table = NULL; @@ -1273,6 +1275,22 @@ static int eack_state_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(eack_state_fops, eack_state_get, eack_state_set, "%llu\n"); +static int efficiency_get(void *data, u64 *val) +{ + *val = tegra_emc_bw_efficiency; + return 0; +} +static int efficiency_set(void *data, u64 val) +{ + tegra_emc_bw_efficiency = (val > 100) ? 
100 : val; + if (emc) + tegra_clk_shared_bus_update(emc); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(efficiency_fops, efficiency_get, + efficiency_set, "%llu\n"); + static int __init tegra_emc_debug_init(void) { if (!tegra_emc_table) @@ -1298,6 +1316,10 @@ static int __init tegra_emc_debug_init(void) "eack_state", S_IRUGO | S_IWUSR, emc_debugfs_root, NULL, &eack_state_fops)) goto err_out; + if (!debugfs_create_file("efficiency", S_IRUGO | S_IWUSR, + emc_debugfs_root, NULL, &efficiency_fops)) + goto err_out; + return 0; err_out: diff --git a/arch/arm/mach-tegra/tegra3_emc.h b/arch/arm/mach-tegra/tegra3_emc.h index cfde92c1355..c6a1ddec49f 100755 --- a/arch/arm/mach-tegra/tegra3_emc.h +++ b/arch/arm/mach-tegra/tegra3_emc.h @@ -27,6 +27,8 @@ #define TEGRA_EMC_BRIDGE_RATE_MIN 300000000 #define TEGRA_EMC_BRIDGE_MVOLTS_MIN 1200 +extern u8 tegra_emc_bw_efficiency; + struct tegra_emc_table { u8 rev; unsigned long rate; diff --git a/arch/arm/mach-tegra/tegra3_speedo.c b/arch/arm/mach-tegra/tegra3_speedo.c index bd880bc7ca8..c5db102d5b1 100644 --- a/arch/arm/mach-tegra/tegra3_speedo.c +++ b/arch/arm/mach-tegra/tegra3_speedo.c @@ -129,6 +129,13 @@ static int cpu_speedo_id; static int soc_speedo_id; static int package_id; +#ifdef CONFIG_TEGRA_VARIANT_INFO +int orig_cpu_process_id; +int orig_core_process_id; +int orig_cpu_speedo_id; +int orig_soc_speedo_id; +#endif + static void fuse_speedo_calib(u32 *speedo_g, u32 *speedo_lp) { u32 reg; @@ -233,9 +240,14 @@ static void rev_sku_to_speedo_ids(int rev, int sku) case 0x83: /* T30L or T30S */ switch (package_id) { case 1: /* MID => T30L */ - cpu_speedo_id = 7; +#ifdef CONFIG_TEGRA_VARIANT_INFO + /* save it for T3 Variant info */ + orig_cpu_speedo_id = 7; + orig_soc_speedo_id = 1; +#endif + cpu_speedo_id = 4; soc_speedo_id = 1; - threshold_index = 10; + threshold_index = 7; break; case 2: /* DSC => T30S */ cpu_speedo_id = 3; @@ -428,7 +440,12 @@ void tegra_init_speedo_data(void) break; } } +#ifdef CONFIG_TEGRA_VARIANT_INFO cpu_process_id = iv -1; + orig_cpu_process_id = cpu_process_id; +#endif + cpu_process_id = 2; //iv -1; + if (cpu_process_id == -1) { pr_err("****************************************************"); @@ -448,7 +465,11 @@ void tegra_init_speedo_data(void) break; } } +#ifdef CONFIG_TEGRA_VARIANT_INFO core_process_id = iv -1; + orig_core_process_id = core_process_id; +#endif + core_process_id = 1; //iv -1; if (core_process_id == -1) { pr_err("****************************************************"); @@ -510,8 +531,9 @@ int tegra_package_id(void) * latter is resolved by the dvfs code) */ static const int cpu_speedo_nominal_millivolts[] = -/* speedo_id 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 */ - { 1125, 1150, 1150, 1150, 1237, 1237, 1237, 1150, 1150, 912, 850, 850, 1237, 1237}; + /* speedo_id + * 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 */ + { 1125, 1150, 1150, 1150, 1237, 1237, 1237, 1237, 1150, 912, 850, 850, 1237, 1237}; int tegra_cpu_speedo_mv(void) { diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index 6e84e3d8279..71c38adf1f2 100755 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -1134,6 +1134,7 @@ static unsigned int tegra_phy_xcvr_setup_value(struct tegra_utmip_config *cfg) return (unsigned int)val; } +static void utmip_phy_disable_pmc_bus_ctrl(struct tegra_usb_phy *phy); static int utmi_phy_power_on(struct tegra_usb_phy *phy, bool is_dpd) { unsigned long val; @@ -1302,6 +1303,8 @@ static int utmi_phy_power_on(struct tegra_usb_phy *phy, bool is_dpd) if (phy->mode == 
TEGRA_USB_PHY_MODE_DEVICE) utmip_powerup_pmc_wake_detect(phy); + else + utmip_phy_disable_pmc_bus_ctrl(phy); #endif return 0; diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index 5f8071110e8..cc41a6a0619 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -331,6 +331,10 @@ static void __set_pmd_pte(pmd_t *pmd, unsigned long address, pte_t *pte) cpa_debug("__set_pmd_pte %x %x %x\n", pmd, pte, *pte); + /* enforce pte entry stores ordering to avoid pmd writes + * bypassing pte stores. + */ + dsb(); /* change init_mm */ pmd_populate_kernel(&init_mm, pmd, pte); @@ -342,7 +346,10 @@ static void __set_pmd_pte(pmd_t *pmd, unsigned long address, pte_t *pte) pgd_index(address), address); pmd_populate_kernel(NULL, pmd, pte); } - + /* enforce pmd entry stores ordering to avoid tlb flush bypassing + * pmd entry stores. + */ + dsb(); } static int diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index e381dc68505..07d3a2c6abb 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -574,6 +574,9 @@ static int __init vfp_init(void) unsigned int vfpsid; unsigned int cpu_arch = cpu_architecture(); +#ifdef CONFIG_SMP + preempt_disable(); +#endif if (cpu_arch >= CPU_ARCH_ARMv6) vfp_enable(NULL); @@ -587,6 +590,9 @@ static int __init vfp_init(void) vfpsid = fmrx(FPSID); barrier(); vfp_vector = vfp_null_entry; +#ifdef CONFIG_SMP + preempt_enable(); +#endif printk(KERN_INFO "VFP support v0.3: "); if (VFP_arch) @@ -596,7 +602,7 @@ static int __init vfp_init(void) } else { hotcpu_notifier(vfp_hotplug, 0); - smp_call_function(vfp_enable, NULL, 1); + on_each_cpu(vfp_enable, NULL, 1); VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; /* Extract the architecture version */ printk("implementor %02x architecture %d part %02x variant %x rev %x\n", diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 02ee9adff54..eac388e2aaf 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include "init.h" #include "kern_constants.h" diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 3199b76f795..8b27e90013a 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -21,6 +21,17 @@ config IOSCHED_DEADLINE a new point in the service tree and doing a batch of IO from there in case of expiry. +config IOSCHED_ROW + tristate "ROW I/O scheduler" + default y + ---help--- + The ROW I/O scheduler gives priority to READ requests over the + WRITE requests when dispatching, without starving WRITE requests. + Requests are kept in priority queues. Dispatching is done in a RR + manner when the dispatch quantum for each queue is calculated + according to queue priority. + Most suitable for mobile devices. + config IOSCHED_CFQ tristate "CFQ I/O scheduler" # If BLK_CGROUP is a module, CFQ has to be built as module. @@ -43,6 +54,45 @@ config CFQ_GROUP_IOSCHED ---help--- Enable group IO scheduling in CFQ. +config IOSCHED_SIO + tristate "Simple I/O scheduler" + default y + ---help--- + The Simple I/O scheduler is an extremely simple scheduler, + based on noop and deadline, that relies on deadlines to + ensure fairness. The algorithm does not do any sorting but + basic merging, trying to keep a minimum overhead. It is aimed + mainly for aleatory access devices (eg: flash devices). 
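Whichever of these schedulers ends up built in, the scheduler used by a particular block device can normally be changed at runtime through sysfs. The user-space snippet below is a hedged illustration only and is not part of this patch; the device name mmcblk0 and the choice of "row" are assumptions picked for a typical eMMC-based mobile device.

#include <stdio.h>

/*
 * Illustrative only: select an I/O scheduler for one block device by
 * writing its name to that device's sysfs "scheduler" attribute.
 */
int main(void)
{
	FILE *f = fopen("/sys/block/mmcblk0/queue/scheduler", "w");

	if (f == NULL) {
		perror("fopen");
		return 1;
	}
	fputs("row", f);	/* e.g. "noop", "deadline", "cfq", "row", "sio", "vr" or "bfq" */
	fclose(f);
	return 0;
}

The compiled-in default is picked by the DEFAULT_IOSCHED choice further down (or overridden at boot with the elevator= command-line parameter); sysfs then allows per-device overrides after boot.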
+ +config IOSCHED_VR + tristate "V(R) I/O scheduler" + default n + ---help--- + Requests are chosen according to SSTF with a penalty of rev_penalty + for switching head direction. + +config IOSCHED_BFQ + tristate "BFQ I/O scheduler" + depends on EXPERIMENTAL + default n + ---help--- + The BFQ I/O scheduler tries to distribute bandwidth among + all processes according to their weights. + It aims at distributing the bandwidth as desired, independently of + the disk parameters and with any workload. It also tries to + guarantee low latency to interactive and soft real-time + applications. If compiled built-in (saying Y here), BFQ can + be configured to support hierarchical scheduling. + +config CGROUP_BFQIO + bool "BFQ hierarchical scheduling support" + depends on CGROUPS && IOSCHED_BFQ=y + default n + ---help--- + Enable hierarchical scheduling in BFQ, using the cgroups + filesystem interface. The name of the subsystem will be + bfqio. + choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -53,20 +103,49 @@ choice config DEFAULT_DEADLINE bool "Deadline" if IOSCHED_DEADLINE=y + config DEFAULT_ROW + bool "ROW" if IOSCHED_ROW=y + help + The ROW I/O scheduler gives priority to READ requests + over the WRITE requests when dispatching, without starving + WRITE requests. Requests are kept in priority queues. + Dispatching is done in a RR manner when the dispatch quantum + for each queue is defined according to queue priority. + Most suitable for mobile devices. + config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y + config DEFAULT_BFQ + bool "BFQ" if IOSCHED_BFQ=y + help + Selects BFQ as the default I/O scheduler which will be + used by default for all block devices. + The BFQ I/O scheduler aims at distributing the bandwidth + as desired, independently of the disk parameters and with + any workload. It also tries to guarantee low latency to + interactive and soft real-time applications. + config DEFAULT_NOOP bool "No-op" + + config DEFAULT_SIO + bool "SIO" if IOSCHED_SIO=y + + config DEFAULT_VR + bool "V(R)" if IOSCHED_VR=y endchoice config DEFAULT_IOSCHED string default "deadline" if DEFAULT_DEADLINE + default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ + default "bfq" if DEFAULT_BFQ default "noop" if DEFAULT_NOOP - + default "sio" if DEFAULT_SIO + default "vr" if DEFAULT_VR endmenu endif diff --git a/block/Makefile b/block/Makefile index 514c6e4f427..8613fe380f0 100644 --- a/block/Makefile +++ b/block/Makefile @@ -13,7 +13,11 @@ obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o +obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o +obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o +obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o +obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c new file mode 100644 index 00000000000..349fa7facd0 --- /dev/null +++ b/block/bfq-cgroup.c @@ -0,0 +1,900 @@ +/* + * BFQ: CGROUPS support. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. 
+ */ + +#ifdef CONFIG_CGROUP_BFQIO +static struct bfqio_cgroup bfqio_root_cgroup = { + .weight = BFQ_DEFAULT_GRP_WEIGHT, + .ioprio = BFQ_DEFAULT_GRP_IOPRIO, + .ioprio_class = BFQ_DEFAULT_GRP_CLASS, +}; + +static inline void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class; + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; +} + +static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) +{ + return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), + struct bfqio_cgroup, css); +} + +/* + * Search the bfq_group for bfqd into the hash table (by now only a list) + * of bgrp. Must be called under rcu_read_lock(). + */ +static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, + struct bfq_data *bfqd) +{ + struct bfq_group *bfqg; + struct hlist_node *n; + void *key; + + hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { + key = rcu_dereference(bfqg->bfqd); + if (key == bfqd) + return bfqg; + } + + return NULL; +} + +static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, + struct bfq_group *bfqg) +{ + struct bfq_entity *entity = &bfqg->entity; + + /* + * If the weight of the entity has never been set via the sysfs + * interface, then bgrp->weight == 0. In this case we initialize + * the weight from the current ioprio value. Otherwise, the group + * weight, if set, has priority over the ioprio value. + */ + if (bgrp->weight == 0) { + entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); + entity->new_ioprio = bgrp->ioprio; + } else { + entity->new_weight = bgrp->weight; + entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); + } + entity->orig_weight = entity->weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; + entity->my_sched_data = &bfqg->sched_data; + bfqg->active_entities = 0; +} + +static inline void bfq_group_set_parent(struct bfq_group *bfqg, + struct bfq_group *parent) +{ + struct bfq_entity *entity; + + BUG_ON(parent == NULL); + BUG_ON(bfqg == NULL); + + entity = &bfqg->entity; + entity->parent = parent->my_entity; + entity->sched_data = &parent->sched_data; +} + +/** + * bfq_group_chain_alloc - allocate a chain of groups. + * @bfqd: queue descriptor. + * @cgroup: the leaf cgroup this chain starts from. + * + * Allocate a chain of groups starting from the one belonging to + * @cgroup up to the root cgroup. Stop if a cgroup on the chain + * to the root has already an allocated group on @bfqd. + */ +static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp; + struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; + + for (; cgroup != NULL; cgroup = cgroup->parent) { + bgrp = cgroup_to_bfqio(cgroup); + + bfqg = bfqio_lookup_group(bgrp, bfqd); + if (bfqg != NULL) { + /* + * All the cgroups in the path from there to the + * root must have a bfq_group for bfqd, so we don't + * need any more allocations. 
+ */ + break; + } + + bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); + if (bfqg == NULL) + goto cleanup; + + bfq_group_init_entity(bgrp, bfqg); + bfqg->my_entity = &bfqg->entity; + + if (leaf == NULL) { + leaf = bfqg; + prev = leaf; + } else { + bfq_group_set_parent(prev, bfqg); + /* + * Build a list of allocated nodes using the bfqd + * filed, that is still unused and will be + * initialized only after the node will be + * connected. + */ + prev->bfqd = bfqg; + prev = bfqg; + } + } + + return leaf; + +cleanup: + while (leaf != NULL) { + prev = leaf; + leaf = leaf->bfqd; + kfree(prev); + } + + return NULL; +} + +/** + * bfq_group_chain_link - link an allocated group chain to a cgroup + * hierarchy. + * @bfqd: the queue descriptor. + * @cgroup: the leaf cgroup to start from. + * @leaf: the leaf group (to be associated to @cgroup). + * + * Try to link a chain of groups to a cgroup hierarchy, connecting the + * nodes bottom-up, so we can be sure that when we find a cgroup in the + * hierarchy that already as a group associated to @bfqd all the nodes + * in the path to the root cgroup have one too. + * + * On locking: the queue lock protects the hierarchy (there is a hierarchy + * per device) while the bfqio_cgroup lock protects the list of groups + * belonging to the same cgroup. + */ +static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, + struct bfq_group *leaf) +{ + struct bfqio_cgroup *bgrp; + struct bfq_group *bfqg, *next, *prev = NULL; + unsigned long flags; + + assert_spin_locked(bfqd->queue->queue_lock); + + for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { + bgrp = cgroup_to_bfqio(cgroup); + next = leaf->bfqd; + + bfqg = bfqio_lookup_group(bgrp, bfqd); + BUG_ON(bfqg != NULL); + + spin_lock_irqsave(&bgrp->lock, flags); + + rcu_assign_pointer(leaf->bfqd, bfqd); + hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); + hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); + + spin_unlock_irqrestore(&bgrp->lock, flags); + + prev = leaf; + leaf = next; + } + + BUG_ON(cgroup == NULL && leaf != NULL); + if (cgroup != NULL && prev != NULL) { + bgrp = cgroup_to_bfqio(cgroup); + bfqg = bfqio_lookup_group(bgrp, bfqd); + bfq_group_set_parent(prev, bfqg); + } +} + +/** + * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. + * @bfqd: queue descriptor. + * @cgroup: cgroup being searched for. + * + * Return a group associated to @bfqd in @cgroup, allocating one if + * necessary. When a group is returned all the cgroups in the path + * to the root have a group associated to @bfqd. + * + * If the allocation fails, return the root group: this breaks guarantees + * but is a safe fallback. If this loss becomes a problem it can be + * mitigated using the equivalent weight (given by the product of the + * weights of the groups in the path from @group to the root) in the + * root scheduler. + * + * We allocate all the missing nodes in the path from the leaf cgroup + * to the root and we connect the nodes only after all the allocations + * have been successful. + */ +static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); + struct bfq_group *bfqg; + + bfqg = bfqio_lookup_group(bgrp, bfqd); + if (bfqg != NULL) + return bfqg; + + bfqg = bfq_group_chain_alloc(bfqd, cgroup); + if (bfqg != NULL) + bfq_group_chain_link(bfqd, cgroup, bfqg); + else + bfqg = bfqd->root_group; + + return bfqg; +} + +/** + * bfq_bfqq_move - migrate @bfqq to @bfqg. 
+ * @bfqd: queue descriptor. + * @bfqq: the queue to move. + * @entity: @bfqq's entity. + * @bfqg: the group to move to. + * + * Move @bfqq to @bfqg, deactivating it from its old group and reactivating + * it on the new one. Avoid putting the entity on the old group idle tree. + * + * Must be called under the queue lock; the cgroup owning @bfqg must + * not disappear (by now this just means that we are called under + * rcu_read_lock()). + */ +static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct bfq_entity *entity, struct bfq_group *bfqg) +{ + int busy, resume; + + busy = bfq_bfqq_busy(bfqq); + resume = !RB_EMPTY_ROOT(&bfqq->sort_list); + + BUG_ON(resume && !entity->on_st); + BUG_ON(busy && !resume && entity->on_st && + bfqq != bfqd->in_service_queue); + + if (busy) { + BUG_ON(atomic_read(&bfqq->ref) < 2); + + if (!resume) + bfq_del_bfqq_busy(bfqd, bfqq, 0); + else + bfq_deactivate_bfqq(bfqd, bfqq, 0); + } else if (entity->on_st) + bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); + + /* + * Here we use a reference to bfqg. We don't need a refcounter + * as the cgroup reference will not be dropped, so that its + * destroy() callback will not be invoked. + */ + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; + + if (busy && resume) + bfq_activate_bfqq(bfqd, bfqq); + + if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); +} + +/** + * __bfq_cic_change_cgroup - move @cic to @cgroup. + * @bfqd: the queue descriptor. + * @cic: the cic to move. + * @cgroup: the cgroup to move to. + * + * Move cic to cgroup, assuming that bfqd->queue is locked; the caller + * has to make sure that the reference to cgroup is valid across the call. + * + * NOTE: an alternative approach might have been to store the current + * cgroup in bfqq and getting a reference to it, reducing the lookup + * time here, at the price of slightly more complex code. + */ +static struct bfq_group *__bfq_cic_change_cgroup(struct bfq_data *bfqd, + struct cfq_io_context *cic, + struct cgroup *cgroup) +{ + struct bfq_queue *async_bfqq; + struct bfq_queue *sync_bfqq; + struct bfq_entity *entity; + struct bfq_group *bfqg; + + spin_lock(&bfqd->eqm_lock); + + async_bfqq = cic_to_bfqq(cic, 0); + sync_bfqq = cic_to_bfqq(cic, 1); + + bfqg = bfq_find_alloc_group(bfqd, cgroup); + if (async_bfqq != NULL) { + entity = &async_bfqq->entity; + + if (entity->sched_data != &bfqg->sched_data) { + cic_set_bfqq(cic, NULL, 0); + bfq_log_bfqq(bfqd, async_bfqq, + "cic_change_group: %p %d", + async_bfqq, atomic_read(&async_bfqq->ref)); + bfq_put_queue(async_bfqq); + } + } + + if (sync_bfqq != NULL) { + entity = &sync_bfqq->entity; + if (entity->sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); + } + + spin_unlock(&bfqd->eqm_lock); + + return bfqg; +} + +/** + * bfq_cic_change_cgroup - move @cic to @cgroup. + * @cic: the cic being migrated. + * @cgroup: the destination cgroup. + * + * When the task owning @cic is moved to @cgroup, @cic is immediately + * moved into its new parent group. 
+ */ +static void bfq_cic_change_cgroup(struct cfq_io_context *cic, + struct cgroup *cgroup) +{ + struct bfq_data *bfqd; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (bfqd != NULL && + !strncmp(bfqd->queue->elevator->elevator_type->elevator_name, + "bfq", ELV_NAME_MAX)) { + __bfq_cic_change_cgroup(bfqd, cic, cgroup); + bfq_put_bfqd_unlock(bfqd, &flags); + } +} + +/** + * bfq_cic_update_cgroup - update the cgroup of @cic. + * @cic: the @cic to update. + * + * Make sure that @cic is enqueued in the cgroup of the current task. + * We need this in addition to moving cics during the cgroup attach + * phase because the task owning @cic could be at its first disk + * access or we may end up in the root cgroup as the result of a + * memory allocation failure and here we try to move to the right + * group. + * + * Must be called under the queue lock. It is safe to use the returned + * value even after the rcu_read_unlock() as the migration/destruction + * paths act under the queue lock too. IOW it is impossible to race with + * group migration/destruction and end up with an invalid group as: + * a) here cgroup has not yet been destroyed, nor its destroy callback + * has started execution, as current holds a reference to it, + * b) if it is destroyed after rcu_read_unlock() [after current is + * migrated to a different cgroup] its attach() callback will have + * taken care of remove all the references to the old cgroup data. + */ +static struct bfq_group *bfq_cic_update_cgroup(struct cfq_io_context *cic) +{ + struct bfq_data *bfqd = cic->key; + struct bfq_group *bfqg; + struct cgroup *cgroup; + + BUG_ON(bfqd == NULL); + + rcu_read_lock(); + cgroup = task_cgroup(current, bfqio_subsys_id); + bfqg = __bfq_cic_change_cgroup(bfqd, cic, cgroup); + rcu_read_unlock(); + + return bfqg; +} + +/** + * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. + * @st: the service tree being flushed. + */ +static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) +{ + struct bfq_entity *entity = st->first_idle; + + for (; entity != NULL; entity = st->first_idle) + __bfq_deactivate_entity(entity, 0); +} + +/** + * bfq_reparent_leaf_entity - move leaf entity to the root_group. + * @bfqd: the device data structure with the root group. + * @entity: the entity to move. + */ +static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(bfqq == NULL); + bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); + return; +} + +/** + * bfq_reparent_active_entities - move to the root group all active + * entities. + * @bfqd: the device data structure with the root group. + * @bfqg: the group to move from. + * @st: the service tree with the entities. + * + * Needs queue_lock to be taken and reference to be valid over the call. + */ +static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, + struct bfq_group *bfqg, + struct bfq_service_tree *st) +{ + struct rb_root *active = &st->active; + struct bfq_entity *entity = NULL; + + if (!RB_EMPTY_ROOT(&st->active)) + entity = bfq_entity_of(rb_first(active)); + + for (; entity != NULL; entity = bfq_entity_of(rb_first(active))) + bfq_reparent_leaf_entity(bfqd, entity); + + if (bfqg->sched_data.in_service_entity != NULL) + bfq_reparent_leaf_entity(bfqd, + bfqg->sched_data.in_service_entity); + + return; +} + +/** + * bfq_destroy_group - destroy @bfqg. + * @bgrp: the bfqio_cgroup containing @bfqg. 
+ * @bfqg: the group being destroyed. + * + * Destroy @bfqg, making sure that it is not referenced from its parent. + */ +static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) +{ + struct bfq_data *bfqd; + struct bfq_service_tree *st; + struct bfq_entity *entity = bfqg->my_entity; + unsigned long uninitialized_var(flags); + int i; + + hlist_del(&bfqg->group_node); + + /* + * Empty all service_trees belonging to this group before + * deactivating the group itself. + */ + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { + st = bfqg->sched_data.service_tree + i; + + /* + * The idle tree may still contain bfq_queues belonging + * to exited task because they never migrated to a different + * cgroup from the one being destroyed now. No one else + * can access them so it's safe to act without any lock. + */ + bfq_flush_idle_tree(st); + + /* + * It may happen that some queues are still active + * (busy) upon group destruction (if the corresponding + * processes have been forced to terminate). We move + * all the leaf entities corresponding to these queues + * to the root_group. + * Also, it may happen that the group has an entity + * in service, which is disconnected from the active + * tree: it must be moved, too. + * There is no need to put the sync queues, as the + * scheduler has taken no reference. + */ + bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); + if (bfqd != NULL) { + bfq_reparent_active_entities(bfqd, bfqg, st); + bfq_put_bfqd_unlock(bfqd, &flags); + } + BUG_ON(!RB_EMPTY_ROOT(&st->active)); + BUG_ON(!RB_EMPTY_ROOT(&st->idle)); + } + BUG_ON(bfqg->sched_data.next_in_service != NULL); + BUG_ON(bfqg->sched_data.in_service_entity != NULL); + + /* + * We may race with device destruction, take extra care when + * dereferencing bfqg->bfqd. + */ + bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); + if (bfqd != NULL) { + hlist_del(&bfqg->bfqd_node); + __bfq_deactivate_entity(entity, 0); + bfq_put_async_queues(bfqd, bfqg); + bfq_put_bfqd_unlock(bfqd, &flags); + } + BUG_ON(entity->tree != NULL); + + /* + * No need to defer the kfree() to the end of the RCU grace + * period: we are called from the destroy() callback of our + * cgroup, so we can be sure that no one is a) still using + * this cgroup or b) doing lookups in it. + */ + kfree(bfqg); +} + +static void bfq_end_wr_async(struct bfq_data *bfqd) +{ + struct hlist_node *pos, *n; + struct bfq_group *bfqg; + + hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) + bfq_end_wr_async_queues(bfqd, bfqg); + bfq_end_wr_async_queues(bfqd, bfqd->root_group); +} + +/** + * bfq_disconnect_groups - disconnect @bfqd from all its groups. + * @bfqd: the device descriptor being exited. + * + * When the device exits we just make sure that no lookup can return + * the now unused group structures. They will be deallocated on cgroup + * destruction. + */ +static void bfq_disconnect_groups(struct bfq_data *bfqd) +{ + struct hlist_node *pos, *n; + struct bfq_group *bfqg; + + bfq_log(bfqd, "disconnect_groups beginning"); + hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { + hlist_del(&bfqg->bfqd_node); + + __bfq_deactivate_entity(bfqg->my_entity, 0); + + /* + * Don't remove from the group hash, just set an + * invalid key. No lookups can race with the + * assignment as bfqd is being destroyed; this + * implies also that new elements cannot be added + * to the list. 
+ */ + rcu_assign_pointer(bfqg->bfqd, NULL); + + bfq_log(bfqd, "disconnect_groups: put async for group %p", + bfqg); + bfq_put_async_queues(bfqd, bfqg); + } +} + +static inline void bfq_free_root_group(struct bfq_data *bfqd) +{ + struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; + struct bfq_group *bfqg = bfqd->root_group; + + bfq_put_async_queues(bfqd, bfqg); + + spin_lock_irq(&bgrp->lock); + hlist_del_rcu(&bfqg->group_node); + spin_unlock_irq(&bgrp->lock); + + /* + * No need to synchronize_rcu() here: since the device is gone + * there cannot be any read-side access to its root_group. + */ + kfree(bfqg); +} + +static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + struct bfqio_cgroup *bgrp; + int i; + + bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node); + if (bfqg == NULL) + return NULL; + + bfqg->entity.parent = NULL; + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; + + bgrp = &bfqio_root_cgroup; + spin_lock_irq(&bgrp->lock); + rcu_assign_pointer(bfqg->bfqd, bfqd); + hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); + spin_unlock_irq(&bgrp->lock); + + return bfqg; +} + +#define SHOW_FUNCTION(__VAR) \ +static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ + struct cftype *cftype) \ +{ \ + struct bfqio_cgroup *bgrp; \ + u64 ret; \ + \ + if (!cgroup_lock_live_group(cgroup)) \ + return -ENODEV; \ + \ + bgrp = cgroup_to_bfqio(cgroup); \ + spin_lock_irq(&bgrp->lock); \ + ret = bgrp->__VAR; \ + spin_unlock_irq(&bgrp->lock); \ + \ + cgroup_unlock(); \ + \ + return ret; \ +} + +SHOW_FUNCTION(weight); +SHOW_FUNCTION(ioprio); +SHOW_FUNCTION(ioprio_class); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ +static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ + struct cftype *cftype, \ + u64 val) \ +{ \ + struct bfqio_cgroup *bgrp; \ + struct bfq_group *bfqg; \ + struct hlist_node *n; \ + \ + if (val < (__MIN) || val > (__MAX)) \ + return -EINVAL; \ + \ + if (!cgroup_lock_live_group(cgroup)) \ + return -ENODEV; \ + \ + bgrp = cgroup_to_bfqio(cgroup); \ + \ + spin_lock_irq(&bgrp->lock); \ + bgrp->__VAR = (unsigned short)val; \ + hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ + /* \ + * Setting the ioprio_changed flag of the entity \ + * to 1 with new_##__VAR == ##__VAR would re-set \ + * the value of the weight to its ioprio mapping. \ + * Set the flag only if necessary. \ + */ \ + if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ + bfqg->entity.new_##__VAR = (unsigned short)val; \ + /* \ + * Make sure that the above new value has been \ + * stored in bfqg->entity.new_##__VAR before \ + * setting the ioprio_changed flag. In fact, \ + * this flag may be read asynchronously (in \ + * critical sections protected by a different \ + * lock than that held here), and finding this \ + * flag set may cause the execution of the code \ + * for updating parameters whose value may \ + * depend also on bfqg->entity.new_##__VAR (in \ + * __bfq_entity_update_weight_prio). \ + * This barrier makes sure that the new value \ + * of bfqg->entity.new_##__VAR is correctly \ + * seen in that code. 
\ + */ \ + smp_wmb(); \ + bfqg->entity.ioprio_changed = 1; \ + } \ + } \ + spin_unlock_irq(&bgrp->lock); \ + \ + cgroup_unlock(); \ + \ + return 0; \ +} + +STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); +STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); +STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); +#undef STORE_FUNCTION + +static struct cftype bfqio_files[] = { + { + .name = "weight", + .read_u64 = bfqio_cgroup_weight_read, + .write_u64 = bfqio_cgroup_weight_write, + }, + { + .name = "ioprio", + .read_u64 = bfqio_cgroup_ioprio_read, + .write_u64 = bfqio_cgroup_ioprio_write, + }, + { + .name = "ioprio_class", + .read_u64 = bfqio_cgroup_ioprio_class_read, + .write_u64 = bfqio_cgroup_ioprio_class_write, + }, +}; + +static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, subsys, bfqio_files, + ARRAY_SIZE(bfqio_files)); +} + +static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys *subsys, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp; + + if (cgroup->parent != NULL) { + bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); + if (bgrp == NULL) + return ERR_PTR(-ENOMEM); + } else + bgrp = &bfqio_root_cgroup; + + spin_lock_init(&bgrp->lock); + INIT_HLIST_HEAD(&bgrp->group_data); + bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; + bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; + + return &bgrp->css; +} + +/* + * We cannot support shared io contexts, as we have no means to support + * two tasks with the same ioc in two different groups without major rework + * of the main cic/bfqq data structures. By now we allow a task to change + * its cgroup only if it's the only owner of its ioc; the drawback of this + * behavior is that a group containing a task that forked using CLONE_IO + * will not be destroyed until the tasks sharing the ioc die. + */ +static int bfqio_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct task_struct *tsk) +{ + struct io_context *ioc; + int ret = 0; + + /* task_lock() is needed to avoid races with exit_io_context() */ + task_lock(tsk); + ioc = tsk->io_context; + if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) + /* + * ioc == NULL means that the task is either too + * young or exiting: if it has still no ioc the + * ioc can't be shared, if the task is exiting the + * attach will fail anyway, no matter what we + * return here. 
+ */ + ret = -EINVAL; + task_unlock(tsk); + + return ret; +} + +static void bfqio_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct cgroup *prev, struct task_struct *tsk) +{ + struct io_context *ioc; + struct cfq_io_context *cic; + struct hlist_node *n; + + task_lock(tsk); + ioc = tsk->io_context; + if (ioc != NULL) { + BUG_ON(atomic_long_read(&ioc->refcount) == 0); + atomic_long_inc(&ioc->refcount); + } + task_unlock(tsk); + + if (ioc == NULL) + return; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) + bfq_cic_change_cgroup(cic, cgroup); + rcu_read_unlock(); + + put_io_context(ioc); +} + +static void bfqio_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); + struct hlist_node *n, *tmp; + struct bfq_group *bfqg; + + /* + * Since we are destroying the cgroup, there are no more tasks + * referencing it, and all the RCU grace periods that may have + * referenced it are ended (as the destruction of the parent + * cgroup is RCU-safe); bgrp->group_data will not be accessed by + * anything else and we don't need any synchronization. + */ + hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) + bfq_destroy_group(bgrp, bfqg); + + BUG_ON(!hlist_empty(&bgrp->group_data)); + + kfree(bgrp); +} + +struct cgroup_subsys bfqio_subsys = { + .name = "bfqio", + .create = bfqio_create, + .can_attach = bfqio_can_attach, + .attach = bfqio_attach, + .destroy = bfqio_destroy, + .populate = bfqio_populate, + .subsys_id = bfqio_subsys_id, +}; +#else +static inline void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class; + entity->sched_data = &bfqg->sched_data; +} + +static inline struct bfq_group * +bfq_cic_update_cgroup(struct cfq_io_context *cic) +{ + struct bfq_data *bfqd = cic->key; + return bfqd->root_group; +} + +static inline void bfq_bfqq_move(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct bfq_entity *entity, + struct bfq_group *bfqg) +{ +} + +static void bfq_end_wr_async(struct bfq_data *bfqd) +{ + bfq_end_wr_async_queues(bfqd, bfqd->root_group); +} + +static inline void bfq_disconnect_groups(struct bfq_data *bfqd) +{ + bfq_put_async_queues(bfqd, bfqd->root_group); +} + +static inline void bfq_free_root_group(struct bfq_data *bfqd) +{ + kfree(bfqd->root_group); +} + +static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + int i; + + bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); + if (bfqg == NULL) + return NULL; + + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; + + return bfqg; +} +#endif diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c new file mode 100644 index 00000000000..2dd6699cfa6 --- /dev/null +++ b/block/bfq-ioc.c @@ -0,0 +1,410 @@ +/* + * BFQ: I/O context handling. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +/** + * bfq_cic_free_rcu - deferred cic freeing. + * @head: RCU head of the cic to free. + * + * Free the cic containing @head and, if it was the last one and + * the module is exiting wake up anyone waiting for its deallocation + * (see bfq_exit()). 
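The bfqio cgroup interface registered above (bfqio_files together with bfqio_subsys) exposes weight, ioprio and ioprio_class as per-group control files. The snippet below is a hedged user-space sketch, not part of this patch: it assumes the controller has been mounted at /sys/fs/cgroup/bfqio, that the control files carry the usual "bfqio." subsystem prefix, and that the group name "media" and the weight value 500 (expected to lie within the BFQ_MIN_WEIGHT..BFQ_MAX_WEIGHT range checked by the store functions above) are arbitrary examples.

#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* Illustrative helper: write a short string to a cgroup control file. */
static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (f == NULL)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	char pid[16];

	/* Create a group and raise its weight (paths and values are assumptions). */
	mkdir("/sys/fs/cgroup/bfqio/media", 0755);
	write_str("/sys/fs/cgroup/bfqio/media/bfqio.weight", "500");

	/* Move the calling process into the new group. */
	snprintf(pid, sizeof(pid), "%d", (int)getpid());
	return write_str("/sys/fs/cgroup/bfqio/media/tasks", pid) ? 1 : 0;
}

Because bfqio_can_attach() above rejects tasks whose io_context is shared (for example after CLONE_IO), moving such a task fails with EINVAL until it is the only owner of its ioc.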
+ */ +static void bfq_cic_free_rcu(struct rcu_head *head) +{ + struct cfq_io_context *cic; + + cic = container_of(head, struct cfq_io_context, rcu_head); + + kmem_cache_free(bfq_ioc_pool, cic); + elv_ioc_count_dec(bfq_ioc_count); + + if (bfq_ioc_gone != NULL) { + spin_lock(&bfq_ioc_gone_lock); + if (bfq_ioc_gone != NULL && + !elv_ioc_count_read(bfq_ioc_count)) { + complete(bfq_ioc_gone); + bfq_ioc_gone = NULL; + } + spin_unlock(&bfq_ioc_gone_lock); + } +} + +static void bfq_cic_free(struct cfq_io_context *cic) +{ + call_rcu(&cic->rcu_head, bfq_cic_free_rcu); +} + +/** + * cic_free_func - disconnect a cic ready to be freed. + * @ioc: the io_context @cic belongs to. + * @cic: the cic to be freed. + * + * Remove @cic from the @ioc radix tree hash and from its cic list, + * deferring the deallocation of @cic to the end of the current RCU + * grace period. This assumes that __bfq_exit_single_io_context() + * has already been called for @cic. + */ +static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) +{ + unsigned long flags; + unsigned long dead_key = (unsigned long) cic->key; + + BUG_ON(!(dead_key & CIC_DEAD_KEY)); + + spin_lock_irqsave(&ioc->lock, flags); + radix_tree_delete(&ioc->bfq_radix_root, + dead_key >> CIC_DEAD_INDEX_SHIFT); + hlist_del_init_rcu(&cic->cic_list); + spin_unlock_irqrestore(&ioc->lock, flags); + + bfq_cic_free(cic); +} + +static void bfq_free_io_context(struct io_context *ioc) +{ + /* + * ioc->refcount is zero here, or we are called from elv_unregister(), + * so no more cic's are allowed to be linked into this ioc. So it + * should be ok to iterate over the known list, we will see all cic's + * since no new ones are added. + */ + call_for_each_cic(ioc, cic_free_func); +} + +/** + * __bfq_exit_single_io_context - deassociate @cic from any running task. + * @bfqd: bfq_data on which @cic is valid. + * @cic: the cic being exited. + * + * Whenever no more tasks are using @cic or @bfqd is deallocated we + * need to invalidate its entry in the radix tree hash table and to + * release the queues it refers to. + * + * Called under the queue lock. + */ +static void __bfq_exit_single_io_context(struct bfq_data *bfqd, + struct cfq_io_context *cic) +{ + struct io_context *ioc = cic->ioc; + + list_del_init(&cic->queue_list); + + /* + * Make sure dead mark is seen for dead queues + */ + smp_wmb(); + rcu_assign_pointer(cic->key, bfqd_dead_key(bfqd)); + + /* + * No write-side locking as no task is using @ioc (they're exited + * or bfqd is being deallocated. + */ + rcu_read_lock(); + if (rcu_dereference(ioc->ioc_data) == cic) { + rcu_read_unlock(); + spin_lock(&ioc->lock); + rcu_assign_pointer(ioc->ioc_data, NULL); + spin_unlock(&ioc->lock); + } else + rcu_read_unlock(); + + if (cic->cfqq[BLK_RW_ASYNC] != NULL) { + bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_ASYNC]); + cic->cfqq[BLK_RW_ASYNC] = NULL; + } + + spin_lock(&bfqd->eqm_lock); + if (cic->cfqq[BLK_RW_SYNC] != NULL) { + /* + * If the bic is using a shared queue, put the reference + * taken on the io_context when the bic started using a + * shared bfq_queue. + */ + if (bfq_bfqq_coop(cic->cfqq[BLK_RW_SYNC])) + put_io_context(ioc); + bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_SYNC]); + cic->cfqq[BLK_RW_SYNC] = NULL; + } + spin_unlock(&bfqd->eqm_lock); +} + +/** + * bfq_exit_single_io_context - deassociate @cic from @ioc (unlocked version). + * @ioc: the io_context @cic belongs to. + * @cic: the cic being exited. + * + * Take the queue lock and call __bfq_exit_single_io_context() to do the + * rest of the work. 
We take care of possible races with bfq_exit_queue() + * using bfq_get_bfqd_locked() (and abusing a little bit the RCU mechanism). + */ +static void bfq_exit_single_io_context(struct io_context *ioc, + struct cfq_io_context *cic) +{ + struct bfq_data *bfqd; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (bfqd != NULL) { + __bfq_exit_single_io_context(bfqd, cic); + bfq_put_bfqd_unlock(bfqd, &flags); + } +} + +/** + * bfq_exit_io_context - deassociate @ioc from all cics it owns. + * @ioc: the @ioc being exited. + * + * No more processes are using @ioc we need to clean up and put the + * internal structures we have that belongs to that process. Loop + * through all its cics, locking their queues and exiting them. + */ +static void bfq_exit_io_context(struct io_context *ioc) +{ + call_for_each_cic(ioc, bfq_exit_single_io_context); +} + +static struct cfq_io_context *bfq_alloc_io_context(struct bfq_data *bfqd, + gfp_t gfp_mask) +{ + struct cfq_io_context *cic; + + cic = kmem_cache_alloc_node(bfq_ioc_pool, gfp_mask | __GFP_ZERO, + bfqd->queue->node); + if (cic != NULL) { + cic->ttime.last_end_request = jiffies; + /* + * A newly created cic indicates that the process has just + * started doing I/O, and is probably mapping into memory its + * executable and libraries: it definitely needs weight raising. + * There is however the possibility that the process performs, + * for a while, I/O close to some other process. EQM intercepts + * this behavior and may merge the queue corresponding to the + * process with some other queue, BEFORE the weight of the queue + * is raised. Merged queues are not weight-raised (they are assumed + * to belong to processes that benefit only from high throughput). + * If the merge is basically the consequence of an accident, then + * the queue will be split soon and will get back its old weight. + * It is then important to write down somewhere that this queue + * does need weight raising, even if it did not make it to get its + * weight raised before being merged. To this purpose, we overload + * the field raising_time_left and assign 1 to it, to mark the queue + * as needing weight raising. + */ + cic->wr_time_left = 1; + INIT_LIST_HEAD(&cic->queue_list); + INIT_HLIST_NODE(&cic->cic_list); + cic->dtor = bfq_free_io_context; + cic->exit = bfq_exit_io_context; + elv_ioc_count_inc(bfq_ioc_count); + } + + return cic; +} + +/** + * bfq_drop_dead_cic - free an exited cic. + * @bfqd: bfq data for the device in use. + * @ioc: io_context owning @cic. + * @cic: the @cic to free. + * + * We drop cfq io contexts lazily, so we may find a dead one. + */ +static void bfq_drop_dead_cic(struct bfq_data *bfqd, struct io_context *ioc, + struct cfq_io_context *cic) +{ + unsigned long flags; + + WARN_ON(!list_empty(&cic->queue_list)); + BUG_ON(cic->key != bfqd_dead_key(bfqd)); + + spin_lock_irqsave(&ioc->lock, flags); + + BUG_ON(ioc->ioc_data == cic); + + /* + * With shared I/O contexts two lookups may race and drop the + * same cic more than one time: RCU guarantees that the storage + * will not be freed too early, here we make sure that we do + * not try to remove the cic from the hashing structures multiple + * times. + */ + if (!hlist_unhashed(&cic->cic_list)) { + radix_tree_delete(&ioc->bfq_radix_root, bfqd->cic_index); + hlist_del_init_rcu(&cic->cic_list); + bfq_cic_free(cic); + } + + spin_unlock_irqrestore(&ioc->lock, flags); +} + +/** + * bfq_cic_lookup - search into @ioc a cic associated to @bfqd. + * @bfqd: the lookup key. 
+ * @ioc: the io_context of the process doing I/O. + * + * If @ioc already has a cic associated to @bfqd return it, return %NULL + * otherwise. + */ +static struct cfq_io_context *bfq_cic_lookup(struct bfq_data *bfqd, + struct io_context *ioc) +{ + struct cfq_io_context *cic; + unsigned long flags; + void *k; + + if (unlikely(ioc == NULL)) + return NULL; + + rcu_read_lock(); + + /* We maintain a last-hit cache, to avoid browsing over the tree. */ + cic = rcu_dereference(ioc->ioc_data); + if (cic != NULL) { + k = rcu_dereference(cic->key); + if (k == bfqd) + goto out; + } + + do { + cic = radix_tree_lookup(&ioc->bfq_radix_root, + bfqd->cic_index); + if (cic == NULL) + goto out; + + k = rcu_dereference(cic->key); + if (unlikely(k != bfqd)) { + rcu_read_unlock(); + bfq_drop_dead_cic(bfqd, ioc, cic); + rcu_read_lock(); + continue; + } + + spin_lock_irqsave(&ioc->lock, flags); + rcu_assign_pointer(ioc->ioc_data, cic); + spin_unlock_irqrestore(&ioc->lock, flags); + break; + } while (1); + +out: + rcu_read_unlock(); + + return cic; +} + +/** + * bfq_cic_link - add @cic to @ioc. + * @bfqd: bfq_data @cic refers to. + * @ioc: io_context @cic belongs to. + * @cic: the cic to link. + * @gfp_mask: the mask to use for radix tree preallocations. + * + * Add @cic to @ioc, using @bfqd as the search key. This enables us to + * lookup the process specific cfq io context when entered from the block + * layer. Also adds @cic to a per-bfqd list, used when this queue is + * removed. + */ +static int bfq_cic_link(struct bfq_data *bfqd, struct io_context *ioc, + struct cfq_io_context *cic, gfp_t gfp_mask) +{ + unsigned long flags; + int ret; + + ret = radix_tree_preload(gfp_mask); + if (ret == 0) { + cic->ioc = ioc; + + /* No write-side locking, cic is not published yet. */ + rcu_assign_pointer(cic->key, bfqd); + + spin_lock_irqsave(&ioc->lock, flags); + ret = radix_tree_insert(&ioc->bfq_radix_root, + bfqd->cic_index, cic); + if (ret == 0) + hlist_add_head_rcu(&cic->cic_list, &ioc->bfq_cic_list); + spin_unlock_irqrestore(&ioc->lock, flags); + + radix_tree_preload_end(); + + if (ret == 0) { + spin_lock_irqsave(bfqd->queue->queue_lock, flags); + list_add(&cic->queue_list, &bfqd->cic_list); + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); + } + } + + if (ret != 0) + printk(KERN_ERR "bfq: cic link failed!\n"); + + return ret; +} + +/** + * bfq_ioc_set_ioprio - signal a priority change to the cics belonging to @ioc. + * @ioc: the io_context changing its priority. + */ +static inline void bfq_ioc_set_ioprio(struct io_context *ioc) +{ + call_for_each_cic(ioc, bfq_changed_ioprio); +} + +/** + * bfq_get_io_context - return the @cic associated to @bfqd in @ioc. + * @bfqd: the search key. + * @gfp_mask: the mask to use for cic allocation. + * + * Setup general io context and cfq io context. There can be several cfq + * io contexts per general io context, if this process is doing io to more + * than one device managed by cfq. + */ +static struct cfq_io_context *bfq_get_io_context(struct bfq_data *bfqd, + gfp_t gfp_mask) +{ + struct io_context *ioc = NULL; + struct cfq_io_context *cic; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + ioc = get_io_context(gfp_mask, bfqd->queue->node); + if (ioc == NULL) + return NULL; + + /* Lookup for an existing cic. */ + cic = bfq_cic_lookup(bfqd, ioc); + if (cic != NULL) + goto out; + + /* Alloc one if needed. */ + cic = bfq_alloc_io_context(bfqd, gfp_mask); + if (cic == NULL) + goto err; + + /* Link it into the ioc's radix tree and cic list. 
*/ + if (bfq_cic_link(bfqd, ioc, cic, gfp_mask) != 0) + goto err_free; + +out: + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. + */ + if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) + bfq_ioc_set_ioprio(ioc); + + return cic; +err_free: + bfq_cic_free(cic); +err: + put_io_context(ioc); + return NULL; +} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c new file mode 100644 index 00000000000..ee5a4aeaf43 --- /dev/null +++ b/block/bfq-iosched.c @@ -0,0 +1,3956 @@ +/* + * Budget Fair Queueing (BFQ) disk scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. + * + * BFQ is a proportional-share storage-I/O scheduling algorithm based on + * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets, + * measured in number of sectors, to processes instead of time slices. The + * device is not granted to the in-service process for a given time slice, + * but until it has exhausted its assigned budget. This change from the time + * to the service domain allows BFQ to distribute the device throughput + * among processes as desired, without any distortion due to ZBR, workload + * fluctuations or other factors. BFQ uses an ad hoc internal scheduler, + * called B-WF2Q+, to schedule processes according to their budgets. More + * precisely, BFQ schedules queues associated to processes. Thanks to the + * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to + * I/O-bound processes issuing sequential requests (to boost the + * throughput), and yet guarantee a low latency to interactive and soft + * real-time applications. + * + * BFQ is described in [1], where also a reference to the initial, more + * theoretical paper on BFQ can be found. The interested reader can find + * in the latter paper full details on the main algorithm, as well as + * formulas of the guarantees and formal proofs of all the properties. + * With respect to the version of BFQ presented in these papers, this + * implementation adds a few more heuristics, such as the one that + * guarantees a low latency to soft real-time applications, and a + * hierarchical extension based on H-WF2Q+. + * + * B-WF2Q+ is based on WF2Q+, that is described in [2], together with + * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) + * complexity derives from the one introduced with EEVDF in [3]. + * + * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness + * with the BFQ Disk I/O Scheduler'', + * Proceedings of the 5th Annual International Systems and Storage + * Conference (SYSTOR '12), June 2012. + * + * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf + * + * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing + * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, + * Oct 1997. + * + * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz + * + * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline + * First: A Flexible and Accurate Mechanism for Proportional Share + * Resource Allocation,'' technical report. 
+ * + * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "bfq.h" + +/* Max number of dispatches in one round of service. */ +static const int bfq_quantum = 4; + +/* Expiration time of sync (0) and async (1) requests, in jiffies. */ +static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; + +/* Maximum backwards seek, in KiB. */ +static const int bfq_back_max = 16 * 1024; + +/* Penalty of a backwards seek, in number of sectors. */ +static const int bfq_back_penalty = 2; + +/* Idling period duration, in jiffies. */ +static int bfq_slice_idle = HZ / 125; + +/* Default maximum budget values, in sectors and number of requests. */ +static const int bfq_default_max_budget = 16 * 1024; +static const int bfq_max_budget_async_rq = 4; + +/* + * Async to sync throughput distribution is controlled as follows: + * when an async request is served, the entity is charged the number + * of sectors of the request, multiplied by the factor below + */ +static const int bfq_async_charge_factor = 10; + +/* Default timeout values, in jiffies, approximating CFQ defaults. */ +static const int bfq_timeout_sync = HZ / 8; +static int bfq_timeout_async = HZ / 25; + +struct kmem_cache *bfq_pool; +struct kmem_cache *bfq_ioc_pool; + +static DEFINE_PER_CPU(unsigned long, bfq_ioc_count); +static struct completion *bfq_ioc_gone; +static DEFINE_SPINLOCK(bfq_ioc_gone_lock); + +static DEFINE_SPINLOCK(cic_index_lock); +static DEFINE_IDA(cic_index_ida); + +/* Below this threshold (in ms), we consider thinktime immediate. */ +#define BFQ_MIN_TT 2 + +/* hw_tag detection: parallel requests threshold and min samples needed. */ +#define BFQ_HW_QUEUE_THRESHOLD 4 +#define BFQ_HW_QUEUE_SAMPLES 32 + +#define BFQQ_SEEK_THR (sector_t)(8 * 1024) +#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) + +/* Min samples used for peak rate estimation (for autotuning). */ +#define BFQ_PEAK_RATE_SAMPLES 32 + +/* Shift used for peak rate fixed precision calculations. */ +#define BFQ_RATE_SHIFT 16 + +/* + * By default, BFQ computes the duration of the weight raising for + * interactive applications automatically, using the following formula: + * duration = (R / r) * T, where r is the peak rate of the device, and + * R and T are two reference parameters. + * In particular, R is the peak rate of the reference device (see below), + * and T is a reference time: given the systems that are likely to be + * installed on the reference device according to its speed class, T is + * about the maximum time needed, under BFQ and while reading two files in + * parallel, to load typical large applications on these systems. + * In practice, the slower/faster the device at hand is, the more/less it + * takes to load applications with respect to the reference device. + * Accordingly, the longer/shorter BFQ grants weight raising to interactive + * applications. + * + * BFQ uses four different reference pairs (R, T), depending on: + * . whether the device is rotational or non-rotational; + * . whether the device is slow, such as old or portable HDDs, as well as + * SD cards, or fast, such as newer HDDs and SSDs. + * + * The device's speed class is dynamically (re)detected in + * bfq_update_peak_rate() every time the estimated peak rate is updated. 
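As a hedged aside, not taken from this patch: the duration = (R / r) * T rule just described can be condensed into a small helper. The name example_wr_duration and its plain unsigned arithmetic are made up for illustration; the real code keeps rates in fixed point (BFQ_RATE_SHIFT) and picks R and T from the per-class arrays defined next.

/*
 * duration = (R / r) * T: the slower the device at hand (small r)
 * relative to the reference device (peak rate R), the longer
 * interactive queues stay weight-raised.
 */
static inline unsigned long example_wr_duration(unsigned long r,
						unsigned long R,
						unsigned long T)
{
	if (r == 0)	/* peak rate not estimated yet: fall back to T */
		return T;
	return (unsigned long)(((unsigned long long)R * T) / r);
}

So a device measured at half the reference rate keeps interactive queues weight-raised for roughly twice the reference time, and a device twice as fast for roughly half of it.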
+ * + * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0] + * are the reference values for a slow/fast rotational device, whereas + * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for + * a slow/fast non-rotational device. Finally, device_speed_thresh are the + * thresholds used to switch between speed classes. + * Both the reference peak rates and the thresholds are measured in + * sectors/usec, left-shifted by BFQ_RATE_SHIFT. + */ +static int R_slow[2] = {1536, 10752}; +static int R_fast[2] = {17415, 34791}; +/* + * To improve readability, a conversion function is used to initialize the + * following arrays, which entails that they can be initialized only in a + * function. + */ +static int T_slow[2]; +static int T_fast[2]; +static int device_speed_thresh[2]; + +#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ + { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) + +#define RQ_CIC(rq) \ + ((struct cfq_io_context *) (rq)->elevator_private[0]) +#define RQ_BFQQ(rq) ((rq)->elevator_private[1]) + +static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); + +#include "bfq-ioc.c" +#include "bfq-sched.c" +#include "bfq-cgroup.c" + +#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ + IOPRIO_CLASS_IDLE) +#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ + IOPRIO_CLASS_RT) + +#define bfq_sample_valid(samples) ((samples) > 80) + +/* + * We regard a request as SYNC, if either it's a read or has the SYNC bit + * set (in which case it could also be a direct WRITE). + */ +static inline int bfq_bio_sync(struct bio *bio) +{ + if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) + return 1; + + return 0; +} + +/* + * Scheduler run of queue, if there are requests pending and no one in the + * driver that will restart queueing. + */ +static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) +{ + if (bfqd->queued != 0) { + bfq_log(bfqd, "schedule dispatch"); + kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); + } +} + +/* + * Lifted from AS - choose which of rq1 and rq2 that is best served now. + * We choose the request that is closesr to the head right now. Distance + * behind the head is penalized and only allowed to a certain extent. + */ +static struct request *bfq_choose_req(struct bfq_data *bfqd, + struct request *rq1, + struct request *rq2, + sector_t last) +{ + sector_t s1, s2, d1 = 0, d2 = 0; + unsigned long back_max; +#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ +#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ + unsigned wrap = 0; /* bit mask: requests behind the disk head? */ + + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; + + if (rq_is_sync(rq1) && !rq_is_sync(rq2)) + return rq1; + else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) + return rq2; + if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) + return rq1; + else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) + return rq2; + + s1 = blk_rq_pos(rq1); + s2 = blk_rq_pos(rq2); + + /* + * By definition, 1KiB is 2 sectors. + */ + back_max = bfqd->bfq_back_max * 2; + + /* + * Strict one way elevator _except_ in the case where we allow + * short backward seeks which are biased as twice the cost of a + * similar forward seek. 
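A hedged, self-contained illustration of this distance rule (the helper effective_dist, the head position and the request positions below are made-up numbers, not code from this patch; the defaults follow bfq_back_max = 16*1024 KiB and bfq_back_penalty = 2 defined earlier in this file):

#include <stdio.h>

/* Mirrors the forward/backward distance computation that follows. */
static unsigned long long effective_dist(unsigned long long pos,
					 unsigned long long head,
					 unsigned long long back_max,
					 unsigned int back_penalty)
{
	if (pos >= head)			/* forward: plain distance */
		return pos - head;
	if (pos + back_max >= head)		/* short backward: penalized */
		return (head - pos) * back_penalty;
	return ~0ULL;				/* too far behind: treated as wrapped */
}

int main(void)
{
	/* back_max in sectors is bfq_back_max (in KiB) * 2, as in the code below. */
	unsigned long long back_max = 16 * 1024 * 2;

	printf("1500 sectors ahead  -> %llu\n",
	       effective_dist(101500, 100000, back_max, 2));
	printf("1000 sectors behind -> %llu\n",
	       effective_dist(99000, 100000, back_max, 2));
	return 0;
}

With the defaults, the request 1000 sectors behind the head costs 2000 effective sectors, so the request 1500 sectors ahead of the head is still preferred; anything more than 32768 sectors behind is treated as wrapping past the end.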
+ */ + if (s1 >= last) + d1 = s1 - last; + else if (s1 + back_max >= last) + d1 = (last - s1) * bfqd->bfq_back_penalty; + else + wrap |= BFQ_RQ1_WRAP; + + if (s2 >= last) + d2 = s2 - last; + else if (s2 + back_max >= last) + d2 = (last - s2) * bfqd->bfq_back_penalty; + else + wrap |= BFQ_RQ2_WRAP; + + /* Found required data */ + + /* + * By doing switch() on the bit mask "wrap" we avoid having to + * check two variables for all permutations: --> faster! + */ + switch (wrap) { + case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ + if (d1 < d2) + return rq1; + else if (d2 < d1) + return rq2; + else { + if (s1 >= s2) + return rq1; + else + return rq2; + } + + case BFQ_RQ2_WRAP: + return rq1; + case BFQ_RQ1_WRAP: + return rq2; + case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ + default: + /* + * Since both rqs are wrapped, + * start with the one that's further behind head + * (--> only *one* back seek required), + * since back seek takes more time than forward. + */ + if (s1 <= s2) + return rq1; + else + return rq2; + } +} + +static struct bfq_queue * +bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + sector_t sector, struct rb_node **ret_parent, + struct rb_node ***rb_link) +{ + struct rb_node **p, *parent; + struct bfq_queue *bfqq = NULL; + + parent = NULL; + p = &root->rb_node; + while (*p) { + struct rb_node **n; + + parent = *p; + bfqq = rb_entry(parent, struct bfq_queue, pos_node); + + /* + * Sort strictly based on sector. Smallest to the left, + * largest to the right. + */ + if (sector > blk_rq_pos(bfqq->next_rq)) + n = &(*p)->rb_right; + else if (sector < blk_rq_pos(bfqq->next_rq)) + n = &(*p)->rb_left; + else + break; + p = n; + bfqq = NULL; + } + + *ret_parent = parent; + if (rb_link) + *rb_link = p; + + bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", + (long long unsigned)sector, + bfqq != NULL ? bfqq->pid : 0); + + return bfqq; +} + +static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct rb_node **p, *parent; + struct bfq_queue *__bfqq; + + if (bfqq->pos_root != NULL) { + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } + + if (bfq_class_idle(bfqq)) + return; + if (!bfqq->next_rq) + return; + + bfqq->pos_root = &bfqd->rq_pos_tree; + __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, + blk_rq_pos(bfqq->next_rq), &parent, &p); + if (__bfqq == NULL) { + rb_link_node(&bfqq->pos_node, parent, p); + rb_insert_color(&bfqq->pos_node, bfqq->pos_root); + } else + bfqq->pos_root = NULL; +} + +/* + * Tell whether there are active queues or groups with differentiated weights. + */ +static inline bool bfq_differentiated_weights(struct bfq_data *bfqd) +{ + BUG_ON(!bfqd->hw_tag); + /* + * For weights to differ, at least one of the trees must contain + * at least two nodes. + */ + return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && + (bfqd->queue_weights_tree.rb_node->rb_left || + bfqd->queue_weights_tree.rb_node->rb_right) +#ifdef CONFIG_CGROUP_BFQIO + ) || + (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && + (bfqd->group_weights_tree.rb_node->rb_left || + bfqd->group_weights_tree.rb_node->rb_right) +#endif + ); +} + +/* + * If the weight-counter tree passed as input contains no counter for + * the weight of the input entity, then add that counter; otherwise just + * increment the existing counter. + * + * Note that weight-counter trees contain few nodes in mostly symmetric + * scenarios. 
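The next-request choice above boils down to a distance metric over the two
candidates. The following standalone userspace sketch reproduces just that
metric; the constants mirror bfq_back_max (converted to sectors) and
bfq_back_penalty, while the head position and request positions fed to
main() are invented for the example.

    #include <stdio.h>

    typedef unsigned long long sector_t;   /* userspace stand-in */

    /* Illustrative values mirroring bfq_back_max (KiB -> sectors, so
     * 16 MiB with 512 B sectors) and bfq_back_penalty. */
    static const sector_t back_max = 16 * 1024 * 2;
    static const sector_t back_penalty = 2;

    /* Return the "cost" of moving from the current head position to pos,
     * or (sector_t)-1 if the request lies too far behind the head
     * (the "wrapped" case in bfq_choose_req()). */
    static sector_t seek_cost(sector_t head, sector_t pos)
    {
        if (pos >= head)
            return pos - head;                  /* forward seek: plain distance */
        if (pos + back_max >= head)
            return (head - pos) * back_penalty; /* short backward seek: penalized */
        return (sector_t)-1;                    /* too far behind: wrapped */
    }

    int main(void)
    {
        sector_t head = 100000;

        printf("forward  : %llu\n", seek_cost(head, 100800)); /* 800 */
        printf("backward : %llu\n", seek_cost(head, 99800));  /* 200 * 2 = 400 */
        printf("wrapped  : %llu\n", seek_cost(head, 10));     /* huge sentinel */
        return 0;
    }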
For example, if all queues have the same weight, then the + * weight-counter tree for the queues may contain at most one node. + * This holds even if low_latency is on, because weight-raised queues + * are not inserted in the tree. + * In most scenarios, the rate at which nodes are created/destroyed + * should be low too. + */ +static void bfq_weights_tree_add(struct bfq_data *bfqd, + struct bfq_entity *entity, + struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* + * Do not insert if: + * - the device does not support queueing; + * - the entity is already associated with a counter, which happens if: + * 1) the entity is associated with a queue, 2) a request arrival + * has caused the queue to become both non-weight-raised, and hence + * change its weight, and backlogged; in this respect, each + * of the two events causes an invocation of this function, + * 3) this is the invocation of this function caused by the second + * event. This second invocation is actually useless, and we handle + * this fact by exiting immediately. More efficient or clearer + * solutions might possibly be adopted. + */ + if (!bfqd->hw_tag || entity->weight_counter) + return; + + while (*new) { + struct bfq_weight_counter *__counter = container_of(*new, + struct bfq_weight_counter, + weights_node); + parent = *new; + + if (entity->weight == __counter->weight) { + entity->weight_counter = __counter; + goto inc_counter; + } + if (entity->weight < __counter->weight) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), + GFP_ATOMIC); + entity->weight_counter->weight = entity->weight; + rb_link_node(&entity->weight_counter->weights_node, parent, new); + rb_insert_color(&entity->weight_counter->weights_node, root); + +inc_counter: + entity->weight_counter->num_active++; +} + +/* + * Decrement the weight counter associated with the entity, and, if the + * counter reaches 0, remove the counter from the tree. + * See the comments to the function bfq_weights_tree_add() for considerations + * about overhead. + */ +static void bfq_weights_tree_remove(struct bfq_data *bfqd, + struct bfq_entity *entity, + struct rb_root *root) +{ + /* + * Check whether the entity is actually associated with a counter. + * In fact, the device may not be considered NCQ-capable for a while, + * which implies that no insertion in the weight trees is performed, + * after which the device may start to be deemed NCQ-capable, and hence + * this function may start to be invoked. This may cause the function + * to be invoked for entities that are not associated with any counter. 
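The weight trees only exist to answer one question cheaply: is more than one
distinct weight currently active? The sketch below makes the same bookkeeping
explicit with a flat array instead of an rb-tree; MAX_DISTINCT and the
weights used in main() are arbitrary example values.

    #include <stdio.h>

    /* Simplified analogue of the weight-counter trees: one slot per distinct
     * weight, each holding the number of active entities with that weight.
     * (The kernel code keys an rb_root by weight instead of scanning.) */
    #define MAX_DISTINCT 16

    struct wcount { int weight; int num_active; };
    static struct wcount tbl[MAX_DISTINCT];
    static int nr_distinct;

    static void weight_add(int weight)
    {
        for (int i = 0; i < nr_distinct; i++)
            if (tbl[i].weight == weight) { tbl[i].num_active++; return; }
        if (nr_distinct == MAX_DISTINCT)
            return;                             /* example-only bound */
        tbl[nr_distinct].weight = weight;
        tbl[nr_distinct].num_active = 1;
        nr_distinct++;
    }

    static void weight_remove(int weight)
    {
        for (int i = 0; i < nr_distinct; i++) {
            if (tbl[i].weight != weight)
                continue;
            if (--tbl[i].num_active == 0)
                tbl[i] = tbl[--nr_distinct];    /* drop the now-empty counter */
            return;
        }
    }

    /* Mirrors bfq_differentiated_weights(): true iff at least two distinct
     * weights are currently active. */
    static int differentiated_weights(void) { return nr_distinct > 1; }

    int main(void)
    {
        weight_add(100); weight_add(100);
        printf("two weight-100 queues:   %d\n", differentiated_weights()); /* 0 */
        weight_add(300);
        printf("plus a weight-300 queue: %d\n", differentiated_weights()); /* 1 */
        weight_remove(300);
        printf("after removing it:       %d\n", differentiated_weights()); /* 0 */
        return 0;
    }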
+ */ + if (!entity->weight_counter) + return; + + BUG_ON(RB_EMPTY_ROOT(root)); + BUG_ON(entity->weight_counter->weight != entity->weight); + + BUG_ON(!entity->weight_counter->num_active); + entity->weight_counter->num_active--; + if (entity->weight_counter->num_active > 0) + goto reset_entity_pointer; + + rb_erase(&entity->weight_counter->weights_node, root); + kfree(entity->weight_counter); + +reset_entity_pointer: + entity->weight_counter = NULL; +} + +static struct request *bfq_find_next_rq(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *last) +{ + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); + struct request *next = NULL, *prev = NULL; + + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); + + if (rbprev != NULL) + prev = rb_entry_rq(rbprev); + + if (rbnext != NULL) + next = rb_entry_rq(rbnext); + else { + rbnext = rb_first(&bfqq->sort_list); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); + } + + return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); +} + +/* see the definition of bfq_async_charge_factor for details */ +static inline unsigned long bfq_serv_to_charge(struct request *rq, + struct bfq_queue *bfqq) +{ + return blk_rq_sectors(rq) * + (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) * + bfq_async_charge_factor)); +} + +/** + * bfq_updated_next_req - update the queue after a new next_rq selection. + * @bfqd: the device data the queue belongs to. + * @bfqq: the queue to update. + * + * If the first request of a queue changes we make sure that the queue + * has enough budget to serve at least its first request (if the + * request has grown). We do this because if the queue has not enough + * budget for its first request, it has to go through two dispatch + * rounds to actually get it dispatched. + */ +static void bfq_updated_next_req(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + struct request *next_rq = bfqq->next_rq; + unsigned long new_budget; + + if (next_rq == NULL) + return; + + if (bfqq == bfqd->in_service_queue) + /* + * In order not to break guarantees, budgets cannot be + * changed after an entity has been selected. + */ + return; + + BUG_ON(entity->tree != &st->active); + BUG_ON(entity == entity->sched_data->in_service_entity); + + new_budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + if (entity->budget != new_budget) { + entity->budget = new_budget; + bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", + new_budget); + bfq_activate_bfqq(bfqd, bfqq); + } +} + +static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) +{ + u64 dur; + + if (bfqd->bfq_wr_max_time > 0) + return bfqd->bfq_wr_max_time; + + dur = bfqd->RT_prod; + do_div(dur, bfqd->peak_rate); + + return dur; +} + +static inline unsigned +bfq_bfqq_cooperations(struct bfq_queue *bfqq) +{ + return bfqq->cic ? bfqq->cic->cooperations : 0; +} + +static inline void +bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct cfq_io_context *cic) +{ + if (cic->saved_idle_window) + bfq_mark_bfqq_idle_window(bfqq); + else + bfq_clear_bfqq_idle_window(bfqq); + if (cic->saved_IO_bound) + bfq_mark_bfqq_IO_bound(bfqq); + else + bfq_clear_bfqq_IO_bound(bfqq); + if (cic->wr_time_left && bfqq->bfqd->low_latency && + cic->cooperations < bfqq->bfqd->bfq_coop_thresh) { + /* + * Start a weight raising period with the duration given by + * the raising_time_left snapshot. 
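Two of the small helpers above are pure arithmetic and are easy to check in
isolation. The userspace sketch below mirrors bfq_serv_to_charge() and
bfq_wr_duration(); ASYNC_CHARGE_FACTOR matches the constant defined earlier,
while the RT product and peak rate used in main() are made-up numbers.

    #include <stdio.h>

    #define ASYNC_CHARGE_FACTOR 10   /* bfq_async_charge_factor above */

    /* Mirror of bfq_serv_to_charge(): async, non-weight-raised requests are
     * charged their size multiplied by the async charge factor, so sync
     * queues implicitly receive a larger share of the throughput. */
    static unsigned long serv_to_charge(unsigned long sectors, int sync, int wr_coeff)
    {
        return sectors * (1 + (!sync) * (wr_coeff == 1) * ASYNC_CHARGE_FACTOR);
    }

    /* Mirror of bfq_wr_duration(): duration = (R / r) * T, computed as
     * (R * T) / r to keep integer math, where r is the estimated peak rate. */
    static unsigned long wr_duration(unsigned long long RT_prod,
                                     unsigned long long peak_rate)
    {
        return (unsigned long)(RT_prod / peak_rate);
    }

    int main(void)
    {
        printf("sync 8-sector rq charged:  %lu sectors\n",
               serv_to_charge(8, 1, 1));        /* 8 */
        printf("async 8-sector rq charged: %lu sectors\n",
               serv_to_charge(8, 0, 1));        /* 8 * 11 = 88 */

        /* With an example reference product R*T of 1,000,000 and a peak-rate
         * estimate of 2,000 (same fixed-point units as R), the weight-raising
         * duration comes out as 500 jiffies. */
        printf("wr duration: %lu jiffies\n", wr_duration(1000000ULL, 2000ULL));
        return 0;
    }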
+ */ + if (bfq_bfqq_busy(bfqq)) + bfqq->bfqd->wr_busy_queues++; + bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff; + bfqq->wr_cur_max_time = cic->wr_time_left; + bfqq->last_wr_start_finish = jiffies; + bfqq->entity.ioprio_changed = 1; + } + /* + * Clear raising_time_left to prevent bfq_bfqq_save_state() from + * getting confused about the queue's need of a weight-raising + * period. + */ + cic->wr_time_left = 0; +} + +/* + * Must be called with the queue_lock held. + */ +static int bfqq_process_refs(struct bfq_queue *bfqq) +{ + int process_refs, io_refs; + + io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; + process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; + BUG_ON(process_refs < 0); + return process_refs; +} + +static void bfq_add_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_entity *entity = &bfqq->entity; + struct bfq_data *bfqd = bfqq->bfqd; + struct request *next_rq, *prev; + unsigned long old_wr_coeff = bfqq->wr_coeff; + int idle_for_long_time = 0; + + bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); + bfqq->queued[rq_is_sync(rq)]++; + bfqd->queued++; + + elv_rb_add(&bfqq->sort_list, rq); + + spin_lock(&bfqd->eqm_lock); + + /* + * Check if this request is a better next-serve candidate. + */ + prev = bfqq->next_rq; + next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); + BUG_ON(next_rq == NULL); + bfqq->next_rq = next_rq; + + /* + * Adjust priority tree position, if next_rq changes. + */ + if (prev != bfqq->next_rq) + bfq_rq_pos_tree_add(bfqd, bfqq); + + spin_unlock(&bfqd->eqm_lock); + + if (!bfq_bfqq_busy(bfqq)) { + int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && + bfq_bfqq_cooperations(bfqq) < bfqd->bfq_coop_thresh && + time_is_before_jiffies(bfqq->soft_rt_next_start); + idle_for_long_time = bfq_bfqq_cooperations(bfqq) < + bfqd->bfq_coop_thresh && + time_is_before_jiffies( + bfqq->budget_timeout + + bfqd->bfq_wr_min_idle_time); + entity->budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + + if (!bfq_bfqq_IO_bound(bfqq)) { + if (time_before(jiffies, + RQ_CIC(rq)->ttime.last_end_request + + bfqd->bfq_slice_idle)) { + bfqq->requests_within_timer++; + if (bfqq->requests_within_timer >= + bfqd->bfq_requests_within_timer) + bfq_mark_bfqq_IO_bound(bfqq); + } else + bfqq->requests_within_timer = 0; + } + + if (!bfqd->low_latency) + goto add_bfqq_busy; + + if (bfq_bfqq_just_split(bfqq)) + goto set_ioprio_changed; + + /* + * If the queue: + * - is not being boosted, + * - has been idle for enough time, + * - is not a sync queue or is linked to a cfq_io_context (it is + * shared "for its nature" or it is not shared and its + * requests have not been redirected to a shared queue) + * start a weight-raising period. 
+ */ + if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt) && + (!bfq_bfqq_sync(bfqq) || bfqq->cic != NULL)) { + bfqq->wr_coeff = bfqd->bfq_wr_coeff; + if (idle_for_long_time) + bfqq->wr_cur_max_time = + bfq_wr_duration(bfqd); + else + bfqq->wr_cur_max_time = + bfqd->bfq_wr_rt_max_time; + bfq_log_bfqq(bfqd, bfqq, + "wrais starting at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq-> + wr_cur_max_time)); + } else if (old_wr_coeff > 1) { + if (idle_for_long_time) + bfqq->wr_cur_max_time = + bfq_wr_duration(bfqd); + else if (bfq_bfqq_cooperations(bfqq) >= + bfqd->bfq_coop_thresh || + (bfqq->wr_cur_max_time == + bfqd->bfq_wr_rt_max_time && + !soft_rt)) { + bfqq->wr_coeff = 1; + bfq_log_bfqq(bfqd, bfqq, + "wrais ending at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq-> + wr_cur_max_time)); + } else if (time_before( + bfqq->last_wr_start_finish + + bfqq->wr_cur_max_time, + jiffies + + bfqd->bfq_wr_rt_max_time) && + soft_rt) { + /* + * + * The remaining weight-raising time is lower + * than bfqd->bfq_raising_rt_max_time, which + * means that the application is enjoying + * weight raising either because deemed soft- + * rt in the near past, or because deemed + * interactive a long ago. In both cases, + * resetting now the current remaining weight- + * raising time for the application to the + * weight-raising duration for soft rt + * applications would not cause any latency + * increase for the application (as the new + * duration would be higher than the remaining + * time). + * + * In addition, the application is now meeting + * the requirements for being deemed soft rt. + * In the end we can correctly and safely + * (re)charge the weight-raising duration for + * the application with the weight-raising + * duration for soft rt applications. + * + * In particular, doing this recharge now, i.e., + * before the weight-raising period for the + * application finishes, reduces the probability + * of the following negative scenario: + * 1) the weight of a soft rt application is + * raised at startup (as for any newly + * created application), + * 2) since the application is not interactive, + * at a certain time weight-raising is + * stopped for the application, + * 3) at that time the application happens to + * still have pending requests, and hence + * is destined to not have a chance to be + * deemed soft rt before these requests are + * completed (see the comments to the + * function bfq_bfqq_softrt_next_start() + * for details on soft rt detection), + * 4) these pending requests experience a high + * latency because the application is not + * weight-raised while they are pending. 
+ */ + bfqq->last_wr_start_finish = jiffies; + bfqq->wr_cur_max_time = + bfqd->bfq_wr_rt_max_time; + } + } +set_ioprio_changed: + if (old_wr_coeff != bfqq->wr_coeff) + entity->ioprio_changed = 1; +add_bfqq_busy: + bfqq->last_idle_bklogged = jiffies; + bfqq->service_from_backlogged = 0; + bfq_clear_bfqq_softrt_update(bfqq); + bfq_add_bfqq_busy(bfqd, bfqq); + } else { + if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && + time_is_before_jiffies( + bfqq->last_wr_start_finish + + bfqd->bfq_wr_min_inter_arr_async)) { + bfqq->wr_coeff = bfqd->bfq_wr_coeff; + bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); + + bfqd->wr_busy_queues++; + entity->ioprio_changed = 1; + bfq_log_bfqq(bfqd, bfqq, + "non-idle wrais starting at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + if (prev != bfqq->next_rq) + bfq_updated_next_req(bfqd, bfqq); + } + + if (bfqd->low_latency && + (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || + idle_for_long_time)) + bfqq->last_wr_start_finish = jiffies; +} + +static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, + struct bio *bio) +{ + struct task_struct *tsk = current; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + cic = bfq_cic_lookup(bfqd, tsk->io_context); + if (cic == NULL) + return NULL; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) { + sector_t sector = bio->bi_sector + bio_sectors(bio); + + return elv_rb_find(&bfqq->sort_list, sector); + } + + return NULL; +} + +static void bfq_activate_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + + bfqd->rq_in_driver++; + bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); + bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", + (long long unsigned)bfqd->last_position); +} + +static inline void bfq_deactivate_request(struct request_queue *q, + struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + + BUG_ON(bfqd->rq_in_driver == 0); + bfqd->rq_in_driver--; +} + +static void bfq_remove_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + const int sync = rq_is_sync(rq); + + spin_lock(&bfqq->bfqd->eqm_lock); + if (bfqq->next_rq == rq) { + bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); + bfq_updated_next_req(bfqd, bfqq); + } + + list_del_init(&rq->queuelist); + BUG_ON(bfqq->queued[sync] == 0); + bfqq->queued[sync]--; + bfqd->queued--; + elv_rb_del(&bfqq->sort_list, rq); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) + bfq_del_bfqq_busy(bfqd, bfqq, 1); + /* + * Remove queue from request-position tree as it is empty. 
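The activation path in bfq_add_request() above decides whether to start
weight raising and with which duration. The following sketch condenses that
decision into one function; the threshold values and durations are invented,
and the extra conditions on cooperations and on low_latency are left out for
brevity.

    #include <stdio.h>

    /* Time handling reduced to plain unsigned longs ("jiffies"); the names
     * mirror the tunables used above but the values are example-only. */
    #define WR_MIN_IDLE_TIME   200   /* bfqd->bfq_wr_min_idle_time            */
    #define WR_RT_MAX_TIME     300   /* bfqd->bfq_wr_rt_max_time              */
    #define WR_INTERACTIVE_DUR 6000  /* what bfq_wr_duration() would return   */

    /* Start weight raising if the reactivated queue either looks interactive
     * (idle long enough) or qualifies as soft real-time, and pick the
     * duration accordingly. Returns the duration, or 0 if not raised. */
    static unsigned long wr_duration_on_activation(unsigned long now,
                                                   unsigned long budget_timeout,
                                                   unsigned long soft_rt_next_start,
                                                   int old_wr_coeff)
    {
        int idle_for_long_time = now > budget_timeout + WR_MIN_IDLE_TIME;
        int soft_rt = now > soft_rt_next_start;

        if (old_wr_coeff != 1)
            return 0;                  /* already raised: handled separately */
        if (idle_for_long_time)
            return WR_INTERACTIVE_DUR; /* interactive boost */
        if (soft_rt)
            return WR_RT_MAX_TIME;     /* soft real-time boost */
        return 0;
    }

    int main(void)
    {
        /* Queue idle since jiffy 1000, reactivated at jiffy 1500: interactive. */
        printf("%lu\n", wr_duration_on_activation(1500, 1000, 2000, 1)); /* 6000 */
        /* Reactivated quickly but past its soft-rt next-start instant. */
        printf("%lu\n", wr_duration_on_activation(1500, 1450, 1400, 1)); /* 300 */
        /* Neither condition holds: no boost. */
        printf("%lu\n", wr_duration_on_activation(1500, 1450, 2000, 1)); /* 0 */
        return 0;
    }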
+ */ + if (bfqq->pos_root != NULL) { + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } + } + spin_unlock(&bfqq->bfqd->eqm_lock); + + if (rq->cmd_flags & REQ_META) { + BUG_ON(bfqq->meta_pending == 0); + bfqq->meta_pending--; + } +} + +static int bfq_merge(struct request_queue *q, struct request **req, + struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct request *__rq; + + __rq = bfq_find_rq_fmerge(bfqd, bio); + if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + + return ELEVATOR_NO_MERGE; +} + +static void bfq_merged_request(struct request_queue *q, struct request *req, + int type) +{ + if (type == ELEVATOR_FRONT_MERGE && + rb_prev(&req->rb_node) && + blk_rq_pos(req) < + blk_rq_pos(container_of(rb_prev(&req->rb_node), + struct request, rb_node))) { + struct bfq_queue *bfqq = RQ_BFQQ(req); + struct bfq_data *bfqd = bfqq->bfqd; + struct request *prev, *next_rq; + + /* Reposition request in its sort_list */ + elv_rb_del(&bfqq->sort_list, req); + elv_rb_add(&bfqq->sort_list, req); + /* Choose next request to be served for bfqq */ + prev = bfqq->next_rq; + next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req, + bfqd->last_position); + BUG_ON(next_rq == NULL); + bfqq->next_rq = next_rq; + /* + * If next_rq changes, update both the queue's budget to + * fit the new request and the queue's position in its + * rq_pos_tree. + */ + if (prev != bfqq->next_rq) { + bfq_updated_next_req(bfqd, bfqq); + bfq_rq_pos_tree_add(bfqd, bfqq); + } + } +} + +static void bfq_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + /* + * Reposition in fifo if next is older than rq. + */ + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && + time_before(rq_fifo_time(next), rq_fifo_time(rq))) { + list_move(&rq->queuelist, &next->queuelist); + rq_set_fifo_time(rq, rq_fifo_time(next)); + } + + /* + * eqm_lock needed to avoid that other critical sections not holding + * the queue_lock read an inconsistent value from bfqq->next_rq while + * traversing the rq_pos_trees + */ + if (bfqq->next_rq == next) { + spin_lock(&bfqq->bfqd->eqm_lock); + bfqq->next_rq = rq; + spin_unlock(&bfqq->bfqd->eqm_lock); + } + + bfq_remove_request(next); +} + +/* Must be called with bfqq != NULL */ +static inline void bfq_bfqq_end_wr(struct bfq_queue *bfqq) +{ + BUG_ON(bfqq == NULL); + if (bfq_bfqq_busy(bfqq)) + bfqq->bfqd->wr_busy_queues--; + bfqq->wr_coeff = 1; + bfqq->wr_cur_max_time = 0; + /* Trigger a weight change on the next activation of the queue */ + bfqq->entity.ioprio_changed = 1; +} + +static void bfq_end_wr_async_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg) +{ + int i, j; + + for (i = 0; i < 2; i++) + for (j = 0; j < IOPRIO_BE_NR; j++) + if (bfqg->async_bfqq[i][j] != NULL) + bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]); + if (bfqg->async_idle_bfqq != NULL) + bfq_bfqq_end_wr(bfqg->async_idle_bfqq); +} + +static void bfq_end_wr(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + + spin_lock_irq(bfqd->queue->queue_lock); + + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) + bfq_bfqq_end_wr(bfqq); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) + bfq_bfqq_end_wr(bfqq); + bfq_end_wr_async(bfqd); + + spin_unlock_irq(bfqd->queue->queue_lock); +} + +static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) +{ + if (request) + return blk_rq_pos(io_struct); + else + return ((struct bio *)io_struct)->bi_sector; 
+} + +static inline sector_t bfq_dist_from(sector_t pos1, + sector_t pos2) +{ + if (pos1 >= pos2) + return pos1 - pos2; + else + return pos2 - pos1; +} + +static inline int bfq_rq_close_to_sector(void *io_struct, bool request, + sector_t sector) +{ + return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= + BFQQ_SEEK_THR; +} + +static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) +{ + struct rb_root *root = &bfqd->rq_pos_tree; + struct rb_node *parent, *node; + struct bfq_queue *__bfqq; + + if (RB_EMPTY_ROOT(root)) + return NULL; + + /* + * First, if we find a request starting at the end of the last + * request, choose it. + */ + __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); + if (__bfqq != NULL) + return __bfqq; + + /* + * If the exact sector wasn't found, the parent of the NULL leaf + * will contain the closest sector (rq_pos_tree sorted by + * next_request position). + */ + __bfqq = rb_entry(parent, struct bfq_queue, pos_node); + if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) + return __bfqq; + + if (blk_rq_pos(__bfqq->next_rq) < sector) + node = rb_next(&__bfqq->pos_node); + else + node = rb_prev(&__bfqq->pos_node); + if (node == NULL) + return NULL; + + __bfqq = rb_entry(node, struct bfq_queue, pos_node); + if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) + return __bfqq; + + return NULL; +} + +/* + * bfqd - obvious + * cur_bfqq - passed in so that we don't decide that the current queue + * is closely cooperating with itself + * sector - used as a reference point to search for a close queue + */ +static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, + struct bfq_queue *cur_bfqq, + sector_t sector) +{ + struct bfq_queue *bfqq; + + if (bfq_class_idle(cur_bfqq)) + return NULL; + if (!bfq_bfqq_sync(cur_bfqq)) + return NULL; + if (BFQQ_SEEKY(cur_bfqq)) + return NULL; + + /* If device has only one backlogged bfq_queue, don't search. */ + if (bfqd->busy_queues == 1) + return NULL; + + /* + * We should notice if some of the queues are cooperating, e.g. + * working closely on the same area of the disk. In that case, + * we can group them together and don't waste time idling. + */ + bfqq = bfqq_close(bfqd, sector); + if (bfqq == NULL || bfqq == cur_bfqq) + return NULL; + + /* + * Do not merge queues from different bfq_groups. + */ + if (bfqq->entity.parent != cur_bfqq->entity.parent) + return NULL; + + /* + * It only makes sense to merge sync queues. + */ + if (!bfq_bfqq_sync(bfqq)) + return NULL; + if (BFQQ_SEEKY(bfqq)) + return NULL; + + /* + * Do not merge queues of different priority classes. + */ + if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) + return NULL; + + return bfqq; +} + +static struct bfq_queue * +bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +{ + int process_refs, new_process_refs; + struct bfq_queue *__bfqq; + + /* + * If there are no process references on the new_bfqq, then it is + * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain + * may have dropped their last reference (not just their last process + * reference). + */ + if (!bfqq_process_refs(new_bfqq)) + return NULL; + + /* Avoid a circular list and skip interim queue merges. */ + while ((__bfqq = new_bfqq->new_bfqq)) { + if (__bfqq == bfqq) + return NULL; + new_bfqq = __bfqq; + } + + process_refs = bfqq_process_refs(bfqq); + new_process_refs = bfqq_process_refs(new_bfqq); + /* + * If the process for the bfqq has gone away, there is no + * sense in merging the queues. 
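Cooperator detection hinges on a simple proximity test. The sketch below
reproduces bfq_dist_from() and the BFQQ_SEEK_THR comparison in plain
userspace C; the sector numbers in main() are arbitrary.

    #include <stdio.h>

    typedef unsigned long long sector_t;   /* userspace stand-in */

    /* BFQQ_SEEK_THR in the patch is 8 * 1024 sectors (4 MiB with 512 B sectors). */
    #define SEEK_THR ((sector_t)(8 * 1024))

    /* Absolute distance between two positions, as in bfq_dist_from(). */
    static sector_t dist_from(sector_t a, sector_t b)
    {
        return a >= b ? a - b : b - a;
    }

    /* A queue's next request is "close" to a reference sector if it lies
     * within SEEK_THR of it; this is the test used to pick a cooperator. */
    static int close_to_sector(sector_t pos, sector_t sector)
    {
        return dist_from(pos, sector) <= SEEK_THR;
    }

    int main(void)
    {
        sector_t last_dispatched = 1000000;

        printf("%d\n", close_to_sector(1004096, last_dispatched)); /* 1: within 8192 */
        printf("%d\n", close_to_sector(1020000, last_dispatched)); /* 0: too far    */
        return 0;
    }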
+ */ + if (process_refs == 0 || new_process_refs == 0) + return NULL; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", + new_bfqq->pid); + + /* + * Merging is just a redirection: the requests of the process owning + * one of the two queues are redirected to the other queue. The latter + * queue, in its turn, is set as shared if this is the first time that + * the requests of some process are redirected to it. + * + * We redirect bfqq to new_bfqq and not the opposite, because we + * are in the context of the process owning bfqq, hence we have the + * io_cq of this process. So we can immediately configure this io_cq + * to redirect the requests of the process to new_bfqq. + * + * NOTE, even if new_bfqq coincides with the in-service queue, the + * io_cq of new_bfqq is not available, because, if the in-service queue + * is shared, bfqd->in_service_cic may not point to the io_cq of the + * in-service queue. + * Redirecting the requests of the process owning bfqq to the currently + * in-service queue is in any case the best option, as we feed the + * in-service queue with new requests close to the last request served + * and, by doing so, hopefully increase the throughput. + */ + bfqq->new_bfqq = new_bfqq; + atomic_add(process_refs, &new_bfqq->ref); + return new_bfqq; +} + +/* + * Attempt to schedule a merge of bfqq with the currently in-service queue + * or with a close queue among the scheduled queues. + * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue + * structure otherwise. + */ +static struct bfq_queue * +bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + void *io_struct, bool request) +{ + struct bfq_queue *in_service_bfqq, *new_bfqq; + + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + + if (!io_struct) + return NULL; + + in_service_bfqq = bfqd->in_service_queue; + + if (in_service_bfqq == NULL || in_service_bfqq == bfqq || + !bfqd->in_service_cic) + goto check_scheduled; + + if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq)) + goto check_scheduled; + + if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq)) + goto check_scheduled; + + if (in_service_bfqq->entity.parent != bfqq->entity.parent) + goto check_scheduled; + + if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && + bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) + if ((new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq))) + return new_bfqq; /* Merge with in-service queue */ + + /* + * Check whether there is a cooperator among currently scheduled + * queues. The only thing we need is that the bio/request is not + * NULL, as we need it to establish whether a cooperator exists. + */ +check_scheduled: + new_bfqq = bfq_close_cooperator(bfqd, bfqq, + bfq_io_struct_pos(io_struct, request)); + if (new_bfqq) + return bfq_setup_merge(bfqq, new_bfqq); + + return NULL; +} + +static inline void +bfq_bfqq_save_state(struct bfq_queue *bfqq) +{ + /* + * If bfqq->cic == NULL, the queue is already shared or its requests + * have already been redirected to a shared queue; both idle window + * and weight raising state have already been saved. Do nothing. + */ + if (bfqq->cic == NULL) + return; + if (bfqq->cic->wr_time_left) + /* + * This is the queue of a just-started process, and would + * deserve weight raising: we set raising_time_left to the full + * weight-raising duration to trigger weight-raising when + * and if the queue is split and the first request of the + * queue is enqueued. 
+ */ + bfqq->cic->wr_time_left = bfq_wr_duration(bfqq->bfqd); + else if (bfqq->wr_coeff > 1) { + unsigned long wr_duration = + jiffies - bfqq->last_wr_start_finish; + /* + * It may happen that a queue's weight raising period lasts + * longer than its raising_cur_max_time, as weight raising is + * handled only when a request is enqueued or dispatched (it + * does not use any timer). If the weight raising period is + * about to end, don't save it. + */ + if (bfqq->wr_cur_max_time <= wr_duration) + bfqq->cic->wr_time_left = 0; + else + bfqq->cic->wr_time_left = + bfqq->wr_cur_max_time - wr_duration; + /* + * The bfq_queue is becoming shared or the requests of the + * process owning the queue are being redirected to a shared + * queue. Stop the weight raising period of the queue, as in + * both cases it should not be owned by an interactive or + * soft real-time application. + */ + bfq_bfqq_end_wr(bfqq); + } else + bfqq->cic->wr_time_left = 0; + bfqq->cic->saved_idle_window = bfq_bfqq_idle_window(bfqq); + bfqq->cic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); + bfqq->cic->cooperations++; + bfqq->cic->failed_cooperations = 0; +} + +static inline void +bfq_get_cic_reference(struct bfq_queue *bfqq) +{ + /* + * If bfqq->cic has a non-NULL value, the cic to which it belongs + * is about to begin using a shared bfq_queue. + */ + if (bfqq->cic) + atomic_long_inc(&bfqq->cic->ioc->refcount); +} + +static void +bfq_merge_bfqqs(struct bfq_data *bfqd, struct cfq_io_context *cic, + struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +{ + bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", + (long unsigned)new_bfqq->pid); + /* Save weight raising and idle window of the merged queues */ + bfq_bfqq_save_state(bfqq); + bfq_bfqq_save_state(new_bfqq); + if (bfq_bfqq_IO_bound(bfqq)) + bfq_mark_bfqq_IO_bound(new_bfqq); + bfq_clear_bfqq_IO_bound(bfqq); + /* + * Grab a reference to the cic, to prevent it from being destroyed + * before being possibly touched by a bfq_split_bfqq(). + */ + bfq_get_cic_reference(bfqq); + bfq_get_cic_reference(new_bfqq); + /* + * Merge queues (that is, let cic redirect its requests to new_bfqq) + */ + cic_set_bfqq(cic, new_bfqq, 1); + bfq_mark_bfqq_coop(new_bfqq); + /* + * new_bfqq now belongs to at least two cics (it is a shared queue): set + * new_bfqq->cic to NULL. bfqq either: + * - does not belong to any cic any more, and hence bfqq->cic must + * be set to NULL, or + * - is a queue whose owning cics have already been redirected to a + * different queue, hence the queue is destined to not belong to any + * cic soon and bfqq->cic is already NULL (therefore the next + * assignment causes no harm). + */ + new_bfqq->cic = NULL; + bfqq->cic = NULL; + bfq_put_queue(bfqq); +} + +static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq) +{ + struct cfq_io_context *cic = bfqq->cic; + struct bfq_data *bfqd = bfqq->bfqd; + + if (cic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) { + cic->failed_cooperations++; + if (cic->failed_cooperations >= bfqd->bfq_failed_cooperations) + cic->cooperations = 0; + } +} + +static int bfq_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + struct bfq_queue *bfqq, *new_bfqq; + unsigned long flags; + + /* Disallow merge of a sync bio into an async request. */ + if (bfq_bio_sync(bio) && !rq_is_sync(rq)) + return 0; + + /* + * Lookup the bfqq that this bio will be queued with. Allow + * merge only if rq is queued there. 
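The save/restore of the residual weight-raising time around queue merges and
splits is a small piece of jiffies arithmetic. The sketch below isolates it;
the jiffy values in main() are examples only.

    #include <stdio.h>

    /* Sketch of the bookkeeping in bfq_bfqq_save_state()/bfq_bfqq_resume_state():
     * when a queue is merged, the time left in its weight-raising period is
     * saved; if the queue is later split, the period is resumed with exactly
     * that residual duration. Plain unsigned longs stand in for jiffies. */
    static unsigned long save_wr_time_left(unsigned long now,
                                           unsigned long last_wr_start_finish,
                                           unsigned long wr_cur_max_time)
    {
        unsigned long elapsed = now - last_wr_start_finish;

        /* If the period is (about to be) over, nothing is worth saving. */
        if (wr_cur_max_time <= elapsed)
            return 0;
        return wr_cur_max_time - elapsed;
    }

    int main(void)
    {
        /* Raised at jiffy 1000 for 6000 jiffies, merged at jiffy 2500:
         * 4500 jiffies of boost remain and are carried over. */
        printf("%lu\n", save_wr_time_left(2500, 1000, 6000)); /* 4500 */
        /* Merged after the period already expired: nothing saved. */
        printf("%lu\n", save_wr_time_left(9000, 1000, 6000)); /* 0 */
        return 0;
    }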
+ */ + cic = bfq_cic_lookup(bfqd, current->io_context); + if (cic == NULL) + return 0; + + /* + * The allow_merge_fn scheduler hook may be called with or without + * the queue_lock being held. Access to the rq_pos_tree data + * structures and to cic->bfqq[] is protected by the eqm_lock. + */ + spin_lock_irqsave(&bfqd->eqm_lock, flags); + bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); + /* + * We take advantage of this function to perform an early merge + * of the queues of possible cooperating processes. + */ + if (bfqq != NULL) { + new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); + if (new_bfqq != NULL) { + bfq_merge_bfqqs(bfqd, cic, bfqq, new_bfqq); + /* + * If we get here, the bio will be queued in the + * shared queue, i.e., new_bfqq, so use new_bfqq + * to decide whether bio and rq can be merged. + */ + bfqq = new_bfqq; + } else + bfq_bfqq_increase_failed_cooperations(bfqq); + } + spin_unlock_irqrestore(&bfqd->eqm_lock, flags); + + return bfqq == RQ_BFQQ(rq); +} + +static void __bfq_set_in_service_queue(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + if (bfqq != NULL) { + bfq_mark_bfqq_must_alloc(bfqq); + bfq_mark_bfqq_budget_new(bfqq); + bfq_clear_bfqq_fifo_expire(bfqq); + + bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; + + bfq_log_bfqq(bfqd, bfqq, + "set_in_service_queue, cur-budget = %lu", + bfqq->entity.budget); + } + + bfqd->in_service_queue = bfqq; +} + +/* + * Get and set a new queue for service. + */ +static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); + + __bfq_set_in_service_queue(bfqd, bfqq); + return bfqq; +} + +/* + * If enough samples have been computed, return the current max budget + * stored in bfqd, which is dynamically updated according to the + * estimated disk peak rate; otherwise return the default max budget + */ +static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) +{ + if (bfqd->budgets_assigned < 194) + return bfq_default_max_budget; + else + return bfqd->bfq_max_budget; +} + +/* + * Return min budget, which is a fraction of the current or default + * max budget (trying with 1/32) + */ +static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) +{ + if (bfqd->budgets_assigned < 194) + return bfq_default_max_budget / 32; + else + return bfqd->bfq_max_budget / 32; +} + +static void bfq_arm_slice_timer(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfqd->in_service_queue; + struct cfq_io_context *cic; + unsigned long sl; + + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + + /* Processes have exited, don't wait. */ + cic = bfqd->in_service_cic; + if (cic == NULL || atomic_read(&cic->ioc->nr_tasks) == 0) + return; + + bfq_mark_bfqq_wait_request(bfqq); + + /* + * We don't want to idle for seeks, but we do want to allow + * fair distribution of slice time for a process doing back-to-back + * seeks. So allow a little bit of time for him to submit a new rq. + * + * To prevent processes with (partly) seeky workloads from + * being too ill-treated, grant them a small fraction of the + * assigned budget before reducing the waiting time to + * BFQ_MIN_TT. This happened to help reduce latency. + */ + sl = bfqd->bfq_slice_idle; + /* + * Unless the queue is being weight-raised, grant only minimum idle + * time if the queue either has been seeky for long enough or has + * already proved to be constantly seeky. 
+ */ + if (bfq_sample_valid(bfqq->seek_samples) && + ((BFQQ_SEEKY(bfqq) && bfqq->entity.service > + bfq_max_budget(bfqq->bfqd) / 8) || + bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1) + sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); + else if (bfqq->wr_coeff > 1) + sl = sl * 3; + bfqd->last_idling_start = ktime_get(); + mod_timer(&bfqd->idle_slice_timer, jiffies + sl); + bfq_log(bfqd, "arm idle: %u/%u ms", + jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); +} + +/* + * Set the maximum time for the in-service queue to consume its + * budget. This prevents seeky processes from lowering the disk + * throughput (always guaranteed with a time slice scheme as in CFQ). + */ +static void bfq_set_budget_timeout(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfqd->in_service_queue; + unsigned int timeout_coeff; + if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) + timeout_coeff = 1; + else + timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; + + bfqd->last_budget_start = ktime_get(); + + bfq_clear_bfqq_budget_new(bfqq); + bfqq->budget_timeout = jiffies + + bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; + + bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", + jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * + timeout_coeff)); +} + +/* + * Move request from internal lists to the request queue dispatch list. + */ +static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + /* + * For consistency, the next instruction should have been executed + * after removing the request from the queue and dispatching it. + * We execute instead this instruction before bfq_remove_request() + * (and hence introduce a temporary inconsistency), for efficiency. + * In fact, in a forced_dispatch, this prevents two counters related + * to bfqq->dispatched to risk to be uselessly decremented if bfqq + * is not in service, and then to be incremented again after + * incrementing bfqq->dispatched. + */ + bfqq->dispatched++; + bfq_remove_request(rq); + elv_dispatch_sort(q, rq); + + if (bfq_bfqq_sync(bfqq)) + bfqd->sync_flight++; +} + +/* + * Return expired entry, or NULL to just start from scratch in rbtree. + */ +static struct request *bfq_check_fifo(struct bfq_queue *bfqq) +{ + struct request *rq = NULL; + + if (bfq_bfqq_fifo_expire(bfqq)) + return NULL; + + bfq_mark_bfqq_fifo_expire(bfqq); + + if (list_empty(&bfqq->fifo)) + return NULL; + + rq = rq_entry_fifo(bfqq->fifo.next); + + if (time_before(jiffies, rq_fifo_time(rq))) + return NULL; + + return rq; +} + +static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + return entity->budget - entity->service; +} + +/* Must be called with eqm_lock held */ +static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfqq != bfqd->in_service_queue); + + __bfq_bfqd_reset_in_service(bfqd); + + /* + * If this bfqq is shared between multiple processes, check + * to make sure that those processes are still issuing I/Os + * within the mean seek distance. If not, it may be time to + * break the queues apart again. + */ + if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) + bfq_mark_bfqq_split_coop(bfqq); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + /* + * Overloading budget_timeout field to store the time + * at which the queue remains with no backlog; used by + * the weight-raising mechanism. 
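The budget timeout set in bfq_set_budget_timeout() above scales with the
queue's weight. A minimal userspace rendition of that computation follows;
TIMEOUT_SYNC assumes HZ=1000 purely for the example, and the weights are
invented.

    #include <stdio.h>

    /* The in-service queue gets a time limit proportional to its weight
     * (weight-raised queues get a longer limit, except during a soft
     * real-time period). The sync timeout defaults to HZ / 8 jiffies. */
    #define TIMEOUT_SYNC 125   /* pretend HZ = 1000, so HZ / 8 = 125 jiffies */

    static unsigned long budget_timeout(unsigned long now, int in_soft_rt_period,
                                        unsigned int weight, unsigned int orig_weight)
    {
        unsigned int coeff = in_soft_rt_period ? 1 : weight / orig_weight;

        return now + (unsigned long)TIMEOUT_SYNC * coeff;
    }

    int main(void)
    {
        /* Plain queue: weight equals its original weight. */
        printf("%lu\n", budget_timeout(1000, 0, 100, 100));  /* 1000 + 125  */
        /* Interactive weight raising with a 10x coefficient. */
        printf("%lu\n", budget_timeout(1000, 0, 1000, 100)); /* 1000 + 1250 */
        /* Soft real-time raising: coefficient forced back to 1. */
        printf("%lu\n", budget_timeout(1000, 1, 1000, 100)); /* 1000 + 125  */
        return 0;
    }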
+ */ + bfqq->budget_timeout = jiffies; + bfq_del_bfqq_busy(bfqd, bfqq, 1); + } + else { + bfq_activate_bfqq(bfqd, bfqq); + /* + * Resort priority tree of potential close cooperators. + */ + bfq_rq_pos_tree_add(bfqd, bfqq); + } +} + +/** + * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. + * @bfqd: device data. + * @bfqq: queue to update. + * @reason: reason for expiration. + * + * Handle the feedback on @bfqq budget. See the body for detailed + * comments. + */ +static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + enum bfqq_expiration reason) +{ + struct request *next_rq; + unsigned long budget, min_budget; + + budget = bfqq->max_budget; + min_budget = bfq_min_budget(bfqd); + + BUG_ON(bfqq != bfqd->in_service_queue); + + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", + bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", + budget, bfq_min_budget(bfqd)); + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", + bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); + + if (bfq_bfqq_sync(bfqq)) { + switch (reason) { + /* + * Caveat: in all the following cases we trade latency + * for throughput. + */ + case BFQ_BFQQ_TOO_IDLE: + /* + * This is the only case where we may reduce + * the budget: if there is no request of the + * process still waiting for completion, then + * we assume (tentatively) that the timer has + * expired because the batch of requests of + * the process could have been served with a + * smaller budget. Hence, betting that + * process will behave in the same way when it + * becomes backlogged again, we reduce its + * next budget. As long as we guess right, + * this budget cut reduces the latency + * experienced by the process. + * + * However, if there are still outstanding + * requests, then the process may have not yet + * issued its next request just because it is + * still waiting for the completion of some of + * the still outstanding ones. So in this + * subcase we do not reduce its budget, on the + * contrary we increase it to possibly boost + * the throughput, as discussed in the + * comments to the BUDGET_TIMEOUT case. + */ + if (bfqq->dispatched > 0) /* still outstanding reqs */ + budget = min(budget * 2, bfqd->bfq_max_budget); + else { + if (budget > 5 * min_budget) + budget -= 4 * min_budget; + else + budget = min_budget; + } + break; + case BFQ_BFQQ_BUDGET_TIMEOUT: + /* + * We double the budget here because: 1) it + * gives the chance to boost the throughput if + * this is not a seeky process (which may have + * bumped into this timeout because of, e.g., + * ZBR), 2) together with charge_full_budget + * it helps give seeky processes higher + * timestamps, and hence be served less + * frequently. + */ + budget = min(budget * 2, bfqd->bfq_max_budget); + break; + case BFQ_BFQQ_BUDGET_EXHAUSTED: + /* + * The process still has backlog, and did not + * let either the budget timeout or the disk + * idling timeout expire. Hence it is not + * seeky, has a short thinktime and may be + * happy with a higher budget too. So + * definitely increase the budget of this good + * candidate to boost the disk throughput. + */ + budget = min(budget * 4, bfqd->bfq_max_budget); + break; + case BFQ_BFQQ_NO_MORE_REQUESTS: + /* + * Leave the budget unchanged. 
+ */ + default: + return; + } + } else /* async queue */ + /* async queues get always the maximum possible budget + * (their ability to dispatch is limited by + * @bfqd->bfq_max_budget_async_rq). + */ + budget = bfqd->bfq_max_budget; + + bfqq->max_budget = budget; + + if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && + bfqq->max_budget > bfqd->bfq_max_budget) + bfqq->max_budget = bfqd->bfq_max_budget; + + /* + * Make sure that we have enough budget for the next request. + * Since the finish time of the bfqq must be kept in sync with + * the budget, be sure to call __bfq_bfqq_expire() after the + * update. + */ + next_rq = bfqq->next_rq; + if (next_rq != NULL) + bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + else + bfqq->entity.budget = bfqq->max_budget; + + bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", + next_rq != NULL ? blk_rq_sectors(next_rq) : 0, + bfqq->entity.budget); +} + +static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) +{ + unsigned long max_budget; + + /* + * The max_budget calculated when autotuning is equal to the + * amount of sectors transfered in timeout_sync at the + * estimated peak rate. + */ + max_budget = (unsigned long)(peak_rate * 1000 * + timeout >> BFQ_RATE_SHIFT); + + return max_budget; +} + +/* + * In addition to updating the peak rate, checks whether the process + * is "slow", and returns 1 if so. This slow flag is used, in addition + * to the budget timeout, to reduce the amount of service provided to + * seeky processes, and hence reduce their chances to lower the + * throughput. See the code for more details. + */ +static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int compensate, enum bfqq_expiration reason) +{ + u64 bw, usecs, expected, timeout; + ktime_t delta; + int update = 0; + + if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) + return 0; + + if (compensate) + delta = bfqd->last_idling_start; + else + delta = ktime_get(); + delta = ktime_sub(delta, bfqd->last_budget_start); + usecs = ktime_to_us(delta); + + /* Don't trust short/unrealistic values. */ + if (usecs < 100 || usecs >= LONG_MAX) + return 0; + + /* + * Calculate the bandwidth for the last slice. We use a 64 bit + * value to store the peak rate, in sectors per usec in fixed + * point math. We do so to have enough precision in the estimate + * and to avoid overflows. + */ + bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; + do_div(bw, (unsigned long)usecs); + + timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); + + /* + * Use only long (> 20ms) intervals to filter out spikes for + * the peak rate estimation. 
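The budget feedback just applied can be summarised as a handful of
multiplicative rules. The sketch below restates them outside the kernel;
max_budget and min_budget in main() are example values, and the async branch
is omitted.

    #include <stdio.h>

    enum expiration { TOO_IDLE, BUDGET_TIMEOUT, BUDGET_EXHAUSTED, NO_MORE_REQUESTS };

    /* Sketch of the budget feedback in __bfq_bfqq_recalc_budget() for sync
     * queues; max_budget and min_budget stand for bfqd->bfq_max_budget and
     * bfq_min_budget(). */
    static unsigned long next_budget(unsigned long budget, enum expiration reason,
                                     int reqs_outstanding,
                                     unsigned long max_budget, unsigned long min_budget)
    {
        switch (reason) {
        case TOO_IDLE:
            if (reqs_outstanding)        /* maybe just waiting for completions */
                return budget * 2 < max_budget ? budget * 2 : max_budget;
            if (budget > 5 * min_budget) /* really idle: shrink the budget */
                return budget - 4 * min_budget;
            return min_budget;
        case BUDGET_TIMEOUT:             /* possibly a slow zone or ZBR: double */
            return budget * 2 < max_budget ? budget * 2 : max_budget;
        case BUDGET_EXHAUSTED:           /* well-behaved, greedy queue: x4 */
            return budget * 4 < max_budget ? budget * 4 : max_budget;
        case NO_MORE_REQUESTS:
        default:
            return budget;               /* leave unchanged */
        }
    }

    int main(void)
    {
        unsigned long max = 16384, min = max / 32;

        printf("too idle, nothing in flight: %lu\n",
               next_budget(4096, TOO_IDLE, 0, max, min));         /* 4096 - 4*512 = 2048 */
        printf("budget timeout:              %lu\n",
               next_budget(4096, BUDGET_TIMEOUT, 0, max, min));   /* 8192 */
        printf("budget exhausted:            %lu\n",
               next_budget(4096, BUDGET_EXHAUSTED, 0, max, min)); /* 16384 (hits the max) */
        return 0;
    }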
+ */ + if (usecs > 20000) { + if (bw > bfqd->peak_rate || + (!BFQQ_SEEKY(bfqq) && + reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { + bfq_log(bfqd, "measured bw =%llu", bw); + /* + * To smooth oscillations use a low-pass filter with + * alpha=7/8, i.e., + * new_rate = (7/8) * old_rate + (1/8) * bw + */ + do_div(bw, 8); + if (bw == 0) + return 0; + bfqd->peak_rate *= 7; + do_div(bfqd->peak_rate, 8); + bfqd->peak_rate += bw; + update = 1; + bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); + } + + update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; + + if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) + bfqd->peak_rate_samples++; + + if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && + update) { + int dev_type = blk_queue_nonrot(bfqd->queue); + if (bfqd->bfq_user_max_budget == 0) { + bfqd->bfq_max_budget = + bfq_calc_max_budget(bfqd->peak_rate, + timeout); + bfq_log(bfqd, "new max_budget=%lu", + bfqd->bfq_max_budget); + } + if (bfqd->device_speed == BFQ_BFQD_FAST && + bfqd->peak_rate < device_speed_thresh[dev_type]) { + bfqd->device_speed = BFQ_BFQD_SLOW; + bfqd->RT_prod = R_slow[dev_type] * + T_slow[dev_type]; + } else if (bfqd->device_speed == BFQ_BFQD_SLOW && + bfqd->peak_rate > device_speed_thresh[dev_type]) { + bfqd->device_speed = BFQ_BFQD_FAST; + bfqd->RT_prod = R_fast[dev_type] * + T_fast[dev_type]; + } + } + } + + /* + * If the process has been served for a too short time + * interval to let its possible sequential accesses prevail on + * the initial seek time needed to move the disk head on the + * first sector it requested, then give the process a chance + * and for the moment return false. + */ + if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) + return 0; + + /* + * A process is considered ``slow'' (i.e., seeky, so that we + * cannot treat it fairly in the service domain, as it would + * slow down too much the other processes) if, when a slice + * ends for whatever reason, it has received service at a + * rate that would not be high enough to complete the budget + * before the budget timeout expiration. + */ + expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; + + /* + * Caveat: processes doing IO in the slower disk zones will + * tend to be slow(er) even if not seeky. And the estimated + * peak rate will actually be an average over the disk + * surface. Hence, to not be too harsh with unlucky processes, + * we keep a budget/3 margin of safety before declaring a + * process slow. + */ + return expected > (4 * bfqq->entity.budget) / 3; +} + +/* + * To be deemed as soft real-time, an application must meet two + * requirements. First, the application must not require an average + * bandwidth higher than the approximate bandwidth required to playback or + * record a compressed high-definition video. + * The next function is invoked on the completion of the last request of a + * batch, to compute the next-start time instant, soft_rt_next_start, such + * that, if the next request of the application does not arrive before + * soft_rt_next_start, then the above requirement on the bandwidth is met. + * + * The second requirement is that the request pattern of the application is + * isochronous, i.e., that, after issuing a request or a batch of requests, + * the application stops issuing new requests until all its pending requests + * have been completed. After that, the application may issue a new batch, + * and so on. 
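The peak-rate estimate and the autotuned maximum budget are again plain
fixed-point arithmetic. The following sketch shows both computations; the
prior rate, service amount and interval passed in main() are invented, while
the 125 ms value corresponds to the HZ/8 sync timeout defined earlier.

    #include <stdio.h>

    #define RATE_SHIFT 16   /* BFQ_RATE_SHIFT: fixed-point shift for rates */

    /* Sketch of the update in bfq_update_peak_rate(): the bandwidth measured
     * over the last budget (sectors per usec, fixed point) is folded into
     * the running estimate with a 7/8 - 1/8 low-pass filter. */
    static unsigned long long update_peak_rate(unsigned long long peak_rate,
                                               unsigned long long service_sectors,
                                               unsigned long long usecs)
    {
        unsigned long long bw = (service_sectors << RATE_SHIFT) / usecs;

        return (peak_rate * 7) / 8 + bw / 8;
    }

    /* Sketch of bfq_calc_max_budget(): with autotuning, the maximum budget is
     * the number of sectors the device can transfer, at the estimated peak
     * rate, within the sync budget timeout (timeout in msec, hence * 1000
     * to convert to usec). */
    static unsigned long calc_max_budget(unsigned long long peak_rate,
                                         unsigned long long timeout_ms)
    {
        return (unsigned long)((peak_rate * 1000 * timeout_ms) >> RATE_SHIFT);
    }

    int main(void)
    {
        /* Prior estimate of ~13000 (fixed point); 8192 sectors (4 MiB)
         * served in 40 ms. */
        unsigned long long rate = update_peak_rate(13000, 8192, 40000);

        printf("peak rate (fixed point): %llu\n", rate);
        printf("autotuned max budget:    %lu sectors\n",
               calc_max_budget(rate, 125));   /* about 12 MiB worth of sectors */
        return 0;
    }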
+ * For this reason the next function is invoked to compute + * soft_rt_next_start only for applications that meet this requirement, + * whereas soft_rt_next_start is set to infinity for applications that do + * not. + * + * Unfortunately, even a greedy application may happen to behave in an + * isochronous way if the CPU load is high. In fact, the application may + * stop issuing requests while the CPUs are busy serving other processes, + * then restart, then stop again for a while, and so on. In addition, if + * the disk achieves a low enough throughput with the request pattern + * issued by the application (e.g., because the request pattern is random + * and/or the device is slow), then the application may meet the above + * bandwidth requirement too. To prevent such a greedy application to be + * deemed as soft real-time, a further rule is used in the computation of + * soft_rt_next_start: soft_rt_next_start must be higher than the current + * time plus the maximum time for which the arrival of a request is waited + * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. + * This filters out greedy applications, as the latter issue instead their + * next request as soon as possible after the last one has been completed + * (in contrast, when a batch of requests is completed, a soft real-time + * application spends some time processing data). + * + * Unfortunately, the last filter may easily generate false positives if + * only bfqd->bfq_slice_idle is used as a reference time interval and one + * or both the following cases occur: + * 1) HZ is so low that the duration of a jiffy is comparable to or higher + * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with + * HZ=100. + * 2) jiffies, instead of increasing at a constant rate, may stop increasing + * for a while, then suddenly 'jump' by several units to recover the lost + * increments. This seems to happen, e.g., inside virtual machines. + * To address this issue, we do not use as a reference time interval just + * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In + * particular we add the minimum number of jiffies for which the filter + * seems to be quite precise also in embedded systems and KVM/QEMU virtual + * machines. + */ +static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + return max(bfqq->last_idle_bklogged + + HZ * bfqq->service_from_backlogged / + bfqd->bfq_wr_max_softrt_rate, + jiffies + bfqq->bfqd->bfq_slice_idle + 4); +} + +/* + * Return the largest-possible time instant such that, for as long as possible, + * the current time will be lower than this time instant according to the macro + * time_is_before_jiffies(). + */ +static inline unsigned long bfq_infinity_from_now(unsigned long now) +{ + return now + ULONG_MAX / 2; +} + +/** + * bfq_bfqq_expire - expire a queue. + * @bfqd: device owning the queue. + * @bfqq: the queue to expire. + * @compensate: if true, compensate for the time spent idling. + * @reason: the reason causing the expiration. + * + * + * If the process associated to the queue is slow (i.e., seeky), or in + * case of budget timeout, or, finally, if it is async, we + * artificially charge it an entire budget (independently of the + * actual service it received). As a consequence, the queue will get + * higher timestamps than the correct ones upon reactivation, and + * hence it will be rescheduled as if it had received more service + * than what it actually received. 
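The next-start computation described above is a one-liner once the inputs
are known. The sketch below reproduces it; HZ_VAL, SLICE_IDLE and the figures
in main() are example values.

    #include <stdio.h>

    #define HZ_VAL      250  /* pretend CONFIG_HZ=250 for the example       */
    #define SLICE_IDLE  2    /* bfqd->bfq_slice_idle = HZ / 125 jiffies     */

    static unsigned long max_ul(unsigned long a, unsigned long b)
    {
        return a > b ? a : b;
    }

    /* Sketch of bfq_bfqq_softrt_next_start(): the earliest instant at which
     * a new batch may arrive without pushing the queue's bandwidth above
     * max_softrt_rate (sectors/sec), never earlier than a bit more than an
     * idle slice from now, to filter out greedy applications. */
    static unsigned long softrt_next_start(unsigned long now,
                                           unsigned long last_idle_bklogged,
                                           unsigned long service_from_backlogged,
                                           unsigned long max_softrt_rate)
    {
        return max_ul(last_idle_bklogged +
                      HZ_VAL * service_from_backlogged / max_softrt_rate,
                      now + SLICE_IDLE + 4);
    }

    int main(void)
    {
        /* 7000 sectors served since jiffy 1000 with a 7000 sectors/s cap:
         * bandwidth-wise the next batch should not start before jiffy 1250. */
        printf("%lu\n", softrt_next_start(1100, 1000, 7000, 7000)); /* 1250 */
        /* Tiny batch: the "now + idle slice + 4" lower bound dominates. */
        printf("%lu\n", softrt_next_start(1100, 1000, 10, 7000));   /* 1106 */
        return 0;
    }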
In the end, this class of processes + * will receive less service in proportion to how slowly they consume + * their budgets (and hence how seriously they tend to lower the + * throughput). + * + * In contrast, when a queue expires because it has been idling for + * too much or because it exhausted its budget, we do not touch the + * amount of service it has received. Hence when the queue will be + * reactivated and its timestamps updated, the latter will be in sync + * with the actual service received by the queue until expiration. + * + * Charging a full budget to the first type of queues and the exact + * service to the others has the effect of using the WF2Q+ policy to + * schedule the former on a timeslice basis, without violating the + * service domain guarantees of the latter. + */ +static void bfq_bfqq_expire(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + int compensate, + enum bfqq_expiration reason) +{ + int slow; + BUG_ON(bfqq != bfqd->in_service_queue); + + /* Update disk peak rate for autotuning and check whether the + * process is slow (see bfq_update_peak_rate). + */ + slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); + + /* + * As above explained, 'punish' slow (i.e., seeky), timed-out + * and async queues, to favor sequential sync workloads. + * + * Processes doing I/O in the slower disk zones will tend to be + * slow(er) even if not seeky. Hence, since the estimated peak + * rate is actually an average over the disk surface, these + * processes may timeout just for bad luck. To avoid punishing + * them we do not charge a full budget to a process that + * succeeded in consuming at least 2/3 of its budget. + */ + if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) + bfq_bfqq_charge_full_budget(bfqq); + + bfqq->service_from_backlogged += bfqq->entity.service; + + if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT && + !bfq_bfqq_constantly_seeky(bfqq)) { + bfq_mark_bfqq_constantly_seeky(bfqq); + if (!blk_queue_nonrot(bfqd->queue)) + bfqd->const_seeky_busy_in_flight_queues++; + } + + if (reason == BFQ_BFQQ_TOO_IDLE && + bfqq->entity.service <= 2 * bfqq->entity.budget / 10 ) + bfq_clear_bfqq_IO_bound(bfqq); + + if (bfqd->low_latency && bfqq->wr_coeff == 1) + bfqq->last_wr_start_finish = jiffies; + + if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 && + RB_EMPTY_ROOT(&bfqq->sort_list)) { + /* + * If we get here, and there are no outstanding requests, + * then the request pattern is isochronous (see the comments + * to the function bfq_bfqq_softrt_next_start()). Hence we + * can compute soft_rt_next_start. If, instead, the queue + * still has outstanding requests, then we have to wait + * for the completion of all the outstanding requests to + * discover whether the request pattern is actually + * isochronous. + */ + if (bfqq->dispatched == 0) + bfqq->soft_rt_next_start = + bfq_bfqq_softrt_next_start(bfqd, bfqq); + else { + /* + * The application is still waiting for the + * completion of one or more requests: + * prevent it from possibly being incorrectly + * deemed as soft real-time by setting its + * soft_rt_next_start to infinity. In fact, + * without this assignment, the application + * would be incorrectly deemed as soft + * real-time if: + * 1) it issued a new request before the + * completion of all its in-flight + * requests, and + * 2) at that time, its soft_rt_next_start + * happened to be in the past. 
+ */ + bfqq->soft_rt_next_start = + bfq_infinity_from_now(jiffies); + /* + * Schedule an update of soft_rt_next_start to when + * the task may be discovered to be isochronous. + */ + bfq_mark_bfqq_softrt_update(bfqq); + } + } + + bfq_log_bfqq(bfqd, bfqq, + "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, + slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); + + /* + * Increase, decrease or leave budget unchanged according to + * reason. + */ + __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); + spin_lock(&bfqd->eqm_lock); + __bfq_bfqq_expire(bfqd, bfqq); + spin_unlock(&bfqd->eqm_lock); +} + +/* + * Budget timeout is not implemented through a dedicated timer, but + * just checked on request arrivals and completions, as well as on + * idle timer expirations. + */ +static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) +{ + if (bfq_bfqq_budget_new(bfqq) || + time_before(jiffies, bfqq->budget_timeout)) + return 0; + return 1; +} + +/* + * If we expire a queue that is waiting for the arrival of a new + * request, we may prevent the fictitious timestamp back-shifting that + * allows the guarantees of the queue to be preserved (see [1] for + * this tricky aspect). Hence we return true only if this condition + * does not hold, or if the queue is slow enough to deserve only to be + * kicked off for preserving a high throughput. +*/ +static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqq->bfqd, bfqq, + "may_budget_timeout: wait_request %d left %d timeout %d", + bfq_bfqq_wait_request(bfqq), + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, + bfq_bfqq_budget_timeout(bfqq)); + + return (!bfq_bfqq_wait_request(bfqq) || + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) + && + bfq_bfqq_budget_timeout(bfqq); +} + +/* + * Device idling is allowed only for the queues for which this function + * returns true. For this reason, the return value of this function plays a + * critical role for both throughput boosting and service guarantees. The + * return value is computed through a logical expression. In this rather + * long comment, we try to briefly describe all the details and motivations + * behind the components of this logical expression. + * + * First, the expression may be true only for sync queues. Besides, if + * bfqq is also being weight-raised, then the expression always evaluates + * to true, as device idling is instrumental for preserving low-latency + * guarantees (see [1]). Otherwise, the expression evaluates to true only + * if bfqq has a non-null idle window and at least one of the following + * two conditions holds. The first condition is that the device is not + * performing NCQ, because idling the device most certainly boosts the + * throughput if this condition holds and bfqq has been granted a non-null + * idle window. The second compound condition is made of the logical AND of + * two components. + * + * The first component is true only if there is no weight-raised busy + * queue. This guarantees that the device is not idled for a sync non- + * weight-raised queue when there are busy weight-raised queues. The former + * is then expired immediately if empty. Combined with the timestamping + * rules of BFQ (see [1] for details), this causes sync non-weight-raised + * queues to get a lower number of requests served, and hence to ask for a + * lower number of requests from the request pool, before the busy weight- + * raised queues get served again. 
+ * + * This is beneficial for the processes associated with weight-raised + * queues, when the request pool is saturated (e.g., in the presence of + * write hogs). In fact, if the processes associated with the other queues + * ask for requests at a lower rate, then weight-raised processes have a + * higher probability to get a request from the pool immediately (or at + * least soon) when they need one. Hence they have a higher probability to + * actually get a fraction of the disk throughput proportional to their + * high weight. This is especially true with NCQ-capable drives, which + * enqueue several requests in advance and further reorder internally- + * queued requests. + * + * In the end, mistreating non-weight-raised queues when there are busy + * weight-raised queues seems to mitigate starvation problems in the + * presence of heavy write workloads and NCQ, and hence to guarantee a + * higher application and system responsiveness in these hostile scenarios. + * + * If the first component of the compound condition is instead true, i.e., + * there is no weight-raised busy queue, then the second component of the + * compound condition takes into account service-guarantee and throughput + * issues related to NCQ (recall that the compound condition is evaluated + * only if the device is detected as supporting NCQ). + * + * As for service guarantees, allowing the drive to enqueue more than one + * request at a time, and hence delegating de facto final scheduling + * decisions to the drive's internal scheduler, causes loss of control on + * the actual request service order. In this respect, when the drive is + * allowed to enqueue more than one request at a time, the service + * distribution enforced by the drive's internal scheduler is likely to + * coincide with the desired device-throughput distribution only in the + * following, perfectly symmetric, scenario: + * 1) all active queues have the same weight, + * 2) all active groups at the same level in the groups tree have the same + * weight, + * 3) all active groups at the same level in the groups tree have the same + * number of children. + * + * Even in such a scenario, sequential I/O may still receive a preferential + * treatment, but this is not likely to be a big issue with flash-based + * devices, because of their non-dramatic loss of throughput with random + * I/O. Things do differ with HDDs, for which additional care is taken, as + * explained after completing the discussion for flash-based devices. + * + * Unfortunately, keeping the necessary state for evaluating exactly the + * above symmetry conditions would be quite complex and time-consuming. + * Therefore BFQ evaluates instead the following stronger sub-conditions, + * for which it is much easier to maintain the needed state: + * 1) all active queues have the same weight, + * 2) all active groups have the same weight, + * 3) all active groups have at most one active child each. + * In particular, the last two conditions are always true if hierarchical + * support and the cgroups interface are not enabled, hence no state needs + * to be maintained in this case. + * + * According to the above considerations, the second component of the + * compound condition evaluates to true if any of the above symmetry + * sub-condition does not hold, or the device is not flash-based. Therefore, + * if also the first component is true, then idling is allowed for a sync + * queue. 
These are the only sub-conditions considered if the device is
+ * flash-based, as, for such a device, it is sensible to force idling only
+ * for service-guarantee issues. In fact, as for throughput, idling
+ * NCQ-capable flash-based devices would not boost the throughput even
+ * with sequential I/O; rather it would lower the throughput in proportion
+ * to how fast the device is. In the end, (only) if all three
+ * sub-conditions hold and the device is flash-based, the compound
+ * condition evaluates to false and therefore no idling is performed.
+ *
+ * As already said, things change with a rotational device, where idling
+ * boosts the throughput with sequential I/O (even with NCQ). Hence, for
+ * such a device the second component of the compound condition evaluates
+ * to true also if the following additional sub-condition does not hold:
+ * the queue is constantly seeky. Unfortunately, this different behavior
+ * with respect to flash-based devices causes an additional asymmetry: if
+ * some sync queues enjoy idling and some other sync queues do not, then
+ * the latter get a low share of the device throughput, simply because the
+ * former get many requests served after being set as in service, whereas
+ * the latter do not. As a consequence, to guarantee the desired throughput
+ * distribution, on HDDs the compound expression evaluates to true (and
+ * hence device idling is performed) also if the following last symmetry
+ * condition does not hold: no other queue is benefiting from idling. Also
+ * this last condition is actually replaced with a simpler-to-maintain and
+ * stronger condition: there is no busy queue which is not constantly seeky
+ * (and hence may also benefit from idling).
+ *
+ * To sum up, when all the required symmetry and throughput-boosting
+ * sub-conditions hold, the second component of the compound condition
+ * evaluates to false, and hence no idling is performed. This helps to
+ * keep the drives' internal queues full on NCQ-capable devices, and hence
+ * to boost the throughput, without causing 'almost' any loss of service
+ * guarantees. The 'almost' follows from the fact that, if the internal
+ * queue of one such device is filled while all the sub-conditions hold,
+ * but at some point in time some sub-condition ceases to hold, then it may
+ * become impossible to let requests be served in the new desired order
+ * until all the requests already queued in the device have been served.
+ */
+static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq)
+{
+ struct bfq_data *bfqd = bfqq->bfqd;
+#ifdef CONFIG_CGROUP_BFQIO
+#define symmetric_scenario (!bfqd->active_numerous_groups && \
+ !bfq_differentiated_weights(bfqd))
+#else
+#define symmetric_scenario (!bfq_differentiated_weights(bfqd))
+#endif
+#define cond_for_seeky_on_ncq_hdd (bfq_bfqq_constantly_seeky(bfqq) && \
+ bfqd->busy_in_flight_queues == \
+ bfqd->const_seeky_busy_in_flight_queues)
+/*
+ * Condition for expiring a non-weight-raised queue (and hence not idling
+ * the device).
+ */ +#define cond_for_expiring_non_wr (bfqd->hw_tag && \ + (bfqd->wr_busy_queues > 0 || \ + (symmetric_scenario && \ + (blk_queue_nonrot(bfqd->queue) || \ + cond_for_seeky_on_ncq_hdd)))) + + return bfq_bfqq_sync(bfqq) && + (bfq_bfqq_IO_bound(bfqq) || bfqq->wr_coeff > 1) && + (bfqq->wr_coeff > 1 || + (bfq_bfqq_idle_window(bfqq) && + !cond_for_expiring_non_wr) + ); +} + +/* + * If the in-service queue is empty but sync, and the function + * bfq_bfqq_must_not_expire returns true, then: + * 1) the queue must remain in service and cannot be expired, and + * 2) the disk must be idled to wait for the possible arrival of a new + * request for the queue. + * See the comments to the function bfq_bfqq_must_not_expire for the reasons + * why performing device idling is the best choice to boost the throughput + * and preserve service guarantees when bfq_bfqq_must_not_expire itself + * returns true. + */ +static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) +{ + struct bfq_data *bfqd = bfqq->bfqd; + + return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 && + bfq_bfqq_must_not_expire(bfqq); +} + +/* + * Select a queue for service. If we have a current in-service queue, + * check whether to continue servicing it, or retrieve and set a new one. + */ +static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + struct request *next_rq; + enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; + + bfqq = bfqd->in_service_queue; + if (bfqq == NULL) + goto new_queue; + + bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); + + if (bfq_may_expire_for_budg_timeout(bfqq) && + !timer_pending(&bfqd->idle_slice_timer) && + !bfq_bfqq_must_idle(bfqq)) + goto expire; + + next_rq = bfqq->next_rq; + /* + * If bfqq has requests queued and it has enough budget left to + * serve them, keep the queue, otherwise expire it. + */ + if (next_rq != NULL) { + if (bfq_serv_to_charge(next_rq, bfqq) > + bfq_bfqq_budget_left(bfqq)) { + reason = BFQ_BFQQ_BUDGET_EXHAUSTED; + goto expire; + } else { + /* + * The idle timer may be pending because we may + * not disable disk idling even when a new request + * arrives. + */ + if (timer_pending(&bfqd->idle_slice_timer)) { + /* + * If we get here: 1) at least a new request + * has arrived but we have not disabled the + * timer because the request was too small, + * 2) then the block layer has unplugged + * the device, causing the dispatch to be + * invoked. + * + * Since the device is unplugged, now the + * requests are probably large enough to + * provide a reasonable throughput. + * So we disable idling. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + } + goto keep_queue; + } + } + + /* + * No requests pending. If the in-service queue still has requests + * in flight (possibly waiting for a completion) or is idling for a + * new request, then keep it. + */ + if (timer_pending(&bfqd->idle_slice_timer) || + (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) { + bfqq = NULL; + goto keep_queue; + } + + reason = BFQ_BFQQ_NO_MORE_REQUESTS; +expire: + bfq_bfqq_expire(bfqd, bfqq, 0, reason); +new_queue: + bfqq = bfq_set_in_service_queue(bfqd); + bfq_log(bfqd, "select_queue: new queue %d returned", + bfqq != NULL ? 
bfqq->pid : 0); +keep_queue: + return bfqq; +} + +static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ + bfq_log_bfqq(bfqd, bfqq, + "raising period dur %u/%u msec, old coeff %u, w %d(%d)", + jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time), + bfqq->wr_coeff, + bfqq->entity.weight, bfqq->entity.orig_weight); + + BUG_ON(bfqq != bfqd->in_service_queue && entity->weight != + entity->orig_weight * bfqq->wr_coeff); + if (entity->ioprio_changed) + bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); + + /* + * If too much time has elapsed from the beginning + * of this weight-raising period, or the queue has + * exceeded the acceptable number of cooperations, + * stop it. + */ + if (bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || + time_is_before_jiffies(bfqq->last_wr_start_finish + + bfqq->wr_cur_max_time)) { + bfqq->last_wr_start_finish = jiffies; + bfq_log_bfqq(bfqd, bfqq, + "wrais ending at %lu, rais_max_time %u", + bfqq->last_wr_start_finish, + jiffies_to_msecs(bfqq->wr_cur_max_time)); + bfq_bfqq_end_wr(bfqq); + } + } + /* Update weight both if it must be raised and if it must be lowered */ + if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) + __bfq_entity_update_weight_prio( + bfq_entity_service_tree(entity), + entity); +} + +/* + * Dispatch one request from bfqq, moving it to the request queue + * dispatch list. + */ +static int bfq_dispatch_request(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + int dispatched = 0; + struct request *rq; + unsigned long service_to_charge; + + BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); + + /* Follow expired path, else get first next available. */ + rq = bfq_check_fifo(bfqq); + if (rq == NULL) + rq = bfqq->next_rq; + service_to_charge = bfq_serv_to_charge(rq, bfqq); + + if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { + /* + * This may happen if the next rq is chosen in fifo order + * instead of sector order. The budget is properly + * dimensioned to be always sufficient to serve the next + * request only if it is chosen in sector order. The reason + * is that it would be quite inefficient and little useful + * to always make sure that the budget is large enough to + * serve even the possible next rq in fifo order. + * In fact, requests are seldom served in fifo order. + * + * Expire the queue for budget exhaustion, and make sure + * that the next act_budget is enough to serve the next + * request, even if it comes from the fifo expired path. + */ + bfqq->next_rq = rq; + /* + * Since this dispatch is failed, make sure that + * a new one will be performed + */ + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); + goto expire; + } + + /* Finally, insert request into driver dispatch list. 
*/ + bfq_bfqq_served(bfqq, service_to_charge); + bfq_dispatch_insert(bfqd->queue, rq); + + bfq_update_wr_data(bfqd, bfqq); + + bfq_log_bfqq(bfqd, bfqq, + "dispatched %u sec req (%llu), budg left %lu", + blk_rq_sectors(rq), + (long long unsigned)blk_rq_pos(rq), + bfq_bfqq_budget_left(bfqq)); + + dispatched++; + + if (bfqd->in_service_cic == NULL) { + atomic_long_inc(&RQ_CIC(rq)->ioc->refcount); + bfqd->in_service_cic = RQ_CIC(rq); + } + + if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && + dispatched >= bfqd->bfq_max_budget_async_rq) || + bfq_class_idle(bfqq))) + goto expire; + + return dispatched; + +expire: + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); + return dispatched; +} + +static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) +{ + int dispatched = 0; + + while (bfqq->next_rq != NULL) { + bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); + dispatched++; + } + + BUG_ON(!list_empty(&bfqq->fifo)); + return dispatched; +} + +/* + * Drain our current requests. + * Used for barriers and when switching io schedulers on-the-fly. + */ +static int bfq_forced_dispatch(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq, *n; + struct bfq_service_tree *st; + int dispatched = 0; + + bfqq = bfqd->in_service_queue; + if (bfqq != NULL) { + spin_lock(&bfqd->eqm_lock); + __bfq_bfqq_expire(bfqd, bfqq); + spin_unlock(&bfqd->eqm_lock); + } + + /* + * Loop through classes, and be careful to leave the scheduler + * in a consistent state, as feedback mechanisms and vtime + * updates cannot be disabled during the process. + */ + list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { + st = bfq_entity_service_tree(&bfqq->entity); + + dispatched += __bfq_forced_dispatch_bfqq(bfqq); + bfqq->max_budget = bfq_max_budget(bfqd); + + bfq_forget_idle(st); + } + + BUG_ON(bfqd->busy_queues != 0); + + return dispatched; +} + +static int bfq_dispatch_requests(struct request_queue *q, int force) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq; + int max_dispatch; + + bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); + if (bfqd->busy_queues == 0) + return 0; + + if (unlikely(force)) + return bfq_forced_dispatch(bfqd); + + bfqq = bfq_select_queue(bfqd); + if (bfqq == NULL) + return 0; + + max_dispatch = bfqd->bfq_quantum; + if (bfq_class_idle(bfqq)) + max_dispatch = 1; + + if (!bfq_bfqq_sync(bfqq)) + max_dispatch = bfqd->bfq_max_budget_async_rq; + + if (bfqq->dispatched >= max_dispatch) { + if (bfqd->busy_queues > 1) + return 0; + if (bfqq->dispatched >= 4 * max_dispatch) + return 0; + } + + if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) + return 0; + + bfq_clear_bfqq_wait_request(bfqq); + BUG_ON(timer_pending(&bfqd->idle_slice_timer)); + + if (!bfq_dispatch_request(bfqd, bfqq)) + return 0; + + bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d (max_disp %d)", + bfqq->pid, max_dispatch); + + return 1; +} + +/* + * Task holds one reference to the queue, dropped when task exits. Each rq + * in-flight on this queue also holds a reference, dropped when rq is freed. + * + * Queue lock must be held here. 
+ */ +static void bfq_put_queue(struct bfq_queue *bfqq) +{ + struct bfq_data *bfqd = bfqq->bfqd; + + BUG_ON(atomic_read(&bfqq->ref) <= 0); + + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, + atomic_read(&bfqq->ref)); + if (!atomic_dec_and_test(&bfqq->ref)) + return; + + BUG_ON(rb_first(&bfqq->sort_list) != NULL); + BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); + BUG_ON(bfqq->entity.tree != NULL); + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqd->in_service_queue == bfqq); + + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); + + kmem_cache_free(bfq_pool, bfqq); +} + +static void bfq_put_cooperator(struct bfq_queue *bfqq) +{ + struct bfq_queue *__bfqq, *next; + + /* + * If this queue was scheduled to merge with another queue, be + * sure to drop the reference taken on that queue (and others in + * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. + */ + __bfqq = bfqq->new_bfqq; + while (__bfqq) { + if (__bfqq == bfqq) + break; + next = __bfqq->new_bfqq; + bfq_put_queue(__bfqq); + __bfqq = next; + } +} + +/* Coop lock is taken in __bfq_exit_single_io_context() */ +static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + if (bfqq == bfqd->in_service_queue) { + __bfq_bfqq_expire(bfqd, bfqq); + bfq_schedule_dispatch(bfqd); + } + + bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, + atomic_read(&bfqq->ref)); + + bfq_put_cooperator(bfqq); + + bfq_put_queue(bfqq); +} + +/* + * Update the entity prio values; note that the new values will not + * be used until the next (re)activation. + */ +static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc) +{ + struct task_struct *tsk = current; + int ioprio_class; + + if (!bfq_bfqq_prio_changed(bfqq)) + return; + + ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); + switch (ioprio_class) { + default: + dev_err(bfqq->bfqd->queue->backing_dev_info.dev, + "bfq: bad prio %x\n", ioprio_class); + case IOPRIO_CLASS_NONE: + /* + * No prio set, inherit CPU scheduling settings. 
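+ * (The default: label above has no break statement and falls through
+ * to this case: an unknown priority class is reported via dev_err()
+ * and then handled as IOPRIO_CLASS_NONE.)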
+ */ + bfqq->entity.new_ioprio = task_nice_ioprio(tsk); + bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); + break; + case IOPRIO_CLASS_RT: + bfqq->entity.new_ioprio = task_ioprio(ioc); + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; + break; + case IOPRIO_CLASS_BE: + bfqq->entity.new_ioprio = task_ioprio(ioc); + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; + break; + case IOPRIO_CLASS_IDLE: + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; + bfqq->entity.new_ioprio = 7; + bfq_clear_bfqq_idle_window(bfqq); + break; + } + + bfqq->entity.ioprio_changed = 1; + + bfq_clear_bfqq_prio_changed(bfqq); +} + +static void bfq_changed_ioprio(struct io_context *ioc, + struct cfq_io_context *cic) +{ + struct bfq_data *bfqd; + struct bfq_queue *bfqq, *new_bfqq; + struct bfq_group *bfqg; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (unlikely(bfqd == NULL)) + return; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic->cfqq[BLK_RW_ASYNC]; + if (bfqq != NULL) { + bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, + sched_data); + new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, cic->ioc, + GFP_ATOMIC); + if (new_bfqq != NULL) { + cic->cfqq[BLK_RW_ASYNC] = new_bfqq; + bfq_log_bfqq(bfqd, bfqq, + "changed_ioprio: bfqq %p %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } + } + + bfqq = cic->cfqq[BLK_RW_SYNC]; + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) + bfq_mark_bfqq_prio_changed(bfqq); + + bfq_put_bfqd_unlock(bfqd, &flags); +} + +static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + pid_t pid, int is_sync) +{ + RB_CLEAR_NODE(&bfqq->entity.rb_node); + INIT_LIST_HEAD(&bfqq->fifo); + + atomic_set(&bfqq->ref, 0); + bfqq->bfqd = bfqd; + + bfq_mark_bfqq_prio_changed(bfqq); + + if (is_sync) { + if (!bfq_class_idle(bfqq)) + bfq_mark_bfqq_idle_window(bfqq); + bfq_mark_bfqq_sync(bfqq); + } + bfq_mark_bfqq_IO_bound(bfqq); + + /* Tentative initial value to trade off between thr and lat */ + bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; + bfqq->pid = pid; + + bfqq->wr_coeff = 1; + bfqq->last_wr_start_finish = 0; + /* + * Set to the value for which bfqq will not be deemed as + * soft rt when it becomes backlogged. + */ + bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies); +} + +static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, + int is_sync, + struct io_context *ioc, + gfp_t gfp_mask) +{ + struct bfq_queue *bfqq, *new_bfqq = NULL; + struct cfq_io_context *cic; + +retry: + cic = bfq_cic_lookup(bfqd, ioc); + /* cic always exists here */ + bfqq = cic_to_bfqq(cic, is_sync); + + /* + * Always try a new alloc if we fall back to the OOM bfqq + * originally, since it should just be a temporary situation. 
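+ * (If the caller can sleep (__GFP_WAIT), the queue lock and the
+ * eqm_lock are dropped around the allocation below and the lookup is
+ * retried from the retry label, because the cic-to-bfq_queue
+ * association may have changed while the locks were not held.)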
+ */ + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { + bfqq = NULL; + if (new_bfqq != NULL) { + bfqq = new_bfqq; + new_bfqq = NULL; + } else if (gfp_mask & __GFP_WAIT) { + spin_unlock(&bfqd->eqm_lock); + spin_unlock_irq(bfqd->queue->queue_lock); + new_bfqq = kmem_cache_alloc_node(bfq_pool, + gfp_mask | __GFP_ZERO, + bfqd->queue->node); + spin_lock_irq(bfqd->queue->queue_lock); + spin_lock(&bfqd->eqm_lock); + if (new_bfqq != NULL) + goto retry; + } else { + bfqq = kmem_cache_alloc_node(bfq_pool, + gfp_mask | __GFP_ZERO, + bfqd->queue->node); + } + + if (bfqq != NULL) { + bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); + bfq_log_bfqq(bfqd, bfqq, "allocated"); + } else { + bfqq = &bfqd->oom_bfqq; + bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); + } + + bfq_init_prio_data(bfqq, ioc); + bfq_init_entity(&bfqq->entity, bfqg); + } + + if (new_bfqq != NULL) + kmem_cache_free(bfq_pool, new_bfqq); + + return bfqq; +} + +static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, + struct bfq_group *bfqg, + int ioprio_class, int ioprio) +{ + switch (ioprio_class) { + case IOPRIO_CLASS_RT: + return &bfqg->async_bfqq[0][ioprio]; + case IOPRIO_CLASS_BE: + return &bfqg->async_bfqq[1][ioprio]; + case IOPRIO_CLASS_IDLE: + return &bfqg->async_idle_bfqq; + default: + BUG(); + } +} + +static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, int is_sync, + struct io_context *ioc, gfp_t gfp_mask) +{ + const int ioprio = task_ioprio(ioc); + const int ioprio_class = task_ioprio_class(ioc); + struct bfq_queue **async_bfqq = NULL; + struct bfq_queue *bfqq = NULL; + + if (!is_sync) { + async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, + ioprio); + bfqq = *async_bfqq; + } + + if (bfqq == NULL) + bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, ioc, gfp_mask); + + /* + * Pin the queue now that it's allocated, scheduler exit will + * prune it. + */ + if (!is_sync && *async_bfqq == NULL) { + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", + bfqq, atomic_read(&bfqq->ref)); + *async_bfqq = bfqq; + } + + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, + atomic_read(&bfqq->ref)); + return bfqq; +} + +static void bfq_update_io_thinktime(struct bfq_data *bfqd, + struct cfq_io_context *cic) +{ + unsigned long elapsed = jiffies - cic->ttime.last_end_request; + unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); + + cic->ttime.ttime_samples = (7*cic->ttime.ttime_samples + 256) / 8; + cic->ttime.ttime_total = (7*cic->ttime.ttime_total + 256*ttime) / 8; + cic->ttime.ttime_mean = (cic->ttime.ttime_total + 128) / + cic->ttime.ttime_samples; +} + +static void bfq_update_io_seektime(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *rq) +{ + sector_t sdist; + u64 total; + + if (bfqq->last_request_pos < blk_rq_pos(rq)) + sdist = blk_rq_pos(rq) - bfqq->last_request_pos; + else + sdist = bfqq->last_request_pos - blk_rq_pos(rq); + + /* + * Don't allow the seek distance to get too large from the + * odd fragment, pagein, etc. 
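+ * (The seek statistics below are fixed-point exponential moving
+ * averages: each update keeps 7/8 of the previous value and folds in
+ * the new, capped distance with weight 1/8, using a scale factor of
+ * 256; seek_mean is then obtained by dividing seek_total by
+ * seek_samples, with rounding.)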
+ */ + if (bfqq->seek_samples == 0) /* first request, not really a seek */ + sdist = 0; + else if (bfqq->seek_samples <= 60) /* second & third seek */ + sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); + else + sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); + + bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; + bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; + total = bfqq->seek_total + (bfqq->seek_samples/2); + do_div(total, bfqq->seek_samples); + bfqq->seek_mean = (sector_t)total; + + bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, + (u64)bfqq->seek_mean); +} + +/* + * Disable idle window if the process thinks too long or seeks so much that + * it doesn't matter. + */ +static void bfq_update_idle_window(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct cfq_io_context *cic) +{ + int enable_idle; + + /* Don't idle for async or idle io prio class. */ + if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) + return; + + /* Idle window just restored, statistics are meaningless. */ + if (bfq_bfqq_just_split(bfqq)) + return; + + enable_idle = bfq_bfqq_idle_window(bfqq); + + if (atomic_read(&cic->ioc->nr_tasks) == 0 || + bfqd->bfq_slice_idle == 0 || + (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && + bfqq->wr_coeff == 1)) + enable_idle = 0; + else if (bfq_sample_valid(cic->ttime.ttime_samples)) { + if (cic->ttime.ttime_mean > bfqd->bfq_slice_idle && + bfqq->wr_coeff == 1) + enable_idle = 0; + else + enable_idle = 1; + } + bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", + enable_idle); + + if (enable_idle) + bfq_mark_bfqq_idle_window(bfqq); + else + bfq_clear_bfqq_idle_window(bfqq); +} + +/* + * Called when a new fs request (rq) is added to bfqq. Check if there's + * something we should do about it. + */ +static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct request *rq) +{ + struct cfq_io_context *cic = RQ_CIC(rq); + + if (rq->cmd_flags & REQ_META) + bfqq->meta_pending++; + + bfq_update_io_thinktime(bfqd, cic); + bfq_update_io_seektime(bfqd, bfqq, rq); + if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) { + bfq_clear_bfqq_constantly_seeky(bfqq); + if (!blk_queue_nonrot(bfqd->queue)) { + BUG_ON(!bfqd->const_seeky_busy_in_flight_queues); + bfqd->const_seeky_busy_in_flight_queues--; + } + } + if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || + !BFQQ_SEEKY(bfqq)) + bfq_update_idle_window(bfqd, bfqq, cic); + bfq_clear_bfqq_just_split(bfqq); + + bfq_log_bfqq(bfqd, bfqq, + "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", + bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), + (long long unsigned)bfqq->seek_mean); + + bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); + + if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) { + int small_req = bfqq->queued[rq_is_sync(rq)] == 1 && + blk_rq_sectors(rq) < 32; + int budget_timeout = bfq_bfqq_budget_timeout(bfqq); + + /* + * There is just this request queued: if the request + * is small and the queue is not to be expired, then + * just exit. + * + * In this way, if the disk is being idled to wait for + * a new request from the in-service queue, we avoid + * unplugging the device and committing the disk to serve + * just a small request. On the contrary, we wait for + * the block layer to decide when to unplug the device: + * hopefully, new requests will be merged to this one + * quickly, then the device will be unplugged and + * larger requests will be dispatched. 
+ */ + if (small_req && !budget_timeout) + return; + + /* + * A large enough request arrived, or the queue is to + * be expired: in both cases disk idling is to be + * stopped, so clear wait_request flag and reset + * timer. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + + /* + * The queue is not empty, because a new request just + * arrived. Hence we can safely expire the queue, in + * case of budget timeout, without risking that the + * timestamps of the queue are not updated correctly. + * See [1] for more details. + */ + if (budget_timeout) + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); + + /* + * Let the request rip immediately, or let a new queue be + * selected if bfqq has just been expired. + */ + __blk_run_queue(bfqd->queue); + } +} + +static void bfq_insert_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; + + assert_spin_locked(bfqd->queue->queue_lock); + + /* + * An unplug may trigger a requeue of a request from the device + * driver: make sure we are in process context while trying to + * merge two bfq_queues. + */ + spin_lock(&bfqd->eqm_lock); + if (!in_interrupt()) { + new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); + if (new_bfqq != NULL) { + if (cic_to_bfqq(RQ_CIC(rq), 1) != bfqq) + new_bfqq = cic_to_bfqq(RQ_CIC(rq), 1); + /* + * Release the request's reference to the old bfqq + * and make sure one is taken to the shared queue. + */ + new_bfqq->allocated[rq_data_dir(rq)]++; + bfqq->allocated[rq_data_dir(rq)]--; + atomic_inc(&new_bfqq->ref); + bfq_put_queue(bfqq); + if (cic_to_bfqq(RQ_CIC(rq), 1) == bfqq) + bfq_merge_bfqqs(bfqd, RQ_CIC(rq), + bfqq, new_bfqq); + rq->elevator_private[1] = new_bfqq; + bfqq = new_bfqq; + } else + bfq_bfqq_increase_failed_cooperations(bfqq); + } + spin_unlock(&bfqd->eqm_lock); + + bfq_init_prio_data(bfqq, RQ_CIC(rq)->ioc); + + bfq_add_request(rq); + + /* + * Here a newly-created bfq_queue has already started a weight-raising + * period: clear wr_time_left to prevent bfq_bfqq_save_state() + * from assigning it a full weight-raising period. See the detailed + * comments about this field in bfq_init_icq(). + */ + if (bfqq->cic != NULL) + bfqq->cic->wr_time_left = 0; + rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); + list_add_tail(&rq->queuelist, &bfqq->fifo); + + bfq_rq_enqueued(bfqd, bfqq, rq); +} + +static void bfq_update_hw_tag(struct bfq_data *bfqd) +{ + bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, + bfqd->rq_in_driver); + + if (bfqd->hw_tag == 1) + return; + + /* + * This sample is valid if the number of outstanding requests + * is large enough to allow a queueing behavior. Note that the + * sum is not exact, as it's not taking into account deactivated + * requests. 
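+ * (Concretely: samples are collected only while at least
+ * BFQ_HW_QUEUE_THRESHOLD requests are pending, and once
+ * BFQ_HW_QUEUE_SAMPLES such samples have been seen, hw_tag is set
+ * only if the peak number of requests in the driver exceeded that
+ * same threshold, i.e., if the device appears to queue requests
+ * internally.)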
+ */ + if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) + return; + + if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) + return; + + bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; + bfqd->max_rq_in_driver = 0; + bfqd->hw_tag_samples = 0; +} + +static void bfq_completed_request(struct request_queue *q, struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + bool sync = bfq_bfqq_sync(bfqq); + + bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)", + blk_rq_sectors(rq), sync); + + bfq_update_hw_tag(bfqd); + + BUG_ON(!bfqd->rq_in_driver); + BUG_ON(!bfqq->dispatched); + bfqd->rq_in_driver--; + bfqq->dispatched--; + + if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { + bfq_weights_tree_remove(bfqd, &bfqq->entity, + &bfqd->queue_weights_tree); + if (!blk_queue_nonrot(bfqd->queue)) { + BUG_ON(!bfqd->busy_in_flight_queues); + bfqd->busy_in_flight_queues--; + if (bfq_bfqq_constantly_seeky(bfqq)) { + BUG_ON(!bfqd-> + const_seeky_busy_in_flight_queues); + bfqd->const_seeky_busy_in_flight_queues--; + } + } + } + + if (sync) { + bfqd->sync_flight--; + RQ_CIC(rq)->ttime.last_end_request = jiffies; + } + + /* + * If we are waiting to discover whether the request pattern of the + * task associated with the queue is actually isochronous, and + * both requisites for this condition to hold are satisfied, then + * compute soft_rt_next_start (see the comments to the function + * bfq_bfqq_softrt_next_start()). + */ + if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 && + RB_EMPTY_ROOT(&bfqq->sort_list)) + bfqq->soft_rt_next_start = + bfq_bfqq_softrt_next_start(bfqd, bfqq); + + /* + * If this is the in-service queue, check if it needs to be expired, + * or if we want to idle in case it has no pending requests. + */ + if (bfqd->in_service_queue == bfqq) { + if (bfq_bfqq_budget_new(bfqq)) + bfq_set_budget_timeout(bfqd); + + if (bfq_bfqq_must_idle(bfqq)) { + bfq_arm_slice_timer(bfqd); + goto out; + } else if (bfq_may_expire_for_budg_timeout(bfqq)) + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); + else if (RB_EMPTY_ROOT(&bfqq->sort_list) && + (bfqq->dispatched == 0 || + !bfq_bfqq_must_not_expire(bfqq))) + bfq_bfqq_expire(bfqd, bfqq, 0, + BFQ_BFQQ_NO_MORE_REQUESTS); + } + + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); + +out: + return; +} + +static inline int __bfq_may_queue(struct bfq_queue *bfqq) +{ + if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { + bfq_clear_bfqq_must_alloc(bfqq); + return ELV_MQUEUE_MUST; + } + + return ELV_MQUEUE_MAY; +} + +static int bfq_may_queue(struct request_queue *q, int rw) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct task_struct *tsk = current; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + /* + * Don't force setup of a queue from here, as a call to may_queue + * does not necessarily imply that a request actually will be + * queued. So just lookup a possibly existing queue, or return + * 'may queue' if that fails. + */ + cic = bfq_cic_lookup(bfqd, tsk->io_context); + if (cic == NULL) + return ELV_MQUEUE_MAY; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic_to_bfqq(cic, rw_is_sync(rw)); + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) { + bfq_init_prio_data(bfqq, cic->ioc); + + return __bfq_may_queue(bfqq); + } + + return ELV_MQUEUE_MAY; +} + +/* + * Queue lock held here. 
+ */ +static void bfq_put_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + if (bfqq != NULL) { + const int rw = rq_data_dir(rq); + + BUG_ON(!bfqq->allocated[rw]); + bfqq->allocated[rw]--; + + put_io_context(RQ_CIC(rq)->ioc); + + rq->elevator_private[0] = NULL; + rq->elevator_private[1] = NULL; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } +} + +/* + * Returns NULL if a new bfqq should be allocated, or the old bfqq if this + * was the last process referring to said bfqq. + */ +static struct bfq_queue * +bfq_split_bfqq(struct cfq_io_context *cic, struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); + + put_io_context(cic->ioc); + + if (bfqq_process_refs(bfqq) == 1) { + bfqq->pid = current->pid; + bfq_clear_bfqq_coop(bfqq); + bfq_clear_bfqq_split_coop(bfqq); + return bfqq; + } + + cic_set_bfqq(cic, NULL, 1); + + bfq_put_cooperator(bfqq); + + bfq_put_queue(bfqq); + return NULL; +} + +/* + * Allocate bfq data structures associated with this request. + */ +static int bfq_set_request(struct request_queue *q, struct request *rq, + gfp_t gfp_mask) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + const int rw = rq_data_dir(rq); + const int is_sync = rq_is_sync(rq); + struct bfq_queue *bfqq; + struct bfq_group *bfqg; + unsigned long flags; + bool split = false; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + cic = bfq_get_io_context(bfqd, gfp_mask); + + spin_lock_irqsave(q->queue_lock, flags); + + if (cic == NULL) + goto queue_fail; + + bfqg = bfq_cic_update_cgroup(cic); + + spin_lock(&bfqd->eqm_lock); + +new_queue: + bfqq = cic_to_bfqq(cic, is_sync); + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { + bfqq = bfq_get_queue(bfqd, bfqg, is_sync, cic->ioc, gfp_mask); + cic_set_bfqq(cic, bfqq, is_sync); + } else { + /* If the queue was seeky for too long, break it apart. */ + if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); + bfqq = bfq_split_bfqq(cic, bfqq); + split = true; + if (!bfqq) + goto new_queue; + } + } + + bfqq->allocated[rw]++; + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, + atomic_read(&bfqq->ref)); + + rq->elevator_private[0] = cic; + rq->elevator_private[1] = bfqq; + + /* + * If a bfq_queue has only one process reference, it is owned + * by only one cfq_io_context: we can set the cic field of the + * bfq_queue to the address of that structure. Also, if the + * queue has just been split, mark a flag so that the + * information is available to the other scheduler hooks. + */ + if (bfqq_process_refs(bfqq) == 1) { + bfqq->cic = cic; + if (split) { + bfq_mark_bfqq_just_split(bfqq); + /* + * If the queue has just been split from a shared + * queue, restore the idle window and the possible + * weight raising period. 
+ */ + bfq_bfqq_resume_state(bfqq, cic); + } + } + + spin_unlock(&bfqd->eqm_lock); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 0; + +queue_fail: + if (cic != NULL) + put_io_context(cic->ioc); + + bfq_schedule_dispatch(bfqd); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 1; +} + +static void bfq_kick_queue(struct work_struct *work) +{ + struct bfq_data *bfqd = + container_of(work, struct bfq_data, unplug_work); + struct request_queue *q = bfqd->queue; + + spin_lock_irq(q->queue_lock); + __blk_run_queue(q); + spin_unlock_irq(q->queue_lock); +} + +/* + * Handler of the expiration of the timer running if the in-service queue + * is idling inside its time slice. + */ +static void bfq_idle_slice_timer(unsigned long data) +{ + struct bfq_data *bfqd = (struct bfq_data *)data; + struct bfq_queue *bfqq; + unsigned long flags; + enum bfqq_expiration reason; + + spin_lock_irqsave(bfqd->queue->queue_lock, flags); + + bfqq = bfqd->in_service_queue; + /* + * Theoretical race here: the in-service queue can be NULL or + * different from the queue that was idling if the timer handler + * spins on the queue_lock and a new request arrives for the + * current queue and there is a full dispatch cycle that changes + * the in-service queue. This can hardly happen, but in the worst + * case we just expire a queue too early. + */ + if (bfqq != NULL) { + bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); + if (bfq_bfqq_budget_timeout(bfqq)) + /* + * Also here the queue can be safely expired + * for budget timeout without wasting + * guarantees + */ + reason = BFQ_BFQQ_BUDGET_TIMEOUT; + else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) + /* + * The queue may not be empty upon timer expiration, + * because we may not disable the timer when the + * first request of the in-service queue arrives + * during disk idling. + */ + reason = BFQ_BFQQ_TOO_IDLE; + else + goto schedule_dispatch; + + bfq_bfqq_expire(bfqd, bfqq, 1, reason); + } + +schedule_dispatch: + bfq_schedule_dispatch(bfqd); + + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); +} + +static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) +{ + del_timer_sync(&bfqd->idle_slice_timer); + cancel_work_sync(&bfqd->unplug_work); +} + +static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, + struct bfq_queue **bfqq_ptr) +{ + struct bfq_group *root_group = bfqd->root_group; + struct bfq_queue *bfqq = *bfqq_ptr; + + bfq_log(bfqd, "put_async_bfqq: %p", bfqq); + if (bfqq != NULL) { + bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); + bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + *bfqq_ptr = NULL; + } +} + +/* + * Release all the bfqg references to its async queues. If we are + * deallocating the group these queues may still contain requests, so + * we reparent them to the root cgroup (i.e., the only one that will + * exist for sure until all the requests on a device are gone). 
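+ * (The two rows of async_bfqq hold the per-ioprio queues of the RT
+ * and BE classes respectively, while async_idle_bfqq serves the IDLE
+ * class; see bfq_async_queue_prio().)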
+ */ +static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) +{ + int i, j; + + for (i = 0; i < 2; i++) + for (j = 0; j < IOPRIO_BE_NR; j++) + __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); + + __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); +} + +static void bfq_exit_queue(struct elevator_queue *e) +{ + struct bfq_data *bfqd = e->elevator_data; + struct request_queue *q = bfqd->queue; + struct bfq_queue *bfqq, *n; + struct cfq_io_context *cic; + + bfq_shutdown_timer_wq(bfqd); + + spin_lock_irq(q->queue_lock); + + while (!list_empty(&bfqd->cic_list)) { + cic = list_entry(bfqd->cic_list.next, struct cfq_io_context, + queue_list); + __bfq_exit_single_io_context(bfqd, cic); + } + + BUG_ON(bfqd->in_service_queue != NULL); + list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) + bfq_deactivate_bfqq(bfqd, bfqq, 0); + + bfq_disconnect_groups(bfqd); + spin_unlock_irq(q->queue_lock); + + bfq_shutdown_timer_wq(bfqd); + + spin_lock(&cic_index_lock); + ida_remove(&cic_index_ida, bfqd->cic_index); + spin_unlock(&cic_index_lock); + + /* Wait for cic->key accessors to exit their grace periods. */ + synchronize_rcu(); + + BUG_ON(timer_pending(&bfqd->idle_slice_timer)); + + bfq_free_root_group(bfqd); + kfree(bfqd); +} + +static int bfq_alloc_cic_index(void) +{ + int index, error; + + do { + if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&cic_index_lock); + error = ida_get_new(&cic_index_ida, &index); + spin_unlock(&cic_index_lock); + if (error && error != -EAGAIN) + return error; + } while (error); + + return index; +} + +static void *bfq_init_queue(struct request_queue *q) +{ + struct bfq_group *bfqg; + struct bfq_data *bfqd; + int i; + + i = bfq_alloc_cic_index(); + if (i < 0) + return NULL; + + bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); + if (bfqd == NULL) + return NULL; + + bfqd->cic_index = i; + + /* + * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. + * Grab a permanent reference to it, so that the normal code flow + * will not attempt to free it. 
+ */ + bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); + atomic_inc(&bfqd->oom_bfqq.ref); + + spin_lock_init(&bfqd->eqm_lock); + INIT_LIST_HEAD(&bfqd->cic_list); + + bfqd->queue = q; + + bfqg = bfq_alloc_root_group(bfqd, q->node); + if (bfqg == NULL) { + kfree(bfqd); + return NULL; + } + + bfqd->root_group = bfqg; +#ifdef CONFIG_CGROUP_BFQIO + bfqd->active_numerous_groups = 0; +#endif + + init_timer(&bfqd->idle_slice_timer); + bfqd->idle_slice_timer.function = bfq_idle_slice_timer; + bfqd->idle_slice_timer.data = (unsigned long)bfqd; + + bfqd->rq_pos_tree = RB_ROOT; + bfqd->queue_weights_tree = RB_ROOT; + bfqd->group_weights_tree = RB_ROOT; + + INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); + + INIT_LIST_HEAD(&bfqd->active_list); + INIT_LIST_HEAD(&bfqd->idle_list); + + bfqd->hw_tag = -1; + + bfqd->bfq_max_budget = bfq_default_max_budget; + + bfqd->bfq_quantum = bfq_quantum; + bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; + bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; + bfqd->bfq_back_max = bfq_back_max; + bfqd->bfq_back_penalty = bfq_back_penalty; + bfqd->bfq_slice_idle = bfq_slice_idle; + bfqd->bfq_class_idle_last_service = 0; + bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; + bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; + bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; + + bfqd->bfq_coop_thresh = 2; + bfqd->bfq_failed_cooperations = 7000; + bfqd->bfq_requests_within_timer = 120; + + bfqd->low_latency = true; + + bfqd->bfq_wr_coeff = 20; + bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300); + bfqd->bfq_wr_max_time = 0; + bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); + bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500); + bfqd->bfq_wr_max_softrt_rate = 7000; /* + * Approximate rate required + * to playback or record a + * high-definition compressed + * video. + */ + bfqd->wr_busy_queues = 0; + bfqd->busy_in_flight_queues = 0; + bfqd->const_seeky_busy_in_flight_queues = 0; + + /* + * Begin by assuming, optimistically, that the device peak rate is + * equal to the highest reference rate. + */ + bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * + T_fast[blk_queue_nonrot(bfqd->queue)]; + bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)]; + bfqd->device_speed = BFQ_BFQD_FAST; + + return bfqd; +} + +static void bfq_slab_kill(void) +{ + if (bfq_pool != NULL) + kmem_cache_destroy(bfq_pool); + if (bfq_ioc_pool != NULL) + kmem_cache_destroy(bfq_ioc_pool); +} + +static int __init bfq_slab_setup(void) +{ + bfq_pool = KMEM_CACHE(bfq_queue, 0); + if (bfq_pool == NULL) + goto fail; + + bfq_ioc_pool = kmem_cache_create("bfq_io_context", + sizeof(struct cfq_io_context), + __alignof__(struct cfq_io_context), + 0, NULL); + if (bfq_ioc_pool == NULL) + goto fail; + + return 0; +fail: + bfq_slab_kill(); + return -ENOMEM; +} + +static ssize_t bfq_var_show(unsigned int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t bfq_var_store(unsigned long *var, const char *page, + size_t count) +{ + unsigned long new_val; + int ret = kstrtoul(page, 10, &new_val); + + if (ret == 0) + *var = new_val; + + return count; +} + +static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page) +{ + struct bfq_data *bfqd = e->elevator_data; + return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ? 
+ jiffies_to_msecs(bfqd->bfq_wr_max_time) : + jiffies_to_msecs(bfq_wr_duration(bfqd))); +} + +static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) +{ + struct bfq_queue *bfqq; + struct bfq_data *bfqd = e->elevator_data; + ssize_t num_char = 0; + + spin_lock_irq(bfqd->queue->queue_lock); + + num_char += sprintf(page + num_char, "Active:\n"); + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + num_char += sprintf(page + num_char, "Idle:\n"); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + + spin_unlock_irq(bfqd->queue->queue_lock); + + return num_char; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct bfq_data *bfqd = e->elevator_data; \ + unsigned int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return bfq_var_show(__data, (page)); \ +} +SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); +SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); +SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); +SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); +SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); +SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); +SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); +SHOW_FUNCTION(bfq_max_budget_async_rq_show, + bfqd->bfq_max_budget_async_rq, 0); +SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); +SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); +SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); +SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); +SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); +SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, + 1); +SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, + 1); +SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t \ +__FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct bfq_data *bfqd = e->elevator_data; \ + unsigned long uninitialized_var(__data); \ + int ret = bfq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); +STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, + INT_MAX, 1); +STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, + INT_MAX, 1); +STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); +STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, + INT_MAX, 0); +STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, + 
1, INT_MAX, 0); +STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); +STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_wr_min_idle_time_store, + &bfqd->bfq_wr_min_idle_time, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_wr_min_inter_arr_async_store, + &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_wr_max_softrt_rate_store, + &bfqd->bfq_wr_max_softrt_rate, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +/* do nothing for the moment */ +static ssize_t bfq_weights_store(struct elevator_queue *e, + const char *page, size_t count) +{ + return count; +} + +static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) +{ + u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); + + if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) + return bfq_calc_max_budget(bfqd->peak_rate, timeout); + else + return bfq_default_max_budget; +} + +static ssize_t bfq_max_budget_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data == 0) + bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); + else { + if (__data > INT_MAX) + __data = INT_MAX; + bfqd->bfq_max_budget = __data; + } + + bfqd->bfq_user_max_budget = __data; + + return ret; +} + +static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data < 1) + __data = 1; + else if (__data > INT_MAX) + __data = INT_MAX; + + bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); + if (bfqd->bfq_user_max_budget == 0) + bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); + + return ret; +} + +static ssize_t bfq_low_latency_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data > 1) + __data = 1; + if (__data == 0 && bfqd->low_latency != 0) + bfq_end_wr(bfqd); + bfqd->low_latency = __data; + + return ret; +} + +#define BFQ_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) + +static struct elv_fs_entry bfq_attrs[] = { + BFQ_ATTR(quantum), + BFQ_ATTR(fifo_expire_sync), + BFQ_ATTR(fifo_expire_async), + BFQ_ATTR(back_seek_max), + BFQ_ATTR(back_seek_penalty), + BFQ_ATTR(slice_idle), + BFQ_ATTR(max_budget), + BFQ_ATTR(max_budget_async_rq), + BFQ_ATTR(timeout_sync), + BFQ_ATTR(timeout_async), + BFQ_ATTR(low_latency), + BFQ_ATTR(wr_coeff), + BFQ_ATTR(wr_max_time), + BFQ_ATTR(wr_rt_max_time), + BFQ_ATTR(wr_min_idle_time), + BFQ_ATTR(wr_min_inter_arr_async), + BFQ_ATTR(wr_max_softrt_rate), + BFQ_ATTR(weights), + __ATTR_NULL +}; + +static struct elevator_type iosched_bfq = { + .ops = { + .elevator_merge_fn = bfq_merge, + .elevator_merged_fn = bfq_merged_request, + .elevator_merge_req_fn = bfq_merged_requests, + .elevator_allow_merge_fn = bfq_allow_merge, + .elevator_dispatch_fn = bfq_dispatch_requests, + .elevator_add_req_fn = bfq_insert_request, + .elevator_activate_req_fn = bfq_activate_request, + .elevator_deactivate_req_fn = 
bfq_deactivate_request, + .elevator_completed_req_fn = bfq_completed_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_set_req_fn = bfq_set_request, + .elevator_put_req_fn = bfq_put_request, + .elevator_may_queue_fn = bfq_may_queue, + .elevator_init_fn = bfq_init_queue, + .elevator_exit_fn = bfq_exit_queue, + .trim = bfq_free_io_context, + }, + .elevator_attrs = bfq_attrs, + .elevator_name = "bfq", + .elevator_owner = THIS_MODULE, +}; + +static int __init bfq_init(void) +{ + /* + * Can be 0 on HZ < 1000 setups. + */ + if (bfq_slice_idle == 0) + bfq_slice_idle = 1; + + if (bfq_timeout_async == 0) + bfq_timeout_async = 1; + + if (bfq_slab_setup()) + return -ENOMEM; + + /* + * Times to load large popular applications for the typical systems + * installed on the reference devices (see the comments before the + * definitions of the two arrays). + */ + T_slow[0] = msecs_to_jiffies(2600); + T_slow[1] = msecs_to_jiffies(1000); + T_fast[0] = msecs_to_jiffies(5500); + T_fast[1] = msecs_to_jiffies(2000); + + /* + * Thresholds that determine the switch between speed classes (see + * the comments before the definition of the array). + */ + device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2; + device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; + + elv_register(&iosched_bfq); + pr_info("BFQ I/O-scheduler version: v7r5"); + + return 0; +} + +static void __exit bfq_exit(void) +{ + DECLARE_COMPLETION_ONSTACK(all_gone); + elv_unregister(&iosched_bfq); + bfq_ioc_gone = &all_gone; + /* bfq_ioc_gone's update must be visible before reading bfq_ioc_count */ + smp_wmb(); + if (elv_ioc_count_read(bfq_ioc_count) != 0) + wait_for_completion(&all_gone); + ida_destroy(&cic_index_ida); + bfq_slab_kill(); +} + +module_init(bfq_init); +module_exit(bfq_exit); + +MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); +MODULE_LICENSE("GPL"); diff --git a/block/bfq-sched.c b/block/bfq-sched.c new file mode 100644 index 00000000000..7926dce517c --- /dev/null +++ b/block/bfq-sched.c @@ -0,0 +1,1177 @@ +/* + * BFQ: Hierarchical B-WF2Q+ scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +#ifdef CONFIG_CGROUP_BFQIO +#define for_each_entity(entity) \ + for (; entity != NULL; entity = entity->parent) + +#define for_each_entity_safe(entity, parent) \ + for (; entity && ({ parent = entity->parent; 1; }); entity = parent) + +static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + int extract, + struct bfq_data *bfqd); + +static inline void bfq_update_budget(struct bfq_entity *next_in_service) +{ + struct bfq_entity *bfqg_entity; + struct bfq_group *bfqg; + struct bfq_sched_data *group_sd; + + BUG_ON(next_in_service == NULL); + + group_sd = next_in_service->sched_data; + + bfqg = container_of(group_sd, struct bfq_group, sched_data); + /* + * bfq_group's my_entity field is not NULL only if the group + * is not the root group. We must not touch the root entity + * as it must never become an in-service entity. 
+ */ + bfqg_entity = bfqg->my_entity; + if (bfqg_entity != NULL) + bfqg_entity->budget = next_in_service->budget; +} + +static int bfq_update_next_in_service(struct bfq_sched_data *sd) +{ + struct bfq_entity *next_in_service; + + if (sd->in_service_entity != NULL) + /* will update/requeue at the end of service */ + return 0; + + /* + * NOTE: this can be improved in many ways, such as returning + * 1 (and thus propagating upwards the update) only when the + * budget changes, or caching the bfqq that will be scheduled + * next from this subtree. By now we worry more about + * correctness than about performance... + */ + next_in_service = bfq_lookup_next_entity(sd, 0, NULL); + sd->next_in_service = next_in_service; + + if (next_in_service != NULL) + bfq_update_budget(next_in_service); + + return 1; +} + +static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, + struct bfq_entity *entity) +{ + BUG_ON(sd->next_in_service != entity); +} +#else +#define for_each_entity(entity) \ + for (; entity != NULL; entity = NULL) + +#define for_each_entity_safe(entity, parent) \ + for (parent = NULL; entity != NULL; entity = parent) + +static inline int bfq_update_next_in_service(struct bfq_sched_data *sd) +{ + return 0; +} + +static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, + struct bfq_entity *entity) +{ +} + +static inline void bfq_update_budget(struct bfq_entity *next_in_service) +{ +} +#endif + +/* + * Shift for timestamp calculations. This actually limits the maximum + * service allowed in one timestamp delta (small shift values increase it), + * the maximum total weight that can be used for the queues in the system + * (big shift values increase it), and the period of virtual time + * wraparounds. + */ +#define WFQ_SERVICE_SHIFT 22 + +/** + * bfq_gt - compare two timestamps. + * @a: first ts. + * @b: second ts. + * + * Return @a > @b, dealing with wrapping correctly. + */ +static inline int bfq_gt(u64 a, u64 b) +{ + return (s64)(a - b) > 0; +} + +static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = NULL; + + BUG_ON(entity == NULL); + + if (entity->my_sched_data == NULL) + bfqq = container_of(entity, struct bfq_queue, entity); + + return bfqq; +} + + +/** + * bfq_delta - map service into the virtual time domain. + * @service: amount of service. + * @weight: scale factor (weight of an entity or weight sum). + */ +static inline u64 bfq_delta(unsigned long service, + unsigned long weight) +{ + u64 d = (u64)service << WFQ_SERVICE_SHIFT; + + do_div(d, weight); + return d; +} + +/** + * bfq_calc_finish - assign the finish time to an entity. + * @entity: the entity to act upon. + * @service: the service to be charged to the entity. + */ +static inline void bfq_calc_finish(struct bfq_entity *entity, + unsigned long service) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(entity->weight == 0); + + entity->finish = entity->start + + bfq_delta(service, entity->weight); + + if (bfqq != NULL) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_finish: serv %lu, w %d", + service, entity->weight); + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_finish: start %llu, finish %llu, delta %llu", + entity->start, entity->finish, + bfq_delta(service, entity->weight)); + } +} + +/** + * bfq_entity_of - get an entity from a node. + * @node: the node field of the entity. + * + * Convert a node pointer to the relative entity. 
This is used only + * to simplify the logic of some functions and not as the generic + * conversion mechanism because, e.g., in the tree walking functions, + * the check for a %NULL value would be redundant. + */ +static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) +{ + struct bfq_entity *entity = NULL; + + if (node != NULL) + entity = rb_entry(node, struct bfq_entity, rb_node); + + return entity; +} + +/** + * bfq_extract - remove an entity from a tree. + * @root: the tree root. + * @entity: the entity to remove. + */ +static inline void bfq_extract(struct rb_root *root, + struct bfq_entity *entity) +{ + BUG_ON(entity->tree != root); + + entity->tree = NULL; + rb_erase(&entity->rb_node, root); +} + +/** + * bfq_idle_extract - extract an entity from the idle tree. + * @st: the service tree of the owning @entity. + * @entity: the entity being removed. + */ +static void bfq_idle_extract(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *next; + + BUG_ON(entity->tree != &st->idle); + + if (entity == st->first_idle) { + next = rb_next(&entity->rb_node); + st->first_idle = bfq_entity_of(next); + } + + if (entity == st->last_idle) { + next = rb_prev(&entity->rb_node); + st->last_idle = bfq_entity_of(next); + } + + bfq_extract(&st->idle, entity); + + if (bfqq != NULL) + list_del(&bfqq->bfqq_list); +} + +/** + * bfq_insert - generic tree insertion. + * @root: tree root. + * @entity: entity to insert. + * + * This is used for the idle and the active tree, since they are both + * ordered by finish time. + */ +static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) +{ + struct bfq_entity *entry; + struct rb_node **node = &root->rb_node; + struct rb_node *parent = NULL; + + BUG_ON(entity->tree != NULL); + + while (*node != NULL) { + parent = *node; + entry = rb_entry(parent, struct bfq_entity, rb_node); + + if (bfq_gt(entry->finish, entity->finish)) + node = &parent->rb_left; + else + node = &parent->rb_right; + } + + rb_link_node(&entity->rb_node, parent, node); + rb_insert_color(&entity->rb_node, root); + + entity->tree = root; +} + +/** + * bfq_update_min - update the min_start field of a entity. + * @entity: the entity to update. + * @node: one of its children. + * + * This function is called when @entity may store an invalid value for + * min_start due to updates to the active tree. The function assumes + * that the subtree rooted at @node (which may be its left or its right + * child) has a valid min_start value. + */ +static inline void bfq_update_min(struct bfq_entity *entity, + struct rb_node *node) +{ + struct bfq_entity *child; + + if (node != NULL) { + child = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entity->min_start, child->min_start)) + entity->min_start = child->min_start; + } +} + +/** + * bfq_update_active_node - recalculate min_start. + * @node: the node to update. + * + * @node may have changed position or one of its children may have moved, + * this function updates its min_start value. The left and right subtrees + * are assumed to hold a correct min_start value. + */ +static inline void bfq_update_active_node(struct rb_node *node) +{ + struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); + + entity->min_start = entity->start; + bfq_update_min(entity, node->rb_right); + bfq_update_min(entity, node->rb_left); +} + +/** + * bfq_update_active_tree - update min_start for the whole active tree. + * @node: the starting node. 
+ * + * @node must be the deepest modified node after an update. This function + * updates its min_start using the values held by its children, assuming + * that they did not change, and then updates all the nodes that may have + * changed in the path to the root. The only nodes that may have changed + * are the ones in the path or their siblings. + */ +static void bfq_update_active_tree(struct rb_node *node) +{ + struct rb_node *parent; + +up: + bfq_update_active_node(node); + + parent = rb_parent(node); + if (parent == NULL) + return; + + if (node == parent->rb_left && parent->rb_right != NULL) + bfq_update_active_node(parent->rb_right); + else if (parent->rb_left != NULL) + bfq_update_active_node(parent->rb_left); + + node = parent; + goto up; +} + +static void bfq_weights_tree_add(struct bfq_data *bfqd, + struct bfq_entity *entity, + struct rb_root *root); + +static void bfq_weights_tree_remove(struct bfq_data *bfqd, + struct bfq_entity *entity, + struct rb_root *root); + + +/** + * bfq_active_insert - insert an entity in the active tree of its + * group/device. + * @st: the service tree of the entity. + * @entity: the entity being inserted. + * + * The active tree is ordered by finish time, but an extra key is kept + * per each node, containing the minimum value for the start times of + * its children (and the node itself), so it's possible to search for + * the eligible node with the lowest finish time in logarithmic time. + */ +static void bfq_active_insert(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *node = &entity->rb_node; +#ifdef CONFIG_CGROUP_BFQIO + struct bfq_sched_data *sd = NULL; + struct bfq_group *bfqg = NULL; + struct bfq_data *bfqd = NULL; +#endif + + bfq_insert(&st->active, entity); + + if (node->rb_left != NULL) + node = node->rb_left; + else if (node->rb_right != NULL) + node = node->rb_right; + + bfq_update_active_tree(node); + +#ifdef CONFIG_CGROUP_BFQIO + sd = entity->sched_data; + bfqg = container_of(sd, struct bfq_group, sched_data); + BUG_ON(!bfqg); + bfqd = (struct bfq_data *)bfqg->bfqd; +#endif + if (bfqq != NULL) + list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); +#ifdef CONFIG_CGROUP_BFQIO + else { /* bfq_group */ + BUG_ON(!bfqd); + bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree); + } + if (bfqg != bfqd->root_group) { + BUG_ON(!bfqg); + BUG_ON(!bfqd); + bfqg->active_entities++; + if (bfqg->active_entities == 2) + bfqd->active_numerous_groups++; + } +#endif +} + +/** + * bfq_ioprio_to_weight - calc a weight from an ioprio. + * @ioprio: the ioprio value to convert. + */ +static inline unsigned short bfq_ioprio_to_weight(int ioprio) +{ + BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); + return IOPRIO_BE_NR - ioprio; +} + +/** + * bfq_weight_to_ioprio - calc an ioprio from a weight. + * @weight: the weight value to convert. + * + * To preserve as mush as possible the old only-ioprio user interface, + * 0 is used as an escape ioprio value for weights (numerically) equal or + * larger than IOPRIO_BE_NR + */ +static inline unsigned short bfq_weight_to_ioprio(int weight) +{ + BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); + return IOPRIO_BE_NR - weight < 0 ? 
0 : IOPRIO_BE_NR - weight; +} + +static inline void bfq_get_entity(struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + if (bfqq != NULL) { + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", + bfqq, atomic_read(&bfqq->ref)); + } +} + +/** + * bfq_find_deepest - find the deepest node that an extraction can modify. + * @node: the node being removed. + * + * Do the first step of an extraction in an rb tree, looking for the + * node that will replace @node, and returning the deepest node that + * the following modifications to the tree can touch. If @node is the + * last node in the tree return %NULL. + */ +static struct rb_node *bfq_find_deepest(struct rb_node *node) +{ + struct rb_node *deepest; + + if (node->rb_right == NULL && node->rb_left == NULL) + deepest = rb_parent(node); + else if (node->rb_right == NULL) + deepest = node->rb_left; + else if (node->rb_left == NULL) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right != NULL) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/** + * bfq_active_extract - remove an entity from the active tree. + * @st: the service_tree containing the tree. + * @entity: the entity being removed. + */ +static void bfq_active_extract(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *node; +#ifdef CONFIG_CGROUP_BFQIO + struct bfq_sched_data *sd = NULL; + struct bfq_group *bfqg = NULL; + struct bfq_data *bfqd = NULL; +#endif + + node = bfq_find_deepest(&entity->rb_node); + bfq_extract(&st->active, entity); + + if (node != NULL) + bfq_update_active_tree(node); + +#ifdef CONFIG_CGROUP_BFQIO + sd = entity->sched_data; + bfqg = container_of(sd, struct bfq_group, sched_data); + BUG_ON(!bfqg); + bfqd = (struct bfq_data *)bfqg->bfqd; +#endif + if (bfqq != NULL) + list_del(&bfqq->bfqq_list); +#ifdef CONFIG_CGROUP_BFQIO + else { /* bfq_group */ + BUG_ON(!bfqd); + bfq_weights_tree_remove(bfqd, entity, + &bfqd->group_weights_tree); + } + if (bfqg != bfqd->root_group) { + BUG_ON(!bfqg); + BUG_ON(!bfqd); + BUG_ON(!bfqg->active_entities); + bfqg->active_entities--; + if (bfqg->active_entities == 1) { + BUG_ON(!bfqd->active_numerous_groups); + bfqd->active_numerous_groups--; + } + } +#endif +} + +/** + * bfq_idle_insert - insert an entity into the idle tree. + * @st: the service tree containing the tree. + * @entity: the entity to insert. + */ +static void bfq_idle_insert(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct bfq_entity *first_idle = st->first_idle; + struct bfq_entity *last_idle = st->last_idle; + + if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) + st->first_idle = entity; + if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) + st->last_idle = entity; + + bfq_insert(&st->idle, entity); + + if (bfqq != NULL) + list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); +} + +/** + * bfq_forget_entity - remove an entity from the wfq trees. + * @st: the service tree. + * @entity: the entity being removed. + * + * Update the device status and forget everything about @entity, putting + * the device reference to it, if it is a queue. Entities belonging to + * groups are not refcounted. 
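+ * The queue reference dropped here is the one taken by bfq_get_entity()
+ * when the entity was first put on a service tree.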
+ */ +static void bfq_forget_entity(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(!entity->on_st); + + entity->on_st = 0; + st->wsum -= entity->weight; + if (bfqq != NULL) { + bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } +} + +/** + * bfq_put_idle_entity - release the idle tree ref of an entity. + * @st: service tree for the entity. + * @entity: the entity being released. + */ +static void bfq_put_idle_entity(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + bfq_idle_extract(st, entity); + bfq_forget_entity(st, entity); +} + +/** + * bfq_forget_idle - update the idle tree if necessary. + * @st: the service tree to act upon. + * + * To preserve the global O(log N) complexity we only remove one entry here; + * as the idle tree will not grow indefinitely this can be done safely. + */ +static void bfq_forget_idle(struct bfq_service_tree *st) +{ + struct bfq_entity *first_idle = st->first_idle; + struct bfq_entity *last_idle = st->last_idle; + + if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && + !bfq_gt(last_idle->finish, st->vtime)) { + /* + * Forget the whole idle tree, increasing the vtime past + * the last finish time of idle entities. + */ + st->vtime = last_idle->finish; + } + + if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) + bfq_put_idle_entity(st, first_idle); +} + +static struct bfq_service_tree * +__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, + struct bfq_entity *entity) +{ + struct bfq_service_tree *new_st = old_st; + + if (entity->ioprio_changed) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + unsigned short prev_weight, new_weight; + struct bfq_data *bfqd = NULL; + struct rb_root *root; +#ifdef CONFIG_CGROUP_BFQIO + struct bfq_sched_data *sd; + struct bfq_group *bfqg; +#endif + + if (bfqq != NULL) + bfqd = bfqq->bfqd; +#ifdef CONFIG_CGROUP_BFQIO + else { + sd = entity->my_sched_data; + bfqg = container_of(sd, struct bfq_group, sched_data); + BUG_ON(!bfqg); + bfqd = (struct bfq_data *)bfqg->bfqd; + BUG_ON(!bfqd); + } +#endif + + BUG_ON(old_st->wsum < entity->weight); + old_st->wsum -= entity->weight; + + if (entity->new_weight != entity->orig_weight) { + entity->orig_weight = entity->new_weight; + entity->ioprio = + bfq_weight_to_ioprio(entity->orig_weight); + } else if (entity->new_ioprio != entity->ioprio) { + entity->ioprio = entity->new_ioprio; + entity->orig_weight = + bfq_ioprio_to_weight(entity->ioprio); + } else + entity->new_weight = entity->orig_weight = + bfq_ioprio_to_weight(entity->ioprio); + + entity->ioprio_class = entity->new_ioprio_class; + entity->ioprio_changed = 0; + + /* + * NOTE: here we may be changing the weight too early, + * this will cause unfairness. The correct approach + * would have required additional complexity to defer + * weight changes to the proper time instants (i.e., + * when entity->finish <= old_st->vtime). + */ + new_st = bfq_entity_service_tree(entity); + + prev_weight = entity->weight; + new_weight = entity->orig_weight * + (bfqq != NULL ? bfqq->wr_coeff : 1); + /* + * If the weight of the entity changes, remove the entity + * from its old weight counter (if there is a counter + * associated with the entity), and add it to the counter + * associated with its new weight. + */ + if (prev_weight != new_weight) { + root = bfqq ? 
&bfqd->queue_weights_tree : + &bfqd->group_weights_tree; + bfq_weights_tree_remove(bfqd, entity, root); + } + entity->weight = new_weight; + /* + * Add the entity to its weights tree only if it is + * not associated with a weight-raised queue. + */ + if (prev_weight != new_weight && + (bfqq ? bfqq->wr_coeff == 1 : 1)) + /* If we get here, root has been initialized. */ + bfq_weights_tree_add(bfqd, entity, root); + + new_st->wsum += entity->weight; + + if (new_st != old_st) + entity->start = new_st->vtime; + } + + return new_st; +} + +/** + * bfq_bfqq_served - update the scheduler status after selection for + * service. + * @bfqq: the queue being served. + * @served: bytes to transfer. + * + * NOTE: this can be optimized, as the timestamps of upper level entities + * are synchronized every time a new bfqq is selected for service. By now, + * we keep it to better check consistency. + */ +static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) +{ + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st; + + for_each_entity(entity) { + st = bfq_entity_service_tree(entity); + + entity->service += served; + BUG_ON(entity->service > entity->budget); + BUG_ON(st->wsum == 0); + + st->vtime += bfq_delta(served, st->wsum); + bfq_forget_idle(st); + } + bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); +} + +/** + * bfq_bfqq_charge_full_budget - set the service to the entity budget. + * @bfqq: the queue that needs a service update. + * + * When it's not possible to be fair in the service domain, because + * a queue is not consuming its budget fast enough (the meaning of + * fast depends on the timeout parameter), we charge it a full + * budget. In this way we should obtain a sort of time-domain + * fairness among all the seeky/slow queues. + */ +static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); + + bfq_bfqq_served(bfqq, entity->budget - entity->service); +} + +/** + * __bfq_activate_entity - activate an entity. + * @entity: the entity being activated. + * + * Called whenever an entity is activated, i.e., it is not active and one + * of its children receives a new request, or has to be reactivated due to + * budget exhaustion. It uses the current budget of the entity (and the + * service received if @entity is active) of the queue to calculate its + * timestamps. + */ +static void __bfq_activate_entity(struct bfq_entity *entity) +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + + if (entity == sd->in_service_entity) { + BUG_ON(entity->tree != NULL); + /* + * If we are requeueing the current entity we have + * to take care of not charging to it service it has + * not received. + */ + bfq_calc_finish(entity, entity->service); + entity->start = entity->finish; + sd->in_service_entity = NULL; + } else if (entity->tree == &st->active) { + /* + * Requeueing an entity due to a change of some + * next_in_service entity below it. We reuse the + * old start time. + */ + bfq_active_extract(st, entity); + } else if (entity->tree == &st->idle) { + /* + * Must be on the idle tree, bfq_idle_extract() will + * check for that. + */ + bfq_idle_extract(st, entity); + entity->start = bfq_gt(st->vtime, entity->finish) ? 
+ st->vtime : entity->finish; + } else { + /* + * The finish time of the entity may be invalid, and + * it is in the past for sure, otherwise the queue + * would have been on the idle tree. + */ + entity->start = st->vtime; + st->wsum += entity->weight; + bfq_get_entity(entity); + + BUG_ON(entity->on_st); + entity->on_st = 1; + } + + st = __bfq_entity_update_weight_prio(st, entity); + bfq_calc_finish(entity, entity->budget); + bfq_active_insert(st, entity); +} + +/** + * bfq_activate_entity - activate an entity and its ancestors if necessary. + * @entity: the entity to activate. + * + * Activate @entity and all the entities on the path from it to the root. + */ +static void bfq_activate_entity(struct bfq_entity *entity) +{ + struct bfq_sched_data *sd; + + for_each_entity(entity) { + __bfq_activate_entity(entity); + + sd = entity->sched_data; + if (!bfq_update_next_in_service(sd)) + /* + * No need to propagate the activation to the + * upper entities, as they will be updated when + * the in-service entity is rescheduled. + */ + break; + } +} + +/** + * __bfq_deactivate_entity - deactivate an entity from its service tree. + * @entity: the entity to deactivate. + * @requeue: if false, the entity will not be put into the idle tree. + * + * Deactivate an entity, independently from its previous state. If the + * entity was not on a service tree just return, otherwise if it is on + * any scheduler tree, extract it from that tree, and if necessary + * and if the caller did not specify @requeue, put it on the idle tree. + * + * Return %1 if the caller should update the entity hierarchy, i.e., + * if the entity was in service or if it was the next_in_service for + * its sched_data; return %0 otherwise. + */ +static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + int was_in_service = entity == sd->in_service_entity; + int ret = 0; + + if (!entity->on_st) + return 0; + + BUG_ON(was_in_service && entity->tree != NULL); + + if (was_in_service) { + bfq_calc_finish(entity, entity->service); + sd->in_service_entity = NULL; + } else if (entity->tree == &st->active) + bfq_active_extract(st, entity); + else if (entity->tree == &st->idle) + bfq_idle_extract(st, entity); + else if (entity->tree != NULL) + BUG(); + + if (was_in_service || sd->next_in_service == entity) + ret = bfq_update_next_in_service(sd); + + if (!requeue || !bfq_gt(entity->finish, st->vtime)) + bfq_forget_entity(st, entity); + else + bfq_idle_insert(st, entity); + + BUG_ON(sd->in_service_entity == entity); + BUG_ON(sd->next_in_service == entity); + + return ret; +} + +/** + * bfq_deactivate_entity - deactivate an entity. + * @entity: the entity to deactivate. + * @requeue: true if the entity can be put on the idle tree + */ +static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +{ + struct bfq_sched_data *sd; + struct bfq_entity *parent; + + for_each_entity_safe(entity, parent) { + sd = entity->sched_data; + + if (!__bfq_deactivate_entity(entity, requeue)) + /* + * The parent entity is still backlogged, and + * we don't need to update it as it is still + * in service. + */ + break; + + if (sd->next_in_service != NULL) + /* + * The parent entity is still backlogged and + * the budgets on the path towards the root + * need to be updated. + */ + goto update; + + /* + * If we reach there the parent is no more backlogged and + * we want to propagate the dequeue upwards. 
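+	 * From here on the ancestors are deactivated with @requeue
+	 * allowed, so that those whose finish time is still ahead of
+	 * their service tree's vtime can be kept on the idle tree.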
+ */ + requeue = 1; + } + + return; + +update: + entity = parent; + for_each_entity(entity) { + __bfq_activate_entity(entity); + + sd = entity->sched_data; + if (!bfq_update_next_in_service(sd)) + break; + } +} + +/** + * bfq_update_vtime - update vtime if necessary. + * @st: the service tree to act upon. + * + * If necessary update the service tree vtime to have at least one + * eligible entity, skipping to its start time. Assumes that the + * active tree of the device is not empty. + * + * NOTE: this hierarchical implementation updates vtimes quite often, + * we may end up with reactivated processes getting timestamps after a + * vtime skip done because we needed a ->first_active entity on some + * intermediate node. + */ +static void bfq_update_vtime(struct bfq_service_tree *st) +{ + struct bfq_entity *entry; + struct rb_node *node = st->active.rb_node; + + entry = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entry->min_start, st->vtime)) { + st->vtime = entry->min_start; + bfq_forget_idle(st); + } +} + +/** + * bfq_first_active_entity - find the eligible entity with + * the smallest finish time + * @st: the service tree to select from. + * + * This function searches the first schedulable entity, starting from the + * root of the tree and going on the left every time on this side there is + * a subtree with at least one eligible (start >= vtime) entity. The path on + * the right is followed only if a) the left subtree contains no eligible + * entities and b) no eligible entity has been found yet. + */ +static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) +{ + struct bfq_entity *entry, *first = NULL; + struct rb_node *node = st->active.rb_node; + + while (node != NULL) { + entry = rb_entry(node, struct bfq_entity, rb_node); +left: + if (!bfq_gt(entry->start, st->vtime)) + first = entry; + + BUG_ON(bfq_gt(entry->min_start, st->vtime)); + + if (node->rb_left != NULL) { + entry = rb_entry(node->rb_left, + struct bfq_entity, rb_node); + if (!bfq_gt(entry->min_start, st->vtime)) { + node = node->rb_left; + goto left; + } + } + if (first != NULL) + break; + node = node->rb_right; + } + + BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); + return first; +} + +/** + * __bfq_lookup_next_entity - return the first eligible entity in @st. + * @st: the service tree. + * + * Update the virtual time in @st and return the first eligible entity + * it contains. + */ +static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, + bool force) +{ + struct bfq_entity *entity, *new_next_in_service = NULL; + + if (RB_EMPTY_ROOT(&st->active)) + return NULL; + + bfq_update_vtime(st); + entity = bfq_first_active_entity(st); + BUG_ON(bfq_gt(entity->start, st->vtime)); + + /* + * If the chosen entity does not match with the sched_data's + * next_in_service and we are forcedly serving the IDLE priority + * class tree, bubble up budget update. + */ + if (unlikely(force && entity != entity->sched_data->next_in_service)) { + new_next_in_service = entity; + for_each_entity(new_next_in_service) + bfq_update_budget(new_next_in_service); + } + + return entity; +} + +/** + * bfq_lookup_next_entity - return the first eligible entity in @sd. + * @sd: the sched_data. + * @extract: if true the returned entity will be also extracted from @sd. 
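+ * @bfqd: the device data; may be %NULL. When non-%NULL it is used to
+ *        guarantee that the IDLE class tree gets served at least once
+ *        every BFQ_CL_IDLE_TIMEOUT jiffies.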
+ * + * NOTE: since we cache the next_in_service entity at each level of the + * hierarchy, the complexity of the lookup can be decreased with + * absolutely no effort just returning the cached next_in_service value; + * we prefer to do full lookups to test the consistency of * the data + * structures. + */ +static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + int extract, + struct bfq_data *bfqd) +{ + struct bfq_service_tree *st = sd->service_tree; + struct bfq_entity *entity; + int i = 0; + + BUG_ON(sd->in_service_entity != NULL); + + if (bfqd != NULL && + jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { + entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, + true); + if (entity != NULL) { + i = BFQ_IOPRIO_CLASSES - 1; + bfqd->bfq_class_idle_last_service = jiffies; + sd->next_in_service = entity; + } + } + for (; i < BFQ_IOPRIO_CLASSES; i++) { + entity = __bfq_lookup_next_entity(st + i, false); + if (entity != NULL) { + if (extract) { + bfq_check_next_in_service(sd, entity); + bfq_active_extract(st + i, entity); + sd->in_service_entity = entity; + sd->next_in_service = NULL; + } + break; + } + } + + return entity; +} + +/* + * Get next queue for service. + */ +static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) +{ + struct bfq_entity *entity = NULL; + struct bfq_sched_data *sd; + struct bfq_queue *bfqq; + + BUG_ON(bfqd->in_service_queue != NULL); + + if (bfqd->busy_queues == 0) + return NULL; + + sd = &bfqd->root_group->sched_data; + for (; sd != NULL; sd = entity->my_sched_data) { + entity = bfq_lookup_next_entity(sd, 1, bfqd); + BUG_ON(entity == NULL); + entity->service = 0; + } + + bfqq = bfq_entity_to_bfqq(entity); + BUG_ON(bfqq == NULL); + + return bfqq; +} + +static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) +{ + if (bfqd->in_service_cic != NULL) { + put_io_context(bfqd->in_service_cic->ioc); + bfqd->in_service_cic = NULL; + } + + bfqd->in_service_queue = NULL; + del_timer(&bfqd->idle_slice_timer); +} + +static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int requeue) +{ + struct bfq_entity *entity = &bfqq->entity; + + if (bfqq == bfqd->in_service_queue) + __bfq_bfqd_reset_in_service(bfqd); + + bfq_deactivate_entity(entity, requeue); +} + +static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + + bfq_activate_entity(entity); +} + +/* + * Called when the bfqq no longer has requests pending, remove it from + * the service tree. + */ +static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int requeue) +{ + BUG_ON(!bfq_bfqq_busy(bfqq)); + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + + bfq_log_bfqq(bfqd, bfqq, "del from busy"); + + bfq_clear_bfqq_busy(bfqq); + + BUG_ON(bfqd->busy_queues == 0); + bfqd->busy_queues--; + + if (!bfqq->dispatched) { + bfq_weights_tree_remove(bfqd, &bfqq->entity, + &bfqd->queue_weights_tree); + if (!blk_queue_nonrot(bfqd->queue)) { + BUG_ON(!bfqd->busy_in_flight_queues); + bfqd->busy_in_flight_queues--; + if (bfq_bfqq_constantly_seeky(bfqq)) { + BUG_ON(!bfqd-> + const_seeky_busy_in_flight_queues); + bfqd->const_seeky_busy_in_flight_queues--; + } + } + } + if (bfqq->wr_coeff > 1) + bfqd->wr_busy_queues--; + + bfq_deactivate_bfqq(bfqd, bfqq, requeue); +} + +/* + * Called when an inactive queue receives a new request. 
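+ * Activate its entity (and any idle ancestors), mark the queue busy,
+ * and update the busy-queue counters and the weights tree.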
+ */ +static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqq == bfqd->in_service_queue); + + bfq_log_bfqq(bfqd, bfqq, "add to busy"); + + bfq_activate_bfqq(bfqd, bfqq); + + bfq_mark_bfqq_busy(bfqq); + bfqd->busy_queues++; + + if (!bfqq->dispatched) { + if (bfqq->wr_coeff == 1) + bfq_weights_tree_add(bfqd, &bfqq->entity, + &bfqd->queue_weights_tree); + if (!blk_queue_nonrot(bfqd->queue)) { + bfqd->busy_in_flight_queues++; + if (bfq_bfqq_constantly_seeky(bfqq)) + bfqd->const_seeky_busy_in_flight_queues++; + } + } + if (bfqq->wr_coeff > 1) + bfqd->wr_busy_queues++; +} diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 index 00000000000..869fc354da7 --- /dev/null +++ b/block/bfq.h @@ -0,0 +1,742 @@ +/* + * BFQ-v7r5 for 3.1.0: data structures and common functions prototypes. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +#ifndef _BFQ_H +#define _BFQ_H + +#include +#include +#include +#include + +#define BFQ_IOPRIO_CLASSES 3 +#define BFQ_CL_IDLE_TIMEOUT (HZ/5) + +#define BFQ_MIN_WEIGHT 1 +#define BFQ_MAX_WEIGHT 1000 + +#define BFQ_DEFAULT_GRP_WEIGHT 10 +#define BFQ_DEFAULT_GRP_IOPRIO 0 +#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE + +struct bfq_entity; + +/** + * struct bfq_service_tree - per ioprio_class service tree. + * @active: tree for active entities (i.e., those backlogged). + * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). + * @first_idle: idle entity with minimum F_i. + * @last_idle: idle entity with maximum F_i. + * @vtime: scheduler virtual time. + * @wsum: scheduler weight sum; active and idle entities contribute to it. + * + * Each service tree represents a B-WF2Q+ scheduler on its own. Each + * ioprio_class has its own independent scheduler, and so its own + * bfq_service_tree. All the fields are protected by the queue lock + * of the containing bfqd. + */ +struct bfq_service_tree { + struct rb_root active; + struct rb_root idle; + + struct bfq_entity *first_idle; + struct bfq_entity *last_idle; + + u64 vtime; + unsigned long wsum; +}; + +/** + * struct bfq_sched_data - multi-class scheduler. + * @in_service_entity: entity in service. + * @next_in_service: head-of-the-line entity in the scheduler. + * @service_tree: array of service trees, one per ioprio_class. + * + * bfq_sched_data is the basic scheduler queue. It supports three + * ioprio_classes, and can be used either as a toplevel queue or as + * an intermediate queue on a hierarchical setup. + * @next_in_service points to the active entity of the sched_data + * service trees that will be scheduled next. + * + * The supported ioprio_classes are the same as in CFQ, in descending + * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. + * Requests from higher priority queues are served before all the + * requests from lower priority queues; among requests of the same + * queue requests are served according to B-WF2Q+. + * All the fields are protected by the queue lock of the containing bfqd. + */ +struct bfq_sched_data { + struct bfq_entity *in_service_entity; + struct bfq_entity *next_in_service; + struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; +}; + +/** + * struct bfq_weight_counter - counter of the number of all active entities + * with a given weight. + * @weight: weight of the entities that this counter refers to. 
+ * @num_active: number of active entities with this weight. + * @weights_node: weights tree member (see bfq_data's @queue_weights_tree + * and @group_weights_tree). + */ +struct bfq_weight_counter { + short int weight; + unsigned int num_active; + struct rb_node weights_node; +}; + +/** + * struct bfq_entity - schedulable entity. + * @rb_node: service_tree member. + * @weight_counter: pointer to the weight counter associated with this entity. + * @on_st: flag, true if the entity is on a tree (either the active or + * the idle one of its service_tree). + * @finish: B-WF2Q+ finish timestamp (aka F_i). + * @start: B-WF2Q+ start timestamp (aka S_i). + * @tree: tree the entity is enqueued into; %NULL if not on a tree. + * @min_start: minimum start time of the (active) subtree rooted at + * this entity; used for O(log N) lookups into active trees. + * @service: service received during the last round of service. + * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. + * @weight: weight of the queue + * @parent: parent entity, for hierarchical scheduling. + * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the + * associated scheduler queue, %NULL on leaf nodes. + * @sched_data: the scheduler queue this entity belongs to. + * @ioprio: the ioprio in use. + * @new_weight: when a weight change is requested, the new weight value. + * @orig_weight: original weight, used to implement weight boosting + * @new_ioprio: when an ioprio change is requested, the new ioprio value. + * @ioprio_class: the ioprio_class in use. + * @new_ioprio_class: when an ioprio_class change is requested, the new + * ioprio_class value. + * @ioprio_changed: flag, true when the user requested a weight, ioprio or + * ioprio_class change. + * + * A bfq_entity is used to represent either a bfq_queue (leaf node in the + * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each + * entity belongs to the sched_data of the parent group in the cgroup + * hierarchy. Non-leaf entities have also their own sched_data, stored + * in @my_sched_data. + * + * Each entity stores independently its priority values; this would + * allow different weights on different devices, but this + * functionality is not exported to userspace by now. Priorities and + * weights are updated lazily, first storing the new values into the + * new_* fields, then setting the @ioprio_changed flag. As soon as + * there is a transition in the entity state that allows the priority + * update to take place the effective and the requested priority + * values are synchronized. + * + * Unless cgroups are used, the weight value is calculated from the + * ioprio to export the same interface as CFQ. When dealing with + * ``well-behaved'' queues (i.e., queues that do not spend too much + * time to consume their budget and have true sequential behavior, and + * when there are no external factors breaking anticipation) the + * relative weights at each level of the cgroups hierarchy should be + * guaranteed. All the fields are protected by the queue lock of the + * containing bfqd. 
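+ *
+ * As an example of the ioprio <-> weight mapping used when cgroups are
+ * not in place: bfq_ioprio_to_weight() computes weight = IOPRIO_BE_NR -
+ * ioprio, so (assuming IOPRIO_BE_NR is 8) ioprio 0, the highest
+ * priority, maps to weight 8, while ioprio 7 maps to weight 1.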
+ */ +struct bfq_entity { + struct rb_node rb_node; + struct bfq_weight_counter *weight_counter; + + int on_st; + + u64 finish; + u64 start; + + struct rb_root *tree; + + u64 min_start; + + unsigned long service, budget; + unsigned short weight, new_weight; + unsigned short orig_weight; + + struct bfq_entity *parent; + + struct bfq_sched_data *my_sched_data; + struct bfq_sched_data *sched_data; + + unsigned short ioprio, new_ioprio; + unsigned short ioprio_class, new_ioprio_class; + + int ioprio_changed; +}; + +struct bfq_group; + +/** + * struct bfq_queue - leaf schedulable entity. + * @ref: reference counter. + * @bfqd: parent bfq_data. + * @new_bfqq: shared bfq_queue if queue is cooperating with + * one or more other queues. + * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). + * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). + * @sort_list: sorted list of pending requests. + * @next_rq: if fifo isn't expired, next request to serve. + * @queued: nr of requests queued in @sort_list. + * @allocated: currently allocated requests. + * @meta_pending: pending metadata requests. + * @fifo: fifo list of requests in sort_list. + * @entity: entity representing this queue in the scheduler. + * @max_budget: maximum budget allowed from the feedback mechanism. + * @budget_timeout: budget expiration (in jiffies). + * @dispatched: number of requests on the dispatch list or inside driver. + * @flags: status flags. + * @bfqq_list: node for active/idle bfqq list inside our bfqd. + * @seek_samples: number of seeks sampled + * @seek_total: sum of the distances of the seeks sampled + * @seek_mean: mean seek distance + * @last_request_pos: position of the last request enqueued + * @requests_within_timer: number of consecutive pairs of request completion + * and arrival, such that the queue becomes idle + * after the completion, but the next request arrives + * within an idle time slice; used only if the queue's + * IO_bound has been cleared. + * @pid: pid of the process owning the queue, used for logging purposes. + * @last_wr_start_finish: start time of the current weight-raising period if + * the @bfq-queue is being weight-raised, otherwise + * finish time of the last weight-raising period + * @wr_cur_max_time: current max raising time for this queue + * @soft_rt_next_start: minimum time instant such that, only if a new + * request is enqueued after this time instant in an + * idle @bfq_queue with no outstanding requests, then + * the task associated with the queue it is deemed as + * soft real-time (see the comments to the function + * bfq_bfqq_softrt_next_start()) + * @last_idle_bklogged: time of the last transition of the @bfq_queue from + * idle to backlogged + * @service_from_backlogged: cumulative service received from the @bfq_queue + * since the last transition from idle to + * backlogged + * @cic: pointer to the cfq_io_context owning the bfq_queue, set to %NULL if the + * queue is shared + * + * A bfq_queue is a leaf request queue; it can be associated with an + * io_context or more, if it is async or shared between cooperating + * processes. @cgroup holds a reference to the cgroup, to be sure that it + * does not disappear while a bfqq still references it (mostly to avoid + * races between request issuing and task migration followed by cgroup + * destruction). + * All the fields are protected by the queue lock of the containing bfqd. 
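+ *
+ * The two-element arrays @queued and @allocated are, as in CFQ,
+ * presumably indexed by request direction: slot 0 for async requests,
+ * slot 1 for sync ones.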
+ */ +struct bfq_queue { + atomic_t ref; + struct bfq_data *bfqd; + + /* fields for cooperating queues handling */ + struct bfq_queue *new_bfqq; + struct rb_node pos_node; + struct rb_root *pos_root; + + struct rb_root sort_list; + struct request *next_rq; + int queued[2]; + int allocated[2]; + int meta_pending; + struct list_head fifo; + + struct bfq_entity entity; + + unsigned long max_budget; + unsigned long budget_timeout; + + int dispatched; + + unsigned int flags; + + struct list_head bfqq_list; + + unsigned int seek_samples; + u64 seek_total; + sector_t seek_mean; + sector_t last_request_pos; + + unsigned int requests_within_timer; + + pid_t pid; + + /* weight-raising fields */ + unsigned long wr_cur_max_time; + unsigned long soft_rt_next_start; + unsigned long last_wr_start_finish; + unsigned int wr_coeff; + unsigned long last_idle_bklogged; + unsigned long service_from_backlogged; + + struct cfq_io_context *cic; +}; + +enum bfq_device_speed { + BFQ_BFQD_FAST, + BFQ_BFQD_SLOW, +}; + +/** + * struct bfq_data - per device data structure. + * @queue: request queue for the managed device. + * @root_group: root bfq_group for the device. + * @rq_pos_tree: rbtree sorted by next_request position, + * used when determining if two or more queues + * have interleaving requests (see bfq_close_cooperator). + * @eqm_lock: spinlock used to protect all data structures pertaining + * the Early Queue Merge (EQM) mechanism. + * @active_numerous_groups: number of bfq_groups containing more than one + * active @bfq_entity. + * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by + * weight. Used to keep track of whether all @bfq_queues + * have the same weight. The tree contains one counter + * for each distinct weight associated to some active + * and not weight-raised @bfq_queue (see the comments to + * the functions bfq_weights_tree_[add|remove] for + * further details). + * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted + * by weight. Used to keep track of whether all + * @bfq_groups have the same weight. The tree contains + * one counter for each distinct weight associated to + * some active @bfq_group (see the comments to the + * functions bfq_weights_tree_[add|remove] for further + * details). + * @busy_queues: number of bfq_queues containing requests (including the + * queue under service, even if it is idling). + * @busy_in_flight_queues: number of @bfq_queues containing pending or + * in-flight requests, plus the @bfq_queue in service, + * even if idle but waiting for the possible arrival + * of its next sync request. This field is updated only + * if the device is rotational, but used only if the + * device is also NCQ-capable. The reason why the field + * is updated also for non-NCQ-capable rotational + * devices is related to the fact that the value of + * hw_tag may be set also later than when this field may + * need to be incremented for the first time(s). + * Taking also this possibility into account, to avoid + * unbalanced increments/decrements, would imply more + * overhead than just updating this field regardless of + * the value of hw_tag. + * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues + * (that is, seeky queues that expired + * for budget timeout at least once) + * containing pending or in-flight + * requests, including the in-service + * @bfq_queue if constantly seeky. 
This + * field is updated only if the device + * is rotational, but used only if the + * device is also NCQ-capable (see the + * comments to @busy_in_flight_queues). + * @raised_busy_queues: number of weight-raised busy bfq_queues. + * @queued: number of queued requests. + * @rq_in_driver: number of requests dispatched and waiting for completion. + * @sync_flight: number of sync requests in the driver. + * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples + * completed requests. + * @hw_tag_samples: nr of samples used to calculate hw_tag. + * @hw_tag: flag set to one if the driver is showing a queueing behavior. + * @budgets_assigned: number of budgets assigned. + * @idle_slice_timer: timer set when idling for the next sequential request + * from the queue under service. + * @unplug_work: delayed work to restart dispatching on the request queue. + * @in_service_queue: @bfq_queue under service. + * @in_service_cic: cfq_io_context (cic) associated with the @in_service_queue. + * @last_position: on-disk position of the last served request. + * @last_budget_start: beginning of the last budget. + * @last_idling_start: beginning of the last idle slice. + * @peak_rate: peak transfer rate observed for a budget. + * @peak_rate_samples: number of samples used to calculate @peak_rate. + * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. + * @cic_index: use small consequent indexes as radix tree keys to reduce depth + * @cic_list: list of all the cics active on the bfq_data device. + * @group_list: list of all the bfq_groups active on the device. + * @active_list: list of all the bfq_queues active on the device. + * @idle_list: list of all the bfq_queues idle on the device. + * @bfq_quantum: max number of requests dispatched per dispatch round. + * @bfq_fifo_expire: timeout for async/sync requests; when it expires + * requests are served in fifo order. + * @bfq_back_penalty: weight of backward seeks wrt forward ones. + * @bfq_back_max: maximum allowed backward seek. + * @bfq_slice_idle: maximum idling time. + * @bfq_user_max_budget: user-configured max budget value + * (0 for auto-tuning). + * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to + * async queues. + * @bfq_timeout: timeout for bfq_queues to consume their budget; used to + * to prevent seeky queues to impose long latencies to well + * behaved ones (this also implies that seeky queues cannot + * receive guarantees in the service domain; after a timeout + * they are charged for the whole allocated budget, to try + * to preserve a behavior reasonably fair among them, but + * without service-domain guarantees). + * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is + * no more granted any weight-raising. + * @bfq_failed_cooperations: number of consecutive failed cooperation + * chances after which weight-raising is restored + * to a queue subject to more than bfq_coop_thresh + * queue merges. + * @bfq_requests_within_timer: number of consecutive requests that must be + * issued within the idle time slice to set + * again idling to a queue which was marked as + * non-I/O-bound (see the definition of the + * IO_bound flag for further details). 
+ * @bfq_wr_coeff: Maximum factor by which the weight of a boosted + * queue is multiplied + * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies) + * @bfq_wr_rt_max_time: maximum duration for soft real-time processes + * @bfq_wr_min_idle_time: minimum idle period after which weight-raising + * may be reactivated for a queue (in jiffies) + * @bfq_wr_min_inter_arr_async: minimum period between request arrivals + * after which weight-raising may be + * reactivated for an already busy queue + * (in jiffies) + * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue, + * sectors per seconds + * @RT_prod: cached value of the product R*T used for computing the maximum + * duration of the weight raising automatically + * @device_speed: device-speed class for the low-latency heuristic + * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions + * + * All the fields are protected by the @queue lock. + */ +struct bfq_data { + struct request_queue *queue; + + struct bfq_group *root_group; +#ifdef CONFIG_CGROUP_BFQIO + int active_numerous_groups; +#endif + + struct rb_root rq_pos_tree; + spinlock_t eqm_lock; + + struct rb_root queue_weights_tree; + struct rb_root group_weights_tree; + + int busy_queues; + int busy_in_flight_queues; + int const_seeky_busy_in_flight_queues; + int wr_busy_queues; + int queued; + int rq_in_driver; + int sync_flight; + + int max_rq_in_driver; + int hw_tag_samples; + int hw_tag; + + int budgets_assigned; + + struct timer_list idle_slice_timer; + struct work_struct unplug_work; + + struct bfq_queue *in_service_queue; + struct cfq_io_context *in_service_cic; + + sector_t last_position; + + ktime_t last_budget_start; + ktime_t last_idling_start; + int peak_rate_samples; + u64 peak_rate; + unsigned long bfq_max_budget; + + unsigned int cic_index; + struct list_head cic_list; + struct hlist_head group_list; + struct list_head active_list; + struct list_head idle_list; + + unsigned int bfq_quantum; + unsigned int bfq_fifo_expire[2]; + unsigned int bfq_back_penalty; + unsigned int bfq_back_max; + unsigned int bfq_slice_idle; + u64 bfq_class_idle_last_service; + + unsigned int bfq_user_max_budget; + unsigned int bfq_max_budget_async_rq; + unsigned int bfq_timeout[2]; + + unsigned int bfq_coop_thresh; + unsigned int bfq_failed_cooperations; + unsigned int bfq_requests_within_timer; + + bool low_latency; + + /* parameters of the low_latency heuristics */ + unsigned int bfq_wr_coeff; + unsigned int bfq_wr_max_time; + unsigned int bfq_wr_rt_max_time; + unsigned int bfq_wr_min_idle_time; + unsigned long bfq_wr_min_inter_arr_async; + unsigned int bfq_wr_max_softrt_rate; + u64 RT_prod; + enum bfq_device_speed device_speed; + + struct bfq_queue oom_bfqq; +}; + +enum bfqq_state_flags { + BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */ + BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ + BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ + BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ + BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ + BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ + BFQ_BFQQ_FLAG_sync, /* synchronous queue */ + BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ + BFQ_BFQQ_FLAG_IO_bound, /* + * bfqq has timed-out at least once + * having consumed at most 2/10 of + * its budget + */ + BFQ_BFQQ_FLAG_constantly_seeky, /* + * bfqq has proved to be slow and + * seeky until budget timeout + */ + BFQ_BFQQ_FLAG_softrt_update, /* + * may need softrt-next-start + * update + */ + 
BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ + BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ + BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ +}; + +#define BFQ_BFQQ_FNS(name) \ +static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ +{ \ + (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ +} \ +static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ +{ \ + (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ +} \ +static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ +{ \ + return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ +} + +BFQ_BFQQ_FNS(busy); +BFQ_BFQQ_FNS(wait_request); +BFQ_BFQQ_FNS(must_alloc); +BFQ_BFQQ_FNS(fifo_expire); +BFQ_BFQQ_FNS(idle_window); +BFQ_BFQQ_FNS(prio_changed); +BFQ_BFQQ_FNS(sync); +BFQ_BFQQ_FNS(budget_new); +BFQ_BFQQ_FNS(IO_bound); +BFQ_BFQQ_FNS(constantly_seeky); +BFQ_BFQQ_FNS(coop); +BFQ_BFQQ_FNS(split_coop); +BFQ_BFQQ_FNS(just_split); +BFQ_BFQQ_FNS(softrt_update); +#undef BFQ_BFQQ_FNS + +/* Logging facilities. */ +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) + +#define bfq_log(bfqd, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) + +/* Expiration reasons. */ +enum bfqq_expiration { + BFQ_BFQQ_TOO_IDLE = 0, /* + * queue has been idling for + * too long + */ + BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ + BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ + BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ +}; + +#ifdef CONFIG_CGROUP_BFQIO +/** + * struct bfq_group - per (device, cgroup) data structure. + * @entity: schedulable entity to insert into the parent group sched_data. + * @sched_data: own sched_data, to contain child entities (they may be + * both bfq_queues and bfq_groups). + * @group_node: node to be inserted into the bfqio_cgroup->group_data + * list of the containing cgroup's bfqio_cgroup. + * @bfqd_node: node to be inserted into the @bfqd->group_list list + * of the groups active on the same device; used for cleanup. + * @bfqd: the bfq_data for the device this group acts upon. + * @async_bfqq: array of async queues for all the tasks belonging to + * the group, one queue per ioprio value per ioprio_class, + * except for the idle class that has only one queue. + * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). + * @my_entity: pointer to @entity, %NULL for the toplevel group; used + * to avoid too many special cases during group creation/ + * migration. + * @active_entities: number of active entities belonging to the group; + * unused for the root group. Used to know whether there + * are groups with more than one active @bfq_entity + * (see the comments to the function + * bfq_bfqq_must_not_expire()). + * + * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup + * there is a set of bfq_groups, each one collecting the lower-level + * entities belonging to the group that are acting on the same device. + * + * Locking works as follows: + * o @group_node is protected by the bfqio_cgroup lock, and is accessed + * via RCU from its readers. + * o @bfqd is protected by the queue lock, RCU is used to access it + * from the readers. + * o All the other fields are protected by the @bfqd queue lock. 
+ */ +struct bfq_group { + struct bfq_entity entity; + struct bfq_sched_data sched_data; + + struct hlist_node group_node; + struct hlist_node bfqd_node; + + void *bfqd; + + struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; + struct bfq_queue *async_idle_bfqq; + + struct bfq_entity *my_entity; + + int active_entities; +}; + +/** + * struct bfqio_cgroup - bfq cgroup data structure. + * @css: subsystem state for bfq in the containing cgroup. + * @weight: cgroup weight. + * @ioprio: cgroup ioprio. + * @ioprio_class: cgroup ioprio_class. + * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. + * @group_data: list containing the bfq_group belonging to this cgroup. + * + * @group_data is accessed using RCU, with @lock protecting the updates, + * @ioprio and @ioprio_class are protected by @lock. + */ +struct bfqio_cgroup { + struct cgroup_subsys_state css; + + unsigned short weight, ioprio, ioprio_class; + + spinlock_t lock; + struct hlist_head group_data; +}; +#else +struct bfq_group { + struct bfq_sched_data sched_data; + + struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; + struct bfq_queue *async_idle_bfqq; +}; +#endif + +static inline struct bfq_service_tree * +bfq_entity_service_tree(struct bfq_entity *entity) +{ + struct bfq_sched_data *sched_data = entity->sched_data; + unsigned int idx = entity->ioprio_class - 1; + + BUG_ON(idx >= BFQ_IOPRIO_CLASSES); + BUG_ON(sched_data == NULL); + + return sched_data->service_tree + idx; +} + +static inline struct bfq_queue *cic_to_bfqq(struct cfq_io_context *cic, + int is_sync) +{ + return cic->cfqq[!!is_sync]; +} + +static inline void cic_set_bfqq(struct cfq_io_context *cic, + struct bfq_queue *bfqq, int is_sync) +{ + cic->cfqq[!!is_sync] = bfqq; +} + +static inline void call_for_each_cic(struct io_context *ioc, + void (*func)(struct io_context *, + struct cfq_io_context *)) +{ + struct cfq_io_context *cic; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) + func(ioc, cic); + rcu_read_unlock(); +} + +#define CIC_DEAD_KEY 1ul +#define CIC_DEAD_INDEX_SHIFT 1 + +static inline void *bfqd_dead_key(struct bfq_data *bfqd) +{ + return (void *)(bfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); +} + +/** + * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. + * @ptr: a pointer to a bfqd. + * @flags: storage for the flags to be saved. + * + * This function allows cic->key and bfqg->bfqd to be protected by the + * queue lock of the bfqd they reference; the pointer is dereferenced + * under RCU, so the storage for bfqd is assured to be safe as long + * as the RCU read side critical section does not end. After the + * bfqd->queue->queue_lock is taken the pointer is rechecked, to be + * sure that no other writer accessed it. If we raced with a writer, + * the function returns NULL, with the queue unlocked, otherwise it + * returns the dereferenced pointer, with the queue locked. 
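+ *
+ * Illustrative usage sketch (assuming a cfq_io_context *cic whose key
+ * points to this bfqd):
+ *
+ *	unsigned long flags;
+ *	struct bfq_data *bfqd = bfq_get_bfqd_locked(&cic->key, &flags);
+ *
+ *	if (bfqd != NULL) {
+ *		... work under bfqd->queue->queue_lock ...
+ *		bfq_put_bfqd_unlock(bfqd, &flags);
+ *	}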
+ */ +static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, + unsigned long *flags) +{ + struct bfq_data *bfqd; + + rcu_read_lock(); + bfqd = rcu_dereference(*(struct bfq_data **)ptr); + + if (bfqd != NULL && !((unsigned long) bfqd & CIC_DEAD_KEY)) { + spin_lock_irqsave(bfqd->queue->queue_lock, *flags); + if (*ptr == bfqd) + goto out; + spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); + } + + bfqd = NULL; +out: + rcu_read_unlock(); + return bfqd; +} + +static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, + unsigned long *flags) +{ + spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); +} + +static void bfq_changed_ioprio(struct io_context *ioc, + struct cfq_io_context *cic); +static void bfq_put_queue(struct bfq_queue *bfqq); +static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); +static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, int is_sync, + struct io_context *ioc, gfp_t gfp_mask); +static void bfq_end_wr_async_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg); +static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); +static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); + +#endif /* _BFQ_H */ diff --git a/block/blk-core.c b/block/blk-core.c index 8fc4ae28a19..745dc60aa84 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -296,13 +296,26 @@ EXPORT_SYMBOL(blk_sync_queue); * Description: * See @blk_run_queue. This variant must be called with the queue lock * held and interrupts disabled. + * Device driver will be notified of an urgent request + * pending under the following conditions: + * 1. The driver and the current scheduler support urgent reques handling + * 2. There is an urgent request pending in the scheduler + * 3. There isn't already an urgent request in flight, meaning previously + * notified urgent request completed (!q->notified_urgent) */ void __blk_run_queue(struct request_queue *q) { if (unlikely(blk_queue_stopped(q))) return; - q->request_fn(q); + if (!q->notified_urgent && + q->elevator->elevator_type->ops.elevator_is_urgent_fn && + q->urgent_request_fn && + q->elevator->elevator_type->ops.elevator_is_urgent_fn(q)) { + q->notified_urgent = true; + q->urgent_request_fn(q); + } else + q->request_fn(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -929,6 +942,50 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(blk_requeue_request); +/** + * blk_reinsert_request() - Insert a request back to the scheduler + * @q: request queue + * @rq: request to be inserted + * + * This function inserts the request back to the scheduler as if + * it was never dispatched. + * + * Return: 0 on success, error code on fail + */ +int blk_reinsert_request(struct request_queue *q, struct request *rq) +{ + if (unlikely(!rq) || unlikely(!q)) + return -EIO; + + blk_delete_timer(rq); + blk_clear_rq_complete(rq); + trace_block_rq_requeue(q, rq); + + if (blk_rq_tagged(rq)) + blk_queue_end_tag(q, rq); + + BUG_ON(blk_queued_rq(rq)); + + return elv_reinsert_request(q, rq); +} +EXPORT_SYMBOL(blk_reinsert_request); + +/** + * blk_reinsert_req_sup() - check whether the scheduler supports + * reinsertion of requests + * @q: request queue + * + * Returns true if the current scheduler supports reinserting + * request. False otherwise + */ +bool blk_reinsert_req_sup(struct request_queue *q) +{ + if (unlikely(!q)) + return false; + return q->elevator->elevator_type->ops.elevator_reinsert_req_fn ? 
true : false; +} +EXPORT_SYMBOL(blk_reinsert_req_sup); + static void add_acct_request(struct request_queue *q, struct request *rq, int where) { @@ -1979,8 +2036,17 @@ struct request *blk_fetch_request(struct request_queue *q) struct request *rq; rq = blk_peek_request(q); - if (rq) + if (rq) { + /* + * Assumption: the next request fetched from scheduler after we + * notified "urgent request pending" - will be the urgent one + */ + if (q->notified_urgent && !q->dispatched_urgent) { + q->dispatched_urgent = true; + (void)blk_mark_rq_urgent(rq); + } blk_start_request(rq); + } return rq; } EXPORT_SYMBOL(blk_fetch_request); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 6f9bbd97865..d0d16d4a79a 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include /* for max_pfn/max_low_pfn */ #include @@ -16,13 +17,12 @@ */ static struct kmem_cache *iocontext_cachep; -static void cfq_dtor(struct io_context *ioc) +static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) { - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->dtor(ioc); } } @@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - cfq_dtor(ioc); + + hlist_sched_dtor(ioc, &ioc->cic_list); + hlist_sched_dtor(ioc, &ioc->bfq_cic_list); rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc) } EXPORT_SYMBOL(put_io_context); -static void cfq_exit(struct io_context *ioc) +static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) { rcu_read_lock(); - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->exit(ioc); } rcu_read_unlock(); @@ -74,9 +75,10 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) - cfq_exit(ioc); - + if (atomic_dec_and_test(&ioc->nr_tasks)) { + hlist_sched_exit(ioc, &ioc->cic_list); + hlist_sched_exit(ioc, &ioc->bfq_cic_list); + } put_io_context(ioc); } @@ -89,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); spin_lock_init(&ioc->lock); - ioc->ioprio_changed = 0; + bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); ioc->ioprio = 0; ioc->last_waited = 0; /* doesn't matter... 
*/ ioc->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); + INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); + INIT_HLIST_HEAD(&ioc->bfq_cic_list); ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) ioc->cgroup_changed = 0; diff --git a/block/blk-settings.c b/block/blk-settings.c index fa1eb0449a0..7d3ee7fa50d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -99,6 +99,18 @@ void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) } EXPORT_SYMBOL_GPL(blk_queue_lld_busy); +/** + * blk_urgent_request() - Set an urgent_request handler function for queue + * @q: queue + * @fn: handler for urgent requests + * + */ +void blk_urgent_request(struct request_queue *q, request_fn_proc *fn) +{ + q->urgent_request_fn = fn; +} +EXPORT_SYMBOL(blk_urgent_request); + /** * blk_set_default_limits - reset limits to default values * @lim: the queue_limits structure to reset diff --git a/block/blk.h b/block/blk.h index 20b900a377c..7837cec9097 100644 --- a/block/blk.h +++ b/block/blk.h @@ -30,6 +30,7 @@ void __generic_unplug_device(struct request_queue *); */ enum rq_atomic_flags { REQ_ATOM_COMPLETE = 0, + REQ_ATOM_URGENT = 1, }; /* @@ -46,6 +47,16 @@ static inline void blk_clear_rq_complete(struct request *rq) clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); } +static inline int blk_mark_rq_urgent(struct request *rq) +{ + return test_and_set_bit(REQ_ATOM_URGENT, &rq->atomic_flags); +} + +static inline void blk_clear_rq_urgent(struct request *rq) +{ + clear_bit(REQ_ATOM_URGENT, &rq->atomic_flags); +} + /* * Internal elevator interface */ diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4c12869fcf7..0f60ba0ad87 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -20,18 +20,18 @@ * tunables */ /* max queue in one round of service */ -static const int cfq_quantum = 8; -static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; +static const int cfq_quantum = 4; +static const int cfq_fifo_expire[2] = { 33, 8}; /* maximum backwards seek, in KiB */ -static const int cfq_back_max = 16 * 1024; +static const int cfq_back_max = 12582912; /* penalty of a backwards seek */ -static const int cfq_back_penalty = 2; -static const int cfq_slice_sync = HZ / 10; -static int cfq_slice_async = HZ / 25; +static const int cfq_back_penalty = 1; +static const int cfq_slice_sync = 6; +static int cfq_slice_async = 5; static const int cfq_slice_async_rq = 2; -static int cfq_slice_idle = HZ / 125; -static int cfq_group_idle = HZ / 125; -static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ +static int cfq_slice_idle = 0; +static int cfq_group_idle = 0; +static const int cfq_target_latency = 300; /* 300 ms */ static const int cfq_hist_divisor = 4; /* @@ -2934,7 +2934,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void cfq_ioc_set_ioprio(struct io_context *ioc) { call_for_each_cic(ioc, changed_ioprio); - ioc->ioprio_changed = 0; } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, @@ -3226,8 +3225,13 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err_free; out: - smp_read_barrier_depends(); - if (unlikely(ioc->ioprio_changed)) + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. 
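The handoff described here is: the writer publishes the new ioprio and then sets a per-scheduler bit in the ioprio_changed bitmap; each scheduler clears only its own bit with test_and_clear_bit(), so CFQ and BFQ can each notice the change independently. A minimal user-space sketch of that set-then-test-and-clear pattern, using C11 atomics in place of the kernel's test_and_clear_bit()/wmb() (the names and the single-flag simplification are illustrative only):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static int ioprio;                  /* value being published                */
static atomic_bool ioprio_changed;  /* stands in for one bit of the bitmap  */

/* Writer: publish the new value, then raise the flag (release order). */
static void set_ioprio(int new_prio)
{
	ioprio = new_prio;
	atomic_store_explicit(&ioprio_changed, true, memory_order_release);
}

/* Reader: clear the flag (acquire order); if it was set, the value
 * written before the flag was raised is guaranteed to be visible. */
static void maybe_refresh_ioprio(void)
{
	if (atomic_exchange_explicit(&ioprio_changed, false, memory_order_acquire))
		printf("picked up new ioprio %d\n", ioprio);
}

int main(void)
{
	set_ioprio(4);
	maybe_refresh_ioprio();	/* prints: picked up new ioprio 4   */
	maybe_refresh_ioprio();	/* flag already consumed, no action */
	return 0;
}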
+ */ + if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) cfq_ioc_set_ioprio(ioc); #ifdef CONFIG_CFQ_GROUP_IOSCHED @@ -4268,20 +4272,6 @@ static struct blkio_policy_type blkio_policy_cfq; static int __init cfq_init(void) { - /* - * could be 0 on HZ < 1000 setups - */ - if (!cfq_slice_async) - cfq_slice_async = 1; - if (!cfq_slice_idle) - cfq_slice_idle = 1; - -#ifdef CONFIG_CFQ_GROUP_IOSCHED - if (!cfq_group_idle) - cfq_group_idle = 1; -#else - cfq_group_idle = 0; -#endif if (cfq_slab_setup()) return -ENOMEM; diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index c644137d9cd..e39de38f4b5 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -17,11 +17,12 @@ /* * See Documentation/block/deadline-iosched.txt */ -static const int read_expire = HZ / 2; /* max time before a read is submitted. */ -static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -static const int writes_starved = 2; /* max times reads can starve a write */ -static const int fifo_batch = 16; /* # of sequential requests treated as one - by the above parameters. For throughput. */ +static const int read_expire = 25; /* max time before a read is submitted. */ +static const int write_expire = 250; /* ditto for writes, these limits are SOFT! */ +static const int writes_starved = 1; /* max times reads can starve a write */ +static const int fifo_batch = 8; /* # of sequential requests treated as one + by the above parameters. For throughput. */ +static const int front_merges = 1; struct deadline_data { /* @@ -230,7 +231,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) /* * rq is expired! */ - if (time_after(jiffies, rq_fifo_time(rq))) + if (time_after_eq(jiffies, rq_fifo_time(rq))) return 1; return 0; @@ -352,7 +353,7 @@ static void *deadline_init_queue(struct request_queue *q) dd->fifo_expire[READ] = read_expire; dd->fifo_expire[WRITE] = write_expire; dd->writes_starved = writes_starved; - dd->front_merges = 1; + dd->front_merges = front_merges; dd->fifo_batch = fifo_batch; return dd; } @@ -464,3 +465,4 @@ module_exit(deadline_exit); MODULE_AUTHOR("Jens Axboe"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("deadline IO scheduler"); + diff --git a/block/elevator.c b/block/elevator.c index a3b64bc71d8..eb3056e0ee9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -522,6 +522,7 @@ static bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) { struct request *__rq; + bool ret; if (blk_queue_nomerges(q)) return false; @@ -535,14 +536,21 @@ static bool elv_attempt_insert_merge(struct request_queue *q, if (blk_queue_noxmerges(q)) return false; + ret = false; /* * See if our hash lookup can find a potential backmerge. 
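Among the deadline-iosched changes above, deadline_check_fifo() now uses time_after_eq(), so a request whose deadline equals the current jiffy already counts as expired instead of waiting one more tick. A small self-contained sketch of the wraparound-safe comparison (the macro bodies mirror include/linux/jiffies.h, minus the type checking):

#include <stdio.h>

typedef unsigned long jiffies_t;

/* Wraparound-safe jiffies comparisons */
#define time_after(a, b)	((long)((b) - (a)) < 0)
#define time_after_eq(a, b)	((long)((a) - (b)) >= 0)

int main(void)
{
	jiffies_t now = 1000, deadline = 1000;

	/* deadline reached exactly: time_after() misses it, time_after_eq() fires */
	printf("after:    %d\n", time_after(now, deadline));	/* 0 */
	printf("after_eq: %d\n", time_after_eq(now, deadline));	/* 1 */
	return 0;
}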
*/ - __rq = elv_rqhash_find(q, blk_rq_pos(rq)); - if (__rq && blk_attempt_req_merge(q, __rq, rq)) - return true; + while (1) { + __rq = elv_rqhash_find(q, blk_rq_pos(rq)); + if (!__rq || !blk_attempt_req_merge(q, __rq, rq)) + break; - return false; + /* The merged request could be merged with others, try again */ + ret = true; + rq = __rq; + } + + return ret; } void elv_merged_request(struct request_queue *q, struct request *rq, int type) @@ -603,6 +611,41 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE); } +/** + * elv_reinsert_request() - Insert a request back to the scheduler + * @q: request queue where request should be inserted + * @rq: request to be inserted + * + * This function returns the request back to the scheduler to be + * inserted as if it was never dispatched + * + * Return: 0 on success, error code on failure + */ +int elv_reinsert_request(struct request_queue *q, struct request *rq) +{ + int res; + + if (!q->elevator->elevator_type->ops.elevator_reinsert_req_fn) + return -EPERM; + + res = q->elevator->elevator_type->ops.elevator_reinsert_req_fn(q, rq); + if (!res) { + /* + * it already went through dequeue, we need to decrement the + * in_flight count again + */ + if (blk_account_rq(rq)) { + q->in_flight[rq_is_sync(rq)]--; + if (rq->cmd_flags & REQ_SORTED) + elv_deactivate_rq(q, rq); + } + rq->cmd_flags &= ~REQ_STARTED; + q->nr_sorted++; + } + + return res; +} + void elv_drain_elevator(struct request_queue *q) { static int printed; @@ -807,6 +850,11 @@ void elv_completed_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; + if (test_bit(REQ_ATOM_URGENT, &rq->atomic_flags)) { + q->notified_urgent = false; + q->dispatched_urgent = false; + blk_clear_rq_urgent(rq); + } /* * request is released from the driver, io must be done */ diff --git a/block/genhd.c b/block/genhd.c index d3834710b95..60c7561aba0 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1493,9 +1493,9 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) intv = disk_events_poll_jiffies(disk); set_timer_slack(&ev->dwork.timer, intv / 4); if (check_now) - queue_delayed_work(system_nrt_wq, &ev->dwork, 0); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); else if (intv) - queue_delayed_work(system_nrt_wq, &ev->dwork, intv); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); out_unlock: spin_unlock_irqrestore(&ev->lock, flags); } @@ -1539,7 +1539,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) ev->clearing |= mask; if (!ev->block) { cancel_delayed_work(&ev->dwork); - queue_delayed_work(system_nrt_wq, &ev->dwork, 0); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); } spin_unlock_irq(&ev->lock); } @@ -1576,7 +1576,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) /* uncondtionally schedule event check and wait for it to finish */ disk_block_events(disk); - queue_delayed_work(system_nrt_wq, &ev->dwork, 0); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); flush_delayed_work(&ev->dwork); __disk_unblock_events(disk, false); @@ -1613,7 +1613,7 @@ static void disk_events_workfn(struct work_struct *work) intv = disk_events_poll_jiffies(disk); if (!ev->block && intv) - queue_delayed_work(system_nrt_wq, &ev->dwork, intv); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); spin_unlock_irq(&ev->lock); diff --git a/block/row-iosched.c b/block/row-iosched.c new file mode 100644 
index 00000000000..0ebcbdf6c91 --- /dev/null +++ b/block/row-iosched.c @@ -0,0 +1,791 @@ +/* + * ROW (Read Over Write) I/O scheduler. + * + * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* See Documentation/block/row-iosched.txt */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * enum row_queue_prio - Priorities of the ROW queues + * + * This enum defines the priorities (and the number of queues) + * the requests will be disptributed to. The higher priority - + * the bigger is the dispatch quantum given to that queue. + * ROWQ_PRIO_HIGH_READ - is the higher priority queue. + * + */ +enum row_queue_prio { + ROWQ_PRIO_HIGH_READ = 0, + ROWQ_PRIO_REG_READ, + ROWQ_PRIO_HIGH_SWRITE, + ROWQ_PRIO_REG_SWRITE, + ROWQ_PRIO_REG_WRITE, + ROWQ_PRIO_LOW_READ, + ROWQ_PRIO_LOW_SWRITE, + ROWQ_MAX_PRIO, +}; + +/** + * struct row_queue_params - ROW queue parameters + * @idling_enabled: Flag indicating whether idling is enable on + * the queue + * @quantum: Number of requests to be dispatched from this queue + * in a dispatch cycle + * @is_urgent: Flags indicating whether the queue can notify on + * urgent requests + * + */ +struct row_queue_params { + bool idling_enabled; + int quantum; + bool is_urgent; +}; + +/* + * This array holds the default values of the different configurables + * for each ROW queue. 
Each row of the array holds the following values: + * {idling_enabled, quantum, is_urgent} + * Each row corresponds to a queue with the same index (according to + * enum row_queue_prio) + */ +static const struct row_queue_params row_queues_def[] = { +/* idling_enabled, quantum, is_urgent */ + {true, 100, true}, /* ROWQ_PRIO_HIGH_READ */ + {true, 100, true}, /* ROWQ_PRIO_REG_READ */ + {false, 2, false}, /* ROWQ_PRIO_HIGH_SWRITE */ + {false, 1, false}, /* ROWQ_PRIO_REG_SWRITE */ + {false, 1, false}, /* ROWQ_PRIO_REG_WRITE */ + {false, 1, false}, /* ROWQ_PRIO_LOW_READ */ + {false, 1, false} /* ROWQ_PRIO_LOW_SWRITE */ +}; + +/* Default values for idling on read queues (in msec) */ +#define ROW_IDLE_TIME_MSEC 5 +#define ROW_READ_FREQ_MSEC 20 + +/** + * struct rowq_idling_data - parameters for idling on the queue + * @last_insert_time: time the last request was inserted + * to the queue + * @begin_idling: flag indicating wether we should idle + * + */ +struct rowq_idling_data { + ktime_t last_insert_time; + bool begin_idling; +}; + +/** + * struct row_queue - requests grouping structure + * @rdata: parent row_data structure + * @fifo: fifo of requests + * @prio: queue priority (enum row_queue_prio) + * @nr_dispatched: number of requests already dispatched in + * the current dispatch cycle + * @slice: number of requests to dispatch in a cycle + * @nr_req: number of requests in queue + * @dispatch quantum: number of requests this queue may + * dispatch in a dispatch cycle + * @idle_data: data for idling on queues + * + */ +struct row_queue { + struct row_data *rdata; + struct list_head fifo; + enum row_queue_prio prio; + + unsigned int nr_dispatched; + unsigned int slice; + + unsigned int nr_req; + int disp_quantum; + + /* used only for READ queues */ + struct rowq_idling_data idle_data; +}; + +/** + * struct idling_data - data for idling on empty rqueue + * @idle_time: idling duration (jiffies) + * @freq: min time between two requests that + * triger idling (msec) + * @idle_work: pointer to struct delayed_work + * + */ +struct idling_data { + unsigned long idle_time; + u32 freq; + + struct workqueue_struct *idle_workqueue; + struct delayed_work idle_work; +}; + +/** + * struct row_queue - Per block device rqueue structure + * @dispatch_queue: dispatch rqueue + * @row_queues: array of priority request queues + * @curr_queue: index in the row_queues array of the + * currently serviced rqueue + * @read_idle: data for idling after READ request + * @nr_reqs: nr_reqs[0] holds the number of all READ requests in + * scheduler, nr_reqs[1] holds the number of all WRITE + * requests in scheduler + * @cycle_flags: used for marking unserved queueus + * + */ +struct row_data { + struct request_queue *dispatch_queue; + + struct row_queue row_queues[ROWQ_MAX_PRIO]; + + enum row_queue_prio curr_queue; + + struct idling_data read_idle; + unsigned int nr_reqs[2]; + + unsigned int cycle_flags; +}; + +#define RQ_ROWQ(rq) ((struct row_queue *) ((rq)->elevator_private[0])) + +#define row_log(q, fmt, args...) \ + blk_add_trace_msg(q, "%s():" fmt , __func__, ##args) +#define row_log_rowq(rdata, rowq_id, fmt, args...) 
\ + blk_add_trace_msg(rdata->dispatch_queue, "rowq%d " fmt, \ + rowq_id, ##args) + +static inline void row_mark_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + rd->cycle_flags |= (1 << qnum); +} + +static inline void row_clear_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + rd->cycle_flags &= ~(1 << qnum); +} + +static inline int row_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + return rd->cycle_flags & (1 << qnum); +} + +static inline void __maybe_unused row_dump_queues_stat(struct row_data *rd) +{ + int i; + + row_log(rd->dispatch_queue, " Queues status:"); + for (i = 0; i < ROWQ_MAX_PRIO; i++) + row_log(rd->dispatch_queue, + "queue%d: dispatched= %d, nr_req=%d", i, + rd->row_queues[i].nr_dispatched, + rd->row_queues[i].nr_req); +} + +/******************** Static helper functions ***********************/ +/* + * kick_queue() - Wake up device driver queue thread + * @work: pointer to struct work_struct + * + * This is a idling delayed work function. It's purpose is to wake up the + * device driver in order for it to start fetching requests. + * + */ +static void kick_queue(struct work_struct *work) +{ + struct delayed_work *idle_work = to_delayed_work(work); + struct idling_data *read_data = + container_of(idle_work, struct idling_data, idle_work); + struct row_data *rd = + container_of(read_data, struct row_data, read_idle); + + row_log_rowq(rd, rd->curr_queue, "Performing delayed work"); + /* Mark idling process as done */ + rd->row_queues[rd->curr_queue].idle_data.begin_idling = false; + + if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) + row_log(rd->dispatch_queue, "No requests in scheduler"); + else { + spin_lock_irq(rd->dispatch_queue->queue_lock); + __blk_run_queue(rd->dispatch_queue); + spin_unlock_irq(rd->dispatch_queue->queue_lock); + } +} + +/* + * row_restart_disp_cycle() - Restart the dispatch cycle + * @rd: pointer to struct row_data + * + * This function restarts the dispatch cycle by: + * - Setting current queue to ROWQ_PRIO_HIGH_READ + * - For each queue: reset the number of requests dispatched in + * the cycle + */ +static inline void row_restart_disp_cycle(struct row_data *rd) +{ + int i; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) + rd->row_queues[i].nr_dispatched = 0; + + rd->curr_queue = ROWQ_PRIO_HIGH_READ; + row_log(rd->dispatch_queue, "Restarting cycle"); +} + +static inline void row_get_next_queue(struct row_data *rd) +{ + rd->curr_queue++; + if (rd->curr_queue == ROWQ_MAX_PRIO) + row_restart_disp_cycle(rd); +} + +/******************* Elevator callback functions *********************/ + +/* + * row_add_request() - Add request to the scheduler + * @q: requests queue + * @rq: request to add + * + */ +static void row_add_request(struct request_queue *q, + struct request *rq) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + struct row_queue *rqueue = RQ_ROWQ(rq); + + list_add_tail(&rq->queuelist, &rqueue->fifo); + rd->nr_reqs[rq_data_dir(rq)]++; + rqueue->nr_req++; + rq_set_fifo_time(rq, jiffies); /* for statistics*/ + + if (row_queues_def[rqueue->prio].idling_enabled) { + if (delayed_work_pending(&rd->read_idle.idle_work)) + (void)cancel_delayed_work( + &rd->read_idle.idle_work); + if (ktime_to_ms(ktime_sub(ktime_get(), + rqueue->idle_data.last_insert_time)) < + rd->read_idle.freq) { + rqueue->idle_data.begin_idling = true; + row_log_rowq(rd, rqueue->prio, "Enable idling"); + } else { + rqueue->idle_data.begin_idling = false; + row_log_rowq(rd, rqueue->prio, "Disable idling"); + } + + 
rqueue->idle_data.last_insert_time = ktime_get(); + } + if (row_queues_def[rqueue->prio].is_urgent && + row_rowq_unserved(rd, rqueue->prio)) { + row_log_rowq(rd, rqueue->prio, + "added urgent request (total on queue=%d)", + rqueue->nr_req); + } else + row_log_rowq(rd, rqueue->prio, + "added request (total on queue=%d)", rqueue->nr_req); +} + +/** + * row_reinsert_req() - Reinsert request back to the scheduler + * @q: requests queue + * @rq: request to add + * + * Reinsert the given request back to the queue it was + * dispatched from as if it was never dispatched. + * + * Returns 0 on success, error code otherwise + */ +static int row_reinsert_req(struct request_queue *q, + struct request *rq) +{ + struct row_data *rd = q->elevator->elevator_data; + struct row_queue *rqueue = RQ_ROWQ(rq); + + /* Verify rqueue is legitimate */ + if (rqueue->prio >= ROWQ_MAX_PRIO) { + pr_err("\n\nROW BUG: row_reinsert_req() rqueue->prio = %d\n", + rqueue->prio); + blk_dump_rq_flags(rq, ""); + return -EIO; + } + + list_add(&rq->queuelist, &rqueue->fifo); + rd->nr_reqs[rq_data_dir(rq)]++; + rqueue->nr_req++; + + row_log_rowq(rd, rqueue->prio, + "request reinserted (total on queue=%d)", rqueue->nr_req); + + return 0; +} + +/* + * row_urgent_pending() - Return TRUE if there is an urgent + * request on scheduler + * @q: requests queue + * + */ +static bool row_urgent_pending(struct request_queue *q) +{ + struct row_data *rd = q->elevator->elevator_data; + int i; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) + if (row_queues_def[i].is_urgent && row_rowq_unserved(rd, i) && + !list_empty(&rd->row_queues[i].fifo)) { + row_log_rowq(rd, i, + "Urgent request pending (curr=%i)", + rd->curr_queue); + return true; + } + + return false; +} + +/** + * row_remove_request() - Remove given request from scheduler + * @q: requests queue + * @rq: request to remove + * + */ +static void row_remove_request(struct request_queue *q, + struct request *rq) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + struct row_queue *rqueue = RQ_ROWQ(rq); + + rq_fifo_clear(rq); + rqueue->nr_req--; + rd->nr_reqs[rq_data_dir(rq)]--; +} + +/* + * row_dispatch_insert() - move request to dispatch queue + * @rd: pointer to struct row_data + * + * This function moves the next request to dispatch from + * rd->curr_queue to the dispatch queue + * + */ +static void row_dispatch_insert(struct row_data *rd) +{ + struct request *rq; + + rq = rq_entry_fifo(rd->row_queues[rd->curr_queue].fifo.next); + row_remove_request(rd->dispatch_queue, rq); + elv_dispatch_add_tail(rd->dispatch_queue, rq); + rd->row_queues[rd->curr_queue].nr_dispatched++; + row_clear_rowq_unserved(rd, rd->curr_queue); + row_log_rowq(rd, rd->curr_queue, " Dispatched request nr_disp = %d", + rd->row_queues[rd->curr_queue].nr_dispatched); +} + +/* + * row_choose_queue() - choose the next queue to dispatch from + * @rd: pointer to struct row_data + * + * Updates rd->curr_queue. Returns 1 if there are requests to + * dispatch, 0 if there are no requests in scheduler + * + */ +static int row_choose_queue(struct row_data *rd) +{ + int prev_curr_queue = rd->curr_queue; + + if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) { + row_log(rd->dispatch_queue, "No more requests in scheduler"); + return 0; + } + + row_get_next_queue(rd); + + /* + * Loop over all queues to find the next queue that is not empty. 
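row_urgent_pending() above leans on the same cycle_flags bitmask that row_mark_rowq_unserved()/row_clear_rowq_unserved() maintain: one bit per queue, set when a queue's turn passed while it was empty, cleared once the queue is served. A stand-alone user-space sketch of that bookkeeping, with the policy reduced to a simple scan (queue names abbreviated, data made up):

#include <stdio.h>

enum { Q_HP_READ, Q_REG_READ, Q_HP_SWRITE, Q_REG_SWRITE,
       Q_REG_WRITE, Q_LOW_READ, Q_LOW_SWRITE, Q_MAX };

static unsigned int cycle_flags;	/* one "unserved" bit per queue */

static void mark_unserved(int q)  { cycle_flags |= 1u << q; }
static void clear_unserved(int q) { cycle_flags &= ~(1u << q); }
static int  is_unserved(int q)    { return !!(cycle_flags & (1u << q)); }

int main(void)
{
	mark_unserved(Q_REG_READ);	/* queue was empty during its slot */

	/* an "urgent pending" style scan: did any skipped queue get work? */
	for (int q = 0; q < Q_MAX; q++)
		if (is_unserved(q))
			printf("queue %d missed its turn and may preempt\n", q);

	clear_unserved(Q_REG_READ);	/* served out of turn, clear the mark */
	return 0;
}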
+ * Stop when you get back to curr_queue + */ + while (list_empty(&rd->row_queues[rd->curr_queue].fifo) + && rd->curr_queue != prev_curr_queue) { + /* Mark rqueue as unserved */ + row_mark_rowq_unserved(rd, rd->curr_queue); + row_get_next_queue(rd); + } + + return 1; +} + +/* + * row_dispatch_requests() - selects the next request to dispatch + * @q: requests queue + * @force: ignored + * + * Return 0 if no requests were moved to the dispatch queue. + * 1 otherwise + * + */ +static int row_dispatch_requests(struct request_queue *q, int force) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + int ret = 0, currq, i; + + currq = rd->curr_queue; + + /* + * Find the first unserved queue (with higher priority then currq) + * that is not empty + */ + for (i = 0; i < currq; i++) { + if (row_rowq_unserved(rd, i) && + !list_empty(&rd->row_queues[i].fifo)) { + row_log_rowq(rd, currq, + " Preemting for unserved rowq%d. (nr_req=%u)", + i, rd->row_queues[currq].nr_req); + rd->curr_queue = i; + row_dispatch_insert(rd); + ret = 1; + goto done; + } + } + + if (rd->row_queues[currq].nr_dispatched >= + rd->row_queues[currq].disp_quantum) { + rd->row_queues[currq].nr_dispatched = 0; + row_log_rowq(rd, currq, "Expiring rqueue"); + ret = row_choose_queue(rd); + if (ret) + row_dispatch_insert(rd); + goto done; + } + + /* Dispatch from curr_queue */ + if (list_empty(&rd->row_queues[currq].fifo)) { + /* check idling */ + if (delayed_work_pending(&rd->read_idle.idle_work)) { + if (force) { + (void)cancel_delayed_work( + &rd->read_idle.idle_work); + row_log_rowq(rd, currq, + "Canceled delayed work - forced dispatch"); + } else { + row_log_rowq(rd, currq, + "Delayed work pending. Exiting"); + goto done; + } + } + + if (!force && row_queues_def[currq].idling_enabled && + rd->row_queues[currq].idle_data.begin_idling) { + if (!queue_delayed_work(rd->read_idle.idle_workqueue, + &rd->read_idle.idle_work, + rd->read_idle.idle_time)) { + row_log_rowq(rd, currq, + "Work already on queue!"); + pr_err("ROW_BUG: Work already on queue!"); + } else + row_log_rowq(rd, currq, + "Scheduled delayed work. exiting"); + goto done; + } else { + row_log_rowq(rd, currq, + "Currq empty. Choose next queue"); + ret = row_choose_queue(rd); + if (!ret) + goto done; + } + } + + ret = 1; + row_dispatch_insert(rd); + +done: + return ret; +} + +/* + * row_init_queue() - Init scheduler data structures + * @q: requests queue + * + * Return pointer to struct row_data to be saved in elevator for + * this dispatch queue + * + */ +static void *row_init_queue(struct request_queue *q) +{ + + struct row_data *rdata; + int i; + + rdata = kmalloc_node(sizeof(*rdata), + GFP_KERNEL | __GFP_ZERO, q->node); + if (!rdata) + return NULL; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) { + INIT_LIST_HEAD(&rdata->row_queues[i].fifo); + rdata->row_queues[i].disp_quantum = row_queues_def[i].quantum; + rdata->row_queues[i].rdata = rdata; + rdata->row_queues[i].prio = i; + rdata->row_queues[i].idle_data.begin_idling = false; + rdata->row_queues[i].idle_data.last_insert_time = + ktime_set(0, 0); + } + + /* + * Currently idling is enabled only for READ queues. 
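The dispatch path above (row_dispatch_requests() together with row_choose_queue()) is, at its core, a round robin over the priority queues in which each queue may send at most disp_quantum requests per cycle before the scheduler moves on, and a wrap of the cycle resets every per-queue counter. A compact user-space model of that accounting, with made-up quantum and backlog numbers:

#include <stdio.h>

#define NR_QUEUES 3

struct queue {
	int quantum;	/* requests allowed per cycle   */
	int dispatched;	/* dispatched in current cycle  */
	int pending;	/* requests waiting in the fifo */
};

/* Stay on the current queue until its quantum is used up or it runs dry,
 * then move on; wrapping back to queue 0 restarts the dispatch cycle. */
static int choose_queue(struct queue *q, int curr)
{
	for (int i = 0; i < NR_QUEUES; i++) {
		int idx = (curr + i) % NR_QUEUES;

		if (idx == 0 && i != 0) {
			/* wrapped around: new cycle, reset per-queue counters */
			for (int j = 0; j < NR_QUEUES; j++)
				q[j].dispatched = 0;
		}
		if (q[idx].pending && q[idx].dispatched < q[idx].quantum)
			return idx;
	}
	return -1;	/* nothing left to dispatch */
}

int main(void)
{
	struct queue q[NR_QUEUES] = {
		{ .quantum = 2, .pending = 3 },	/* "high priority read" */
		{ .quantum = 1, .pending = 2 },	/* "regular write"      */
		{ .quantum = 1, .pending = 0 },	/* empty queue          */
	};
	int curr = 0, pick;

	while ((pick = choose_queue(q, curr)) >= 0) {
		q[pick].dispatched++;
		q[pick].pending--;
		curr = pick;
		printf("dispatch from queue %d\n", pick);
	}
	return 0;
}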
If we want to + * enable it for write queues also, note that idling frequency will + * be the same in both cases + */ + rdata->read_idle.idle_time = msecs_to_jiffies(ROW_IDLE_TIME_MSEC); + /* Maybe 0 on some platforms */ + if (!rdata->read_idle.idle_time) + rdata->read_idle.idle_time = 1; + rdata->read_idle.freq = ROW_READ_FREQ_MSEC; + rdata->read_idle.idle_workqueue = alloc_workqueue("row_idle_work", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (!rdata->read_idle.idle_workqueue) + panic("Failed to create idle workqueue\n"); + INIT_DELAYED_WORK(&rdata->read_idle.idle_work, kick_queue); + + rdata->curr_queue = ROWQ_PRIO_HIGH_READ; + rdata->dispatch_queue = q; + + rdata->nr_reqs[READ] = rdata->nr_reqs[WRITE] = 0; + + return rdata; +} + +/* + * row_exit_queue() - called on unloading the RAW scheduler + * @e: poiner to struct elevator_queue + * + */ +static void row_exit_queue(struct elevator_queue *e) +{ + struct row_data *rd = (struct row_data *)e->elevator_data; + int i; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) + BUG_ON(!list_empty(&rd->row_queues[i].fifo)); + (void)cancel_delayed_work_sync(&rd->read_idle.idle_work); + BUG_ON(delayed_work_pending(&rd->read_idle.idle_work)); + destroy_workqueue(rd->read_idle.idle_workqueue); + kfree(rd); +} + +/* + * row_merged_requests() - Called when 2 requests are merged + * @q: requests queue + * @rq: request the two requests were merged into + * @next: request that was merged + */ +static void row_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + struct row_queue *rqueue = RQ_ROWQ(next); + + list_del_init(&next->queuelist); + rqueue->nr_req--; + + rqueue->rdata->nr_reqs[rq_data_dir(rq)]--; +} + +/* + * get_queue_type() - Get queue type for a given request + * + * This is a helping function which purpose is to determine what + * ROW queue the given request should be added to (and + * dispatched from leter on) + * + * TODO: Right now only 3 queues are used REG_READ, REG_WRITE + * and REG_SWRITE + */ +static enum row_queue_prio get_queue_type(struct request *rq) +{ + const int data_dir = rq_data_dir(rq); + const bool is_sync = rq_is_sync(rq); + + if (data_dir == READ) + return ROWQ_PRIO_REG_READ; + else if (is_sync) + return ROWQ_PRIO_REG_SWRITE; + else + return ROWQ_PRIO_REG_WRITE; +} + +/* + * row_set_request() - Set ROW data structures associated with this request. 
+ * @q: requests queue + * @rq: pointer to the request + * @gfp_mask: ignored + * + */ +static int +row_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + rq->elevator_private[0] = + (void *)(&rd->row_queues[get_queue_type(rq)]); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 0; +} + +/********** Helping sysfs functions/defenitions for ROW attributes ******/ +static ssize_t row_var_show(int var, char *page) +{ + return snprintf(page, 100, "%d\n", var); +} + +static ssize_t row_var_store(int *var, const char *page, size_t count) +{ + int err; + err = kstrtoul(page, 10, (unsigned long *)var); + + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct row_data *rowd = e->elevator_data; \ + int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return row_var_show(__data, (page)); \ +} +SHOW_FUNCTION(row_hp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 0); +SHOW_FUNCTION(row_rp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum, 0); +SHOW_FUNCTION(row_hp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_rp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_rp_write_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum, 0); +SHOW_FUNCTION(row_lp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum, 0); +SHOW_FUNCTION(row_lp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_read_idle_show, rowd->read_idle.idle_time, 0); +SHOW_FUNCTION(row_read_idle_freq_show, rowd->read_idle.freq, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, \ + const char *page, size_t count) \ +{ \ + struct row_data *rowd = e->elevator_data; \ + int __data; \ + int ret = row_var_store(&__data, (page), count); \ + if (__CONV) \ + __data = (int)msecs_to_jiffies(__data); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(row_hp_read_quantum_store, +&rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 1, INT_MAX, 0); +STORE_FUNCTION(row_rp_read_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_hp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_rp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_rp_write_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_lp_read_quantum_store, + &rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_lp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, + 1, INT_MAX, 1); +STORE_FUNCTION(row_read_idle_store, &rowd->read_idle.idle_time, 1, INT_MAX, 0); +STORE_FUNCTION(row_read_idle_freq_store, &rowd->read_idle.freq, 1, INT_MAX, 0); + +#undef STORE_FUNCTION + +#define ROW_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, row_##name##_show, \ + row_##name##_store) + +static struct elv_fs_entry row_attrs[] = 
{ + ROW_ATTR(hp_read_quantum), + ROW_ATTR(rp_read_quantum), + ROW_ATTR(hp_swrite_quantum), + ROW_ATTR(rp_swrite_quantum), + ROW_ATTR(rp_write_quantum), + ROW_ATTR(lp_read_quantum), + ROW_ATTR(lp_swrite_quantum), + ROW_ATTR(read_idle), + ROW_ATTR(read_idle_freq), + __ATTR_NULL +}; + +static struct elevator_type iosched_row = { + .ops = { + .elevator_merge_req_fn = row_merged_requests, + .elevator_dispatch_fn = row_dispatch_requests, + .elevator_add_req_fn = row_add_request, + .elevator_reinsert_req_fn = row_reinsert_req, + .elevator_is_urgent_fn = row_urgent_pending, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_set_req_fn = row_set_request, + .elevator_init_fn = row_init_queue, + .elevator_exit_fn = row_exit_queue, + }, + + .elevator_attrs = row_attrs, + .elevator_name = "row", + .elevator_owner = THIS_MODULE, +}; + +static int __init row_init(void) +{ + elv_register(&iosched_row); + return 0; +} + +static void __exit row_exit(void) +{ + elv_unregister(&iosched_row); +} + +module_init(row_init); +module_exit(row_exit); + +MODULE_LICENSE("GPLv2"); +MODULE_DESCRIPTION("Read Over Write IO scheduler"); diff --git a/block/sio-iosched.c b/block/sio-iosched.c new file mode 100644 index 00000000000..c52a67c554c --- /dev/null +++ b/block/sio-iosched.c @@ -0,0 +1,399 @@ +/* + * Simple IO scheduler + * Based on Noop, Deadline and V(R) IO schedulers. + * + * Copyright (C) 2012 Miguel Boton + * + * + * This algorithm does not do any kind of sorting, as it is aimed for + * aleatory access devices, but it does some basic merging. We try to + * keep minimum overhead to achieve low latency. + * + * Asynchronous and synchronous requests are not treated separately, but + * we relay on deadlines to ensure fairness. + * + */ +#include +#include +#include +#include +#include +#include + +enum { ASYNC, SYNC }; + +/* Tunables */ +static const int sync_read_expire = HZ / 2; /* max time before a sync read is submitted. */ +static const int sync_write_expire = 2 * HZ; /* max time before a sync write is submitted. */ + +static const int async_read_expire = 4 * HZ; /* ditto for async, these limits are SOFT! */ +static const int async_write_expire = 16 * HZ; /* ditto for async, these limits are SOFT! */ + +static const int writes_starved = 2; /* max times reads can starve a write */ +static const int fifo_batch = 8; /* # of sequential requests treated as one + by the above parameters. For throughput. */ + +/* Elevator data */ +struct sio_data { + /* Request queues */ + struct list_head fifo_list[2][2]; + + /* Attributes */ + unsigned int batched; + unsigned int starved; + + /* Settings */ + int fifo_expire[2][2]; + int fifo_batch; + int writes_starved; +}; + +static void +sio_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + /* + * If next expires before rq, assign its expire time to rq + * and move into next position (next will be deleted) in fifo. + */ + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) { + if (time_before(rq_fifo_time(next), rq_fifo_time(rq))) { + list_move(&rq->queuelist, &next->queuelist); + rq_set_fifo_time(rq, rq_fifo_time(next)); + } + } + + /* Delete next request */ + rq_fifo_clear(next); +} + +static void +sio_add_request(struct request_queue *q, struct request *rq) +{ + struct sio_data *sd = q->elevator->elevator_data; + const int sync = rq_is_sync(rq); + const int data_dir = rq_data_dir(rq); + + /* + * Add request to the proper fifo list and set its + * expire time. 
+ */ + rq_set_fifo_time(rq, jiffies + sd->fifo_expire[sync][data_dir]); + list_add_tail(&rq->queuelist, &sd->fifo_list[sync][data_dir]); +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38) +static int +sio_queue_empty(struct request_queue *q) +{ + struct sio_data *sd = q->elevator->elevator_data; + + /* Check if fifo lists are empty */ + return list_empty(&sd->fifo_list[SYNC][READ]) && list_empty(&sd->fifo_list[SYNC][WRITE]) && + list_empty(&sd->fifo_list[ASYNC][READ]) && list_empty(&sd->fifo_list[ASYNC][WRITE]); +} +#endif + +static struct request * +sio_expired_request(struct sio_data *sd, int sync, int data_dir) +{ + struct list_head *list = &sd->fifo_list[sync][data_dir]; + struct request *rq; + + if (list_empty(list)) + return NULL; + + /* Retrieve request */ + rq = rq_entry_fifo(list->next); + + /* Request has expired */ + if (time_after(jiffies, rq_fifo_time(rq))) + return rq; + + return NULL; +} + +static struct request * +sio_choose_expired_request(struct sio_data *sd) +{ + struct request *rq; + + /* + * Check expired requests. + * Asynchronous requests have priority over synchronous. + * Write requests have priority over read. + */ + rq = sio_expired_request(sd, ASYNC, WRITE); + if (rq) + return rq; + rq = sio_expired_request(sd, ASYNC, READ); + if (rq) + return rq; + + rq = sio_expired_request(sd, SYNC, WRITE); + if (rq) + return rq; + rq = sio_expired_request(sd, SYNC, READ); + if (rq) + return rq; + + return NULL; +} + +static struct request * +sio_choose_request(struct sio_data *sd, int data_dir) +{ + struct list_head *sync = sd->fifo_list[SYNC]; + struct list_head *async = sd->fifo_list[ASYNC]; + + /* + * Retrieve request from available fifo list. + * Synchronous requests have priority over asynchronous. + * Read requests have priority over write. + */ + if (!list_empty(&sync[data_dir])) + return rq_entry_fifo(sync[data_dir].next); + if (!list_empty(&async[data_dir])) + return rq_entry_fifo(async[data_dir].next); + + if (!list_empty(&sync[!data_dir])) + return rq_entry_fifo(sync[!data_dir].next); + if (!list_empty(&async[!data_dir])) + return rq_entry_fifo(async[!data_dir].next); + + return NULL; +} + +static inline void +sio_dispatch_request(struct sio_data *sd, struct request *rq) +{ + /* + * Remove the request from the fifo list + * and dispatch it. + */ + rq_fifo_clear(rq); + elv_dispatch_add_tail(rq->q, rq); + + sd->batched++; + + if (rq_data_dir(rq)) + sd->starved = 0; + else + sd->starved++; +} + +static int +sio_dispatch_requests(struct request_queue *q, int force) +{ + struct sio_data *sd = q->elevator->elevator_data; + struct request *rq = NULL; + int data_dir = READ; + + /* + * Retrieve any expired request after a batch of + * sequential requests. 
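sio_dispatch_request() above bumps a starved counter on every read it sends and clears it when a write goes out; the dispatch loop that follows compares it against writes_starved so reads cannot starve writes indefinitely. A reduced user-space sketch of just that accounting (deadlines and batching omitted, numbers illustrative):

#include <stdio.h>

#define WRITES_STARVED 2	/* reads that may pass before a write must go */

struct disp_state {
	int starved;		/* reads dispatched since the last write */
};

/* 1 = dispatch a read this round, 0 = force a write */
static int pick_read(struct disp_state *s, int reads_pending, int writes_pending)
{
	if (!reads_pending)
		return 0;
	if (writes_pending && s->starved > WRITES_STARVED)
		return 0;	/* writes have waited long enough */
	return 1;
}

int main(void)
{
	struct disp_state s = { 0 };
	int reads = 5, writes = 2;

	while (reads || writes) {
		if (pick_read(&s, reads, writes)) {
			reads--;
			s.starved++;
			printf("READ  (starved=%d)\n", s.starved);
		} else if (writes) {
			writes--;
			s.starved = 0;
			printf("WRITE\n");
		} else {
			break;
		}
	}
	return 0;
}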
+ */ + if (sd->batched > sd->fifo_batch) { + sd->batched = 0; + rq = sio_choose_expired_request(sd); + } + + /* Retrieve request */ + if (!rq) { + if (sd->starved > sd->writes_starved) + data_dir = WRITE; + + rq = sio_choose_request(sd, data_dir); + if (!rq) + return 0; + } + + /* Dispatch request */ + sio_dispatch_request(sd, rq); + + return 1; +} + +static struct request * +sio_former_request(struct request_queue *q, struct request *rq) +{ + struct sio_data *sd = q->elevator->elevator_data; + const int sync = rq_is_sync(rq); + const int data_dir = rq_data_dir(rq); + + if (rq->queuelist.prev == &sd->fifo_list[sync][data_dir]) + return NULL; + + /* Return former request */ + return list_entry(rq->queuelist.prev, struct request, queuelist); +} + +static struct request * +sio_latter_request(struct request_queue *q, struct request *rq) +{ + struct sio_data *sd = q->elevator->elevator_data; + const int sync = rq_is_sync(rq); + const int data_dir = rq_data_dir(rq); + + if (rq->queuelist.next == &sd->fifo_list[sync][data_dir]) + return NULL; + + /* Return latter request */ + return list_entry(rq->queuelist.next, struct request, queuelist); +} + +static void * +sio_init_queue(struct request_queue *q) +{ + struct sio_data *sd; + + /* Allocate structure */ + sd = kmalloc_node(sizeof(*sd), GFP_KERNEL, q->node); + if (!sd) + return NULL; + + /* Initialize fifo lists */ + INIT_LIST_HEAD(&sd->fifo_list[SYNC][READ]); + INIT_LIST_HEAD(&sd->fifo_list[SYNC][WRITE]); + INIT_LIST_HEAD(&sd->fifo_list[ASYNC][READ]); + INIT_LIST_HEAD(&sd->fifo_list[ASYNC][WRITE]); + + /* Initialize data */ + sd->batched = 0; + sd->fifo_expire[SYNC][READ] = sync_read_expire; + sd->fifo_expire[SYNC][WRITE] = sync_write_expire; + sd->fifo_expire[ASYNC][READ] = async_read_expire; + sd->fifo_expire[ASYNC][WRITE] = async_write_expire; + sd->fifo_batch = fifo_batch; + + return sd; +} + +static void +sio_exit_queue(struct elevator_queue *e) +{ + struct sio_data *sd = e->elevator_data; + + BUG_ON(!list_empty(&sd->fifo_list[SYNC][READ])); + BUG_ON(!list_empty(&sd->fifo_list[SYNC][WRITE])); + BUG_ON(!list_empty(&sd->fifo_list[ASYNC][READ])); + BUG_ON(!list_empty(&sd->fifo_list[ASYNC][WRITE])); + + /* Free structure */ + kfree(sd); +} + +/* + * sysfs code + */ + +static ssize_t +sio_var_show(int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t +sio_var_store(int *var, const char *page, size_t count) +{ + char *p = (char *) page; + + *var = simple_strtol(p, &p, 10); + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct sio_data *sd = e->elevator_data; \ + int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return sio_var_show(__data, (page)); \ +} +SHOW_FUNCTION(sio_sync_read_expire_show, sd->fifo_expire[SYNC][READ], 1); +SHOW_FUNCTION(sio_sync_write_expire_show, sd->fifo_expire[SYNC][WRITE], 1); +SHOW_FUNCTION(sio_async_read_expire_show, sd->fifo_expire[ASYNC][READ], 1); +SHOW_FUNCTION(sio_async_write_expire_show, sd->fifo_expire[ASYNC][WRITE], 1); +SHOW_FUNCTION(sio_fifo_batch_show, sd->fifo_batch, 0); +SHOW_FUNCTION(sio_writes_starved_show, sd->writes_starved, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct sio_data *sd = e->elevator_data; \ + int __data; \ + int ret = sio_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else 
if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(sio_sync_read_expire_store, &sd->fifo_expire[SYNC][READ], 0, INT_MAX, 1); +STORE_FUNCTION(sio_sync_write_expire_store, &sd->fifo_expire[SYNC][WRITE], 0, INT_MAX, 1); +STORE_FUNCTION(sio_async_read_expire_store, &sd->fifo_expire[ASYNC][READ], 0, INT_MAX, 1); +STORE_FUNCTION(sio_async_write_expire_store, &sd->fifo_expire[ASYNC][WRITE], 0, INT_MAX, 1); +STORE_FUNCTION(sio_fifo_batch_store, &sd->fifo_batch, 0, INT_MAX, 0); +STORE_FUNCTION(sio_writes_starved_store, &sd->writes_starved, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +#define DD_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, sio_##name##_show, \ + sio_##name##_store) + +static struct elv_fs_entry sio_attrs[] = { + DD_ATTR(sync_read_expire), + DD_ATTR(sync_write_expire), + DD_ATTR(async_read_expire), + DD_ATTR(async_write_expire), + DD_ATTR(fifo_batch), + DD_ATTR(writes_starved), + __ATTR_NULL +}; + +static struct elevator_type iosched_sio = { + .ops = { + .elevator_merge_req_fn = sio_merged_requests, + .elevator_dispatch_fn = sio_dispatch_requests, + .elevator_add_req_fn = sio_add_request, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38) + .elevator_queue_empty_fn = sio_queue_empty, +#endif + .elevator_former_req_fn = sio_former_request, + .elevator_latter_req_fn = sio_latter_request, + .elevator_init_fn = sio_init_queue, + .elevator_exit_fn = sio_exit_queue, + }, + + .elevator_attrs = sio_attrs, + .elevator_name = "sio", + .elevator_owner = THIS_MODULE, +}; + +static int __init sio_init(void) +{ + /* Register elevator */ + elv_register(&iosched_sio); + + return 0; +} + +static void __exit sio_exit(void) +{ + /* Unregister elevator */ + elv_unregister(&iosched_sio); +} + +module_init(sio_init); +module_exit(sio_exit); + +MODULE_AUTHOR("Miguel Boton"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simple IO scheduler"); +MODULE_VERSION("0.2"); diff --git a/block/vr-iosched.c b/block/vr-iosched.c new file mode 100644 index 00000000000..459ab721604 --- /dev/null +++ b/block/vr-iosched.c @@ -0,0 +1,452 @@ +/* +* V(R) I/O Scheduler +* +* Copyright (C) 2007 Aaron Carroll +* +* +* The algorithm: +* +* The next request is decided based on its distance from the last +* request, with a multiplicative penalty of `rev_penalty' applied +* for reversing the head direction. A rev_penalty of 1 means SSTF +* behaviour. As this variable is increased, the algorithm approaches +* pure SCAN. Setting rev_penalty to 0 forces SCAN. +* +* Async and synch requests are not treated seperately. Instead we +* rely on deadlines to ensure fairness. +* +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +enum vr_data_dir { +ASYNC, +SYNC, +}; + +enum vr_head_dir { +FORWARD, +BACKWARD, +}; + +static const int sync_expire = HZ / 2; /* max time before a sync is submitted. */ +static const int async_expire = 5 * HZ; /* ditto for async, these limits are SOFT! 
*/ +static const int fifo_batch = 1; +static const int rev_penalty = 1; /* penalty for reversing head direction */ + +struct vr_data { +struct rb_root sort_list; +struct list_head fifo_list[2]; + +struct request *next_rq; +struct request *prev_rq; + +unsigned int nbatched; +sector_t last_sector; /* head position */ +int head_dir; + +/* tunables */ +int fifo_expire[2]; +int fifo_batch; +int rev_penalty; +}; + +static void vr_move_request(struct vr_data *, struct request *); + +static inline struct vr_data * +vr_get_data(struct request_queue *q) +{ +return q->elevator->elevator_data; +} + +static void +vr_add_rq_rb(struct vr_data *vd, struct request *rq) +{ +//struct request *alias = elv_rb_add(&vd->sort_list, rq); +// +//if (unlikely(alias)) { +//vr_move_request(vd, alias); +//alias = elv_rb_add(&vd->sort_list, rq); +//BUG_ON(alias); +//} +elv_rb_add(&vd->sort_list, rq); +if (blk_rq_pos(rq) >= vd->last_sector) { +if (!vd->next_rq || blk_rq_pos(vd->next_rq) > blk_rq_pos(rq)) +vd->next_rq = rq; +} +else { +if (!vd->prev_rq || blk_rq_pos(vd->prev_rq) < blk_rq_pos(rq)) +vd->prev_rq = rq; +} + +BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq); +BUG_ON(vd->next_rq && vd->prev_rq && blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq)); +} + +static void +vr_del_rq_rb(struct vr_data *vd, struct request *rq) +{ +/* +* We might be deleting our cached next request. +* If so, find its sucessor. +*/ + +if (vd->next_rq == rq) +vd->next_rq = elv_rb_latter_request(NULL, rq); +else if (vd->prev_rq == rq) +vd->prev_rq = elv_rb_former_request(NULL, rq); + +BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq); +BUG_ON(vd->next_rq && vd->prev_rq && blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq)); + +elv_rb_del(&vd->sort_list, rq); +} + +/* +* add rq to rbtree and fifo +*/ +static void +vr_add_request(struct request_queue *q, struct request *rq) +{ +struct vr_data *vd = vr_get_data(q); +const int dir = rq_is_sync(rq); + +vr_add_rq_rb(vd, rq); + +if (vd->fifo_expire[dir]) { +rq_set_fifo_time(rq, jiffies + vd->fifo_expire[dir]); +list_add_tail(&rq->queuelist, &vd->fifo_list[dir]); +} +} + +/* +* remove rq from rbtree and fifo. 
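The file header above describes the V(R) policy: pick whichever neighbouring request is closest to the head position, but weight the candidate that would reverse the head direction by rev_penalty (1 behaves like SSTF, larger values tend toward SCAN). A self-contained sketch of that choice, expressing the penalty as a multiplier on the reversing distance rather than the division form used by vr_choose_request() below:

#include <stdio.h>

enum { FORWARD, BACKWARD };	/* current head direction */

/* Pick the closer of the two candidate sectors, de-weighting the one that
 * lies opposite to the direction the head is already moving. */
static long pick_next(long next_pos, long prev_pos, long head,
		      int dir, int rev_penalty)
{
	long next_dist = next_pos - head;	/* candidate ahead of the head */
	long prev_dist = head - prev_pos;	/* candidate behind the head   */

	if (dir == FORWARD)
		prev_dist *= rev_penalty;	/* reversing costs extra */
	else
		next_dist *= rev_penalty;

	return (next_dist <= prev_dist) ? next_pos : prev_pos;
}

int main(void)
{
	/* head at 100 moving forward; next candidate at 130, previous at 80 */
	printf("rev_penalty=1 -> %ld\n", pick_next(130, 80, 100, FORWARD, 1)); /* 80  */
	printf("rev_penalty=3 -> %ld\n", pick_next(130, 80, 100, FORWARD, 3)); /* 130 */
	return 0;
}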
+*/ +static void +vr_remove_request(struct request_queue *q, struct request *rq) +{ +struct vr_data *vd = vr_get_data(q); + +rq_fifo_clear(rq); +vr_del_rq_rb(vd, rq); +} + +static int +vr_merge(struct request_queue *q, struct request **rqp, struct bio *bio) +{ +sector_t sector = bio->bi_sector + bio_sectors(bio); +struct vr_data *vd = vr_get_data(q); +struct request *rq = elv_rb_find(&vd->sort_list, sector); + +if (rq && elv_rq_merge_ok(rq, bio)) { +*rqp = rq; +return ELEVATOR_FRONT_MERGE; +} +return ELEVATOR_NO_MERGE; +} + +static void +vr_merged_request(struct request_queue *q, struct request *req, int type) +{ +struct vr_data *vd = vr_get_data(q); + +/* +* if the merge was a front merge, we need to reposition request +*/ +if (type == ELEVATOR_FRONT_MERGE) { +vr_del_rq_rb(vd, req); +vr_add_rq_rb(vd, req); +} +} + +static void +vr_merged_requests(struct request_queue *q, struct request *rq, +struct request *next) +{ +/* +* if next expires before rq, assign its expire time to rq +* and move into next position (next will be deleted) in fifo +*/ +if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) { +if (time_before(rq_fifo_time(next), rq_fifo_time(rq))) { +list_move(&rq->queuelist, &next->queuelist); +rq_set_fifo_time(rq, rq_fifo_time(next)); +} +} + +vr_remove_request(q, next); +} + +/* +* move an entry to dispatch queue +*/ +static void +vr_move_request(struct vr_data *vd, struct request *rq) +{ +struct request_queue *q = rq->q; + +if (blk_rq_pos(rq) > vd->last_sector) +vd->head_dir = FORWARD; +else +vd->head_dir = BACKWARD; + +vd->last_sector = blk_rq_pos(rq); +vd->next_rq = elv_rb_latter_request(NULL, rq); +vd->prev_rq = elv_rb_former_request(NULL, rq); + +BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq); + +vr_remove_request(q, rq); +elv_dispatch_add_tail(q, rq); +vd->nbatched++; +} + +/* +* get the first expired request in direction ddir +*/ +static struct request * +vr_expired_request(struct vr_data *vd, int ddir) +{ +struct request *rq; + +if (list_empty(&vd->fifo_list[ddir])) +return NULL; + +rq = rq_entry_fifo(vd->fifo_list[ddir].next); +if (time_after(jiffies, rq_fifo_time(rq))) +return rq; + +return NULL; +} + +/* +* Returns the oldest expired request +*/ +static struct request * +vr_check_fifo(struct vr_data *vd) +{ +struct request *rq_sync = vr_expired_request(vd, SYNC); +struct request *rq_async = vr_expired_request(vd, ASYNC); + +if (rq_async && rq_sync) { +if (time_after(rq_fifo_time(rq_async), rq_fifo_time(rq_sync))) +return rq_sync; +} +else if (rq_sync) +return rq_sync; + +return rq_async; +} + +/* +* Return the request with the lowest penalty +*/ +static struct request * +vr_choose_request(struct vr_data *vd) +{ +int penalty = (vd->rev_penalty) ? 
: INT_MAX; +struct request *next = vd->next_rq; +struct request *prev = vd->prev_rq; +sector_t next_pen, prev_pen; + +BUG_ON(prev && prev == next); + +if (!prev) +return next; +else if (!next) +return prev; + +/* At this point both prev and next are defined and distinct */ + +next_pen = blk_rq_pos(next) - vd->last_sector; +prev_pen = vd->last_sector - blk_rq_pos(prev); + +if (vd->head_dir == FORWARD) +next_pen = do_div(next_pen, penalty); +else +prev_pen = do_div(prev_pen, penalty); + +if (next_pen <= prev_pen) +return next; + +return prev; +} + +static int +vr_dispatch_requests(struct request_queue *q, int force) +{ +struct vr_data *vd = vr_get_data(q); +struct request *rq = NULL; + +/* Check for and issue expired requests */ +if (vd->nbatched > vd->fifo_batch) { +vd->nbatched = 0; +rq = vr_check_fifo(vd); +} + +if (!rq) { +rq = vr_choose_request(vd); +if (!rq) +return 0; +} + +vr_move_request(vd, rq); + +return 1; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38) +static int +vr_queue_empty(struct request_queue *q) +{ +struct vr_data *vd = vr_get_data(q); +return RB_EMPTY_ROOT(&vd->sort_list); +} +#endif + +static void +vr_exit_queue(struct elevator_queue *e) +{ +struct vr_data *vd = e->elevator_data; +BUG_ON(!RB_EMPTY_ROOT(&vd->sort_list)); +kfree(vd); +} + +/* +* initialize elevator private data (vr_data). +*/ +static void *vr_init_queue(struct request_queue *q) +{ +struct vr_data *vd; + +vd = kmalloc_node(sizeof(*vd), GFP_KERNEL | __GFP_ZERO, q->node); +if (!vd) +return NULL; + +INIT_LIST_HEAD(&vd->fifo_list[SYNC]); +INIT_LIST_HEAD(&vd->fifo_list[ASYNC]); +vd->sort_list = RB_ROOT; +vd->fifo_expire[SYNC] = sync_expire; +vd->fifo_expire[ASYNC] = async_expire; +vd->fifo_batch = fifo_batch; +vd->rev_penalty = rev_penalty; +return vd; +} + +/* +* sysfs parts below +*/ + +static ssize_t +vr_var_show(int var, char *page) +{ +return sprintf(page, "%d\n", var); +} + +static ssize_t +vr_var_store(int *var, const char *page, size_t count) +{ +*var = simple_strtol(page, NULL, 10); +return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ +struct vr_data *vd = e->elevator_data; \ +int __data = __VAR; \ +if (__CONV) \ +__data = jiffies_to_msecs(__data); \ +return vr_var_show(__data, (page)); \ +} +SHOW_FUNCTION(vr_sync_expire_show, vd->fifo_expire[SYNC], 1); +SHOW_FUNCTION(vr_async_expire_show, vd->fifo_expire[ASYNC], 1); +SHOW_FUNCTION(vr_fifo_batch_show, vd->fifo_batch, 0); +SHOW_FUNCTION(vr_rev_penalty_show, vd->rev_penalty, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ +struct vr_data *vd = e->elevator_data; \ +int __data; \ +int ret = vr_var_store(&__data, (page), count); \ +if (__data < (MIN)) \ +__data = (MIN); \ +else if (__data > (MAX)) \ +__data = (MAX); \ +if (__CONV) \ +*(__PTR) = msecs_to_jiffies(__data); \ +else \ +*(__PTR) = __data; \ +return ret; \ +} +STORE_FUNCTION(vr_sync_expire_store, &vd->fifo_expire[SYNC], 0, INT_MAX, 1); +STORE_FUNCTION(vr_async_expire_store, &vd->fifo_expire[ASYNC], 0, INT_MAX, 1); +STORE_FUNCTION(vr_fifo_batch_store, &vd->fifo_batch, 0, INT_MAX, 0); +STORE_FUNCTION(vr_rev_penalty_store, &vd->rev_penalty, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +#define DD_ATTR(name) \ +__ATTR(name, S_IRUGO|S_IWUSR, vr_##name##_show, \ +vr_##name##_store) + +static struct elv_fs_entry vr_attrs[] = { +DD_ATTR(sync_expire), +DD_ATTR(async_expire), 
+DD_ATTR(fifo_batch), +DD_ATTR(rev_penalty), +__ATTR_NULL +}; + +static struct elevator_type iosched_vr = { +.ops = { +.elevator_merge_fn = vr_merge, +.elevator_merged_fn = vr_merged_request, +.elevator_merge_req_fn = vr_merged_requests, +.elevator_dispatch_fn = vr_dispatch_requests, +.elevator_add_req_fn = vr_add_request, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38) +.elevator_queue_empty_fn = vr_queue_empty, +#endif +.elevator_former_req_fn = elv_rb_former_request, +.elevator_latter_req_fn = elv_rb_latter_request, +.elevator_init_fn = vr_init_queue, +.elevator_exit_fn = vr_exit_queue, +}, + +.elevator_attrs = vr_attrs, +.elevator_name = "vr", +.elevator_owner = THIS_MODULE, +}; + +static int __init vr_init(void) +{ +elv_register(&iosched_vr); + +return 0; +} + +static void __exit vr_exit(void) +{ +elv_unregister(&iosched_vr); +} + +module_init(vr_init); +module_exit(vr_exit); + +MODULE_AUTHOR("Aaron Carroll"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("V(R) IO scheduler"); + diff --git a/crypto/Kconfig b/crypto/Kconfig index ae27b7534ea..823db834818 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -407,6 +407,15 @@ config CRYPTO_SHA1 help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). +config CRYPTO_SHA1_ARM + tristate "SHA1 digest algorithm (ARM-asm)" + depends on ARM + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM assembler. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH @@ -562,6 +571,30 @@ config CRYPTO_AES_NI_INTEL ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional acceleration for CTR. +config CRYPTO_AES_ARM + tristate "AES cipher algorithms (ARM-asm)" + depends on ARM + select CRYPTO_ALGAPI + select CRYPTO_AES + help + Use optimized AES assembler routines for ARM platforms. + + AES cipher algorithms (FIPS-197). AES uses the Rijndael + algorithm. + + Rijndael appears to be consistently a very good performer in + both hardware and software across a wide range of computing + environments regardless of its use in feedback or non-feedback + modes. Its key setup time is excellent, and its key agility is + good. Rijndael's very low memory requirements make it very well + suited for restricted-space environments, in which it also + demonstrates excellent performance. Rijndael's operations are + among the easiest to defend against power and timing attacks. + + The AES specifies three key sizes: 128, 192 and 256 bits + + See for more information. + config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI @@ -823,6 +856,22 @@ config CRYPTO_LZO help This is the LZO algorithm. +config CRYPTO_LZ4 + tristate "LZ4 compression algorithm" + select CRYPTO_ALGAPI + select LZ4_COMPRESS + select LZ4_DECOMPRESS + help + This is the LZ4 algorithm. + +config CRYPTO_LZ4HC + tristate "LZ4HC compression algorithm" + select CRYPTO_ALGAPI + select LZ4HC_COMPRESS + select LZ4_DECOMPRESS + help + This is the LZ4 high compression mode algorithm. 
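The CRYPTO_LZ4 and CRYPTO_LZ4HC entries above expose the new algorithms (registered below by crypto/lz4.c and crypto/lz4hc.c) through the generic compression interface, so in-kernel users reach them by name. A hedged sketch of such a caller, assuming the long-standing crypto_alloc_comp()/crypto_comp_compress() helpers; error handling is trimmed to the essentials and the function name is invented for illustration:

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/types.h>

static int try_lz4_compress(const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret;

	/* "lz4" is the cra_name registered by crypto/lz4.c in this patch */
	tfm = crypto_alloc_comp("lz4", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* *dlen is in/out: capacity of dst on entry, bytes produced on success */
	ret = crypto_comp_compress(tfm, src, slen, dst, dlen);

	crypto_free_comp(tfm);
	return ret;
}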
+ comment "Random Number Generation" config CRYPTO_ANSI_CPRNG diff --git a/crypto/Makefile b/crypto/Makefile index ce5a813d363..4ab082dc51f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -80,6 +80,8 @@ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o +obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o obj-$(CONFIG_CRYPTO_RNG2) += krng.o obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o diff --git a/crypto/lz4.c b/crypto/lz4.c new file mode 100644 index 00000000000..98bfdd71e78 --- /dev/null +++ b/crypto/lz4.c @@ -0,0 +1,105 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2013 Chanho Min + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include + +struct lz4_ctx { + void *lz4_comp_mem; +}; + +static int lz4_init(struct crypto_tfm *tfm) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS); + if (!ctx->lz4_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4_exit(struct crypto_tfm *tfm) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + vfree(ctx->lz4_comp_mem); +} + +static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + size_t tmp_len = *dlen; + int err; + + err = lz4_compress(src, slen, dst, &tmp_len, ctx->lz4_comp_mem); + + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; + + err = lz4_decompress(src, &slen, dst, tmp_len); + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return err; +} + +static struct crypto_alg alg_lz4 = { + .cra_name = "lz4", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4.cra_list), + .cra_init = lz4_init, + .cra_exit = lz4_exit, + .cra_u = { .compress = { + .coa_compress = lz4_compress_crypto, + .coa_decompress = lz4_decompress_crypto } } +}; + +static int __init lz4_mod_init(void) +{ + return crypto_register_alg(&alg_lz4); +} + +static void __exit lz4_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4); +} + +module_init(lz4_mod_init); +module_exit(lz4_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Compression Algorithm"); diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c new file mode 100644 index 00000000000..d3c9625917c --- /dev/null +++ b/crypto/lz4hc.c @@ -0,0 +1,105 @@ +/* + * Cryptographic API. 
+ * + * Copyright (c) 2013 Chanho Min + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include +#include + +struct lz4hc_ctx { + void *lz4hc_comp_mem; +}; + +static int lz4hc_init(struct crypto_tfm *tfm) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS); + if (!ctx->lz4hc_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4hc_exit(struct crypto_tfm *tfm) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + vfree(ctx->lz4hc_comp_mem); +} + +static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + size_t tmp_len = *dlen; + int err; + + err = lz4hc_compress(src, slen, dst, &tmp_len, ctx->lz4hc_comp_mem); + + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; + + err = lz4_decompress(src, &slen, dst, tmp_len); + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return err; +} + +static struct crypto_alg alg_lz4hc = { + .cra_name = "lz4hc", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4hc_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4hc.cra_list), + .cra_init = lz4hc_init, + .cra_exit = lz4hc_exit, + .cra_u = { .compress = { + .coa_compress = lz4hc_compress_crypto, + .coa_decompress = lz4hc_decompress_crypto } } +}; + +static int __init lz4hc_mod_init(void) +{ + return crypto_register_alg(&alg_lz4hc); +} + +static void __exit lz4hc_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4hc); +} + +module_init(lz4hc_mod_init); +module_exit(lz4hc_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC Compression Algorithm"); diff --git a/drivers/Makefile b/drivers/Makefile index 6357e71dcc0..bad062b86c6 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -5,6 +5,7 @@ # Rewritten to use lists instead of if-statements. # +obj-$(CONFIG_CPUQUIET_FRAMEWORK)+= cpuquiet/ obj-y += gpio/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_PARISC) += parisc/ @@ -16,7 +17,7 @@ obj-$(CONFIG_SFI) += sfi/ # PnP must come after ACPI since it will eventually need to check if acpi # was used and do nothing if so obj-$(CONFIG_PNP) += pnp/ -obj-$(CONFIG_ARM_AMBA) += amba/ +obj-$(CONFIG_ARM) += amba/ # Many drivers will want to use DMA so this has to be made available # really early. 
obj-$(CONFIG_DMA_ENGINE) += dma/ diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index e3f47872ec2..f0c1ce95a0e 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -14,7 +14,6 @@ config ACPI_APEI_GHES depends on ACPI_APEI && X86 select ACPI_HED select IRQ_WORK - select LLIST select GENERIC_ALLOCATOR help Generic Hardware Error Source provides a way to report diff --git a/drivers/amba/Makefile b/drivers/amba/Makefile index 40fe74097be..66e81c2f1e3 100644 --- a/drivers/amba/Makefile +++ b/drivers/amba/Makefile @@ -1,2 +1,2 @@ -obj-y += bus.o - +obj-$(CONFIG_ARM_AMBA) += bus.o +obj-$(CONFIG_TEGRA_AHB) += tegra-ahb.o diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c new file mode 100644 index 00000000000..66a63d50048 --- /dev/null +++ b/drivers/amba/tegra-ahb.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * Copyright (C) 2011 Google, Inc. + * + * Author: + * Jay Cheng + * James Wylder + * Benoit Goby + * Colin Cross + * Hiroshi DOYU + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include + +#define DRV_NAME "tegra-ahb" + +#define AHB_ARBITRATION_DISABLE 0x00 +#define AHB_ARBITRATION_PRIORITY_CTRL 0x04 +#define AHB_PRIORITY_WEIGHT(x) (((x) & 0x7) << 29) +#define PRIORITY_SELECT_USB BIT(6) +#define PRIORITY_SELECT_USB2 BIT(18) +#define PRIORITY_SELECT_USB3 BIT(17) + +#define AHB_GIZMO_AHB_MEM 0x0c +#define ENB_FAST_REARBITRATE BIT(2) +#define DONT_SPLIT_AHB_WR BIT(7) + +#define AHB_GIZMO_APB_DMA 0x10 +#define AHB_GIZMO_IDE 0x18 +#define AHB_GIZMO_USB 0x1c +#define AHB_GIZMO_AHB_XBAR_BRIDGE 0x20 +#define AHB_GIZMO_CPU_AHB_BRIDGE 0x24 +#define AHB_GIZMO_COP_AHB_BRIDGE 0x28 +#define AHB_GIZMO_XBAR_APB_CTLR 0x2c +#define AHB_GIZMO_VCP_AHB_BRIDGE 0x30 +#define AHB_GIZMO_NAND 0x3c +#define AHB_GIZMO_SDMMC4 0x44 +#define AHB_GIZMO_XIO 0x48 +#define AHB_GIZMO_BSEV 0x60 +#define AHB_GIZMO_BSEA 0x70 +#define AHB_GIZMO_NOR 0x74 +#define AHB_GIZMO_USB2 0x78 +#define AHB_GIZMO_USB3 0x7c +#define IMMEDIATE BIT(18) + +#define AHB_GIZMO_SDMMC1 0x80 +#define AHB_GIZMO_SDMMC2 0x84 +#define AHB_GIZMO_SDMMC3 0x88 +#define AHB_MEM_PREFETCH_CFG_X 0xd8 +#define AHB_ARBITRATION_XBAR_CTRL 0xdc +#define AHB_MEM_PREFETCH_CFG3 0xe0 +#define AHB_MEM_PREFETCH_CFG4 0xe4 +#define AHB_MEM_PREFETCH_CFG1 0xec +#define AHB_MEM_PREFETCH_CFG2 0xf0 +#define PREFETCH_ENB BIT(31) +#define MST_ID(x) (((x) & 0x1f) << 26) +#define AHBDMA_MST_ID MST_ID(5) +#define USB_MST_ID MST_ID(6) +#define USB2_MST_ID MST_ID(18) +#define USB3_MST_ID MST_ID(17) +#define ADDR_BNDRY(x) (((x) & 0xf) << 21) +#define INACTIVITY_TIMEOUT(x) (((x) & 0xffff) << 0) + +#define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 + +#define AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE BIT(17) + +static struct platform_driver tegra_ahb_driver; + +static const u32 tegra_ahb_gizmo[] = { + AHB_ARBITRATION_DISABLE, + AHB_ARBITRATION_PRIORITY_CTRL, + AHB_GIZMO_AHB_MEM, + AHB_GIZMO_APB_DMA, + AHB_GIZMO_IDE, + AHB_GIZMO_USB, + AHB_GIZMO_AHB_XBAR_BRIDGE, + AHB_GIZMO_CPU_AHB_BRIDGE, + AHB_GIZMO_COP_AHB_BRIDGE, + AHB_GIZMO_XBAR_APB_CTLR, + 
AHB_GIZMO_VCP_AHB_BRIDGE, + AHB_GIZMO_NAND, + AHB_GIZMO_SDMMC4, + AHB_GIZMO_XIO, + AHB_GIZMO_BSEV, + AHB_GIZMO_BSEA, + AHB_GIZMO_NOR, + AHB_GIZMO_USB2, + AHB_GIZMO_USB3, + AHB_GIZMO_SDMMC1, + AHB_GIZMO_SDMMC2, + AHB_GIZMO_SDMMC3, + AHB_MEM_PREFETCH_CFG_X, + AHB_ARBITRATION_XBAR_CTRL, + AHB_MEM_PREFETCH_CFG3, + AHB_MEM_PREFETCH_CFG4, + AHB_MEM_PREFETCH_CFG1, + AHB_MEM_PREFETCH_CFG2, + AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID, +}; + +struct tegra_ahb { + void __iomem *regs; + struct device *dev; + u32 ctx[0]; +}; + +static inline u32 gizmo_readl(struct tegra_ahb *ahb, u32 offset) +{ + return readl(ahb->regs + offset); +} + +static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) +{ + writel(value, ahb->regs + offset); +} + +#ifdef CONFIG_ARCH_TEGRA_3x_SOC +static int tegra_ahb_match_by_smmu(struct device *dev, void *data) +{ + struct tegra_ahb *ahb = dev_get_drvdata(dev); + struct device_node *dn = data; + + return (ahb->dev->of_node == dn) ? 1 : 0; +} + +int tegra_ahb_enable_smmu(struct device_node *dn) +{ + struct device *dev; + u32 val; + struct tegra_ahb *ahb; + + dev = driver_find_device(&tegra_ahb_driver.driver, NULL, dn, + tegra_ahb_match_by_smmu); + if (!dev) + return -EPROBE_DEFER; + ahb = dev_get_drvdata(dev); + val = gizmo_readl(ahb, AHB_ARBITRATION_XBAR_CTRL); + val |= AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE; + gizmo_writel(ahb, val, AHB_ARBITRATION_XBAR_CTRL); + return 0; +} +EXPORT_SYMBOL(tegra_ahb_enable_smmu); +#endif + +static int tegra_ahb_suspend(struct device *dev) +{ + int i; + struct tegra_ahb *ahb = dev_get_drvdata(dev); + + for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) + ahb->ctx[i] = gizmo_readl(ahb, tegra_ahb_gizmo[i]); + return 0; +} + +static int tegra_ahb_resume(struct device *dev) +{ + int i; + struct tegra_ahb *ahb = dev_get_drvdata(dev); + + for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) + gizmo_writel(ahb, ahb->ctx[i], tegra_ahb_gizmo[i]); + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(tegra_ahb_pm, + tegra_ahb_suspend, + tegra_ahb_resume, NULL); + +static void tegra_ahb_gizmo_init(struct tegra_ahb *ahb) +{ + u32 val; + + val = gizmo_readl(ahb, AHB_GIZMO_AHB_MEM); + val |= ENB_FAST_REARBITRATE | IMMEDIATE | DONT_SPLIT_AHB_WR; + gizmo_writel(ahb, val, AHB_GIZMO_AHB_MEM); + + val = gizmo_readl(ahb, AHB_GIZMO_USB); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB); + + val = gizmo_readl(ahb, AHB_GIZMO_USB2); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB2); + + val = gizmo_readl(ahb, AHB_GIZMO_USB3); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB3); + + val = gizmo_readl(ahb, AHB_ARBITRATION_PRIORITY_CTRL); + val |= PRIORITY_SELECT_USB | + PRIORITY_SELECT_USB2 | + PRIORITY_SELECT_USB3 | + AHB_PRIORITY_WEIGHT(7); + gizmo_writel(ahb, val, AHB_ARBITRATION_PRIORITY_CTRL); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG1); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + AHBDMA_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG1); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG2); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG2); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG3); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB3_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG3); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG4); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB2_MST_ID | 
+ ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG4); +} + +static int __devinit tegra_ahb_probe(struct platform_device *pdev) +{ + struct resource *res; + struct tegra_ahb *ahb; + size_t bytes; + + bytes = sizeof(*ahb) + sizeof(u32) * ARRAY_SIZE(tegra_ahb_gizmo); + ahb = devm_kzalloc(&pdev->dev, bytes, GFP_KERNEL); + if (!ahb) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + ahb->regs = devm_request_and_ioremap(&pdev->dev, res); + if (!ahb->regs) + return -EBUSY; + + ahb->dev = &pdev->dev; + platform_set_drvdata(pdev, ahb); + tegra_ahb_gizmo_init(ahb); + return 0; +} + +static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { + { .compatible = "nvidia,tegra30-ahb", }, + { .compatible = "nvidia,tegra20-ahb", }, + {}, +}; + +static struct platform_driver tegra_ahb_driver = { + .probe = tegra_ahb_probe, + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .of_match_table = tegra_ahb_of_match, + .pm = &tegra_ahb_pm, + }, +}; +module_platform_driver(tegra_ahb_driver); + +MODULE_AUTHOR("Hiroshi DOYU "); +MODULE_DESCRIPTION("Tegra AHB driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 57f96ebbce4..f73594fe8b6 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -88,6 +88,18 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. +config CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND + bool "touchdemand" + select CPU_FREQ_GOV_TOUCHDEMAND + select CPU_FREQ_GOV_PERFORMANCE + help + Use the CPUFreq governor 'touchdemand' as default. This allows + you to get a full dynamic frequency capable system by simply + loading your cpufreq low-level hardware driver. + Be aware that not all cpufreq drivers support the touchdemand + governor. If unsure have a look at the help section of the + driver. Fallback governor will be the performance governor. + config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE bool "conservative" select CPU_FREQ_GOV_CONSERVATIVE @@ -166,6 +178,24 @@ config CPU_FREQ_GOV_ONDEMAND If in doubt, say N. +config CPU_FREQ_GOV_TOUCHDEMAND + tristate "'touchdemand' cpufreq policy governor" + select CPU_FREQ_TABLE + help + 'touchdemand' - This driver adds a dynamic cpufreq policy governor. + The governor does a periodic polling and + changes frequency based on the CPU utilization. + The support for this governor depends on CPU capability to + do fast frequency switching (i.e, very low latency frequency + transitions). + + To compile this driver as a module, choose M here: the + module will be called cpufreq_touchdemand. + + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. + config CPU_FREQ_GOV_INTERACTIVE tristate "'interactive' cpufreq policy governor" help @@ -206,6 +236,20 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. +config CPU_FREQ_GOV_LULZACTIVE + tristate "'lulzactive' cpufreq governor" + depends on CPU_FREQ + help + 'lulzactive' - a new interactive governor by Tegrak! + + If in doubt, say N. 
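The governors added by these Kconfig entries plug into cpufreq through the same 3.x-era governor interface used by the in-tree interactive governor reworked further below. A minimal sketch of that registration pattern (the name and the trivial policy are illustrative only, not part of this patch):

#include <linux/cpufreq.h>
#include <linux/module.h>

static int cpufreq_governor_example(struct cpufreq_policy *policy,
				    unsigned int event)
{
	switch (event) {
	case CPUFREQ_GOV_START:
	case CPUFREQ_GOV_LIMITS:
		/* trivially pin the policy to its maximum, as the
		 * performance governor does */
		__cpufreq_driver_target(policy, policy->max,
					CPUFREQ_RELATION_H);
		break;
	case CPUFREQ_GOV_STOP:
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_example = {
	.name		= "example",
	.governor	= cpufreq_governor_example,
	.owner		= THIS_MODULE,
};

static int __init gov_example_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_example);
}

static void __exit gov_example_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_example);
}

module_init(gov_example_init);
module_exit(gov_example_exit);
MODULE_LICENSE("GPL");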
+ +config CPU_FREQ_GOV_PEGASUSQ + tristate "'pegasusq' cpufreq policy governor" + depends on CPU_FREQ + help + 'pegasusq' - governor by Samsung + menu "x86 CPU frequency scaling drivers" depends on X86 source "drivers/cpufreq/Kconfig.x86" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index d43b39150ef..d20361e0116 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -8,8 +8,11 @@ obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o +obj-$(CONFIG_CPU_FREQ_GOV_TOUCHDEMAND) += cpufreq_touchdemand.o obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o +obj-$(CONFIG_CPU_FREQ_GOV_PEGASUSQ) += cpufreq_pegasusq.o +obj-$(CONFIG_CPU_FREQ_GOV_LULZACTIVE) += cpufreq_lulzactive.o # CPUfreq cross-arch helpers obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c old mode 100755 new mode 100644 index 118a94575ca..50273130257 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -31,8 +31,14 @@ #include #include +#include "../../arch/arm/mach-tegra/dvfs.h" +#include "../../arch/arm/mach-tegra/clock.h" + #include +static DEFINE_MUTEX(dvfs_lock); +static DEFINE_MUTEX(cpu_lp_lock); + /** * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock @@ -69,7 +75,7 @@ static DEFINE_PER_CPU(int, cpufreq_policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); #define lock_policy_rwsem(mode, cpu) \ -static int lock_policy_rwsem_##mode \ +int lock_policy_rwsem_##mode \ (int cpu) \ { \ int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ @@ -87,14 +93,14 @@ lock_policy_rwsem(read, cpu); lock_policy_rwsem(write, cpu); -static void unlock_policy_rwsem_read(int cpu) +void unlock_policy_rwsem_read(int cpu) { int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); BUG_ON(policy_cpu == -1); up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); } -static void unlock_policy_rwsem_write(int cpu) +void unlock_policy_rwsem_write(int cpu) { int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); BUG_ON(policy_cpu == -1); @@ -205,8 +211,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) pr_debug("saving %lu as reference value for loops_per_jiffy; " "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); } - if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || - (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) || + if ((val == CPUFREQ_POSTCHANGE && ci->old != ci->new) || (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new); @@ -596,6 +601,345 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) return sprintf(buf, "%u\n", policy->cpuinfo.max_freq); } +#ifdef CONFIG_VOLTAGE_CONTROL +/* + * Tegra3 voltage control via cpufreq by Paul Reioux (faux123) + * inspired by Michael Huang's voltage control code for OMAP44xx + */ + +#include "../../arch/arm/mach-tegra/dvfs.h" +#include "../../arch/arm/mach-tegra/clock.h" + +extern int user_mv_table[MAX_DVFS_FREQS]; +extern int avp_millivolts[MAX_DVFS_FREQS]; +extern int lp_cpu_millivolts[MAX_DVFS_FREQS]; +extern int emc_millivolts[MAX_DVFS_FREQS]; + +static ssize_t show_UV_mV_table(struct cpufreq_policy *policy, char *buf) +{ + int i = 0; 
+ char *out = buf; + struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); + + /* find how many actual entries there are */ + i = cpu_clk_g->dvfs->num_freqs; + + for(i--; i >=0; i--) { + out += sprintf(out, "%lumhz: %i mV\n", + cpu_clk_g->dvfs->freqs[i]/1000000, + cpu_clk_g->dvfs->millivolts[i]); + } + + return out - buf; +} + +static ssize_t store_UV_mV_table(struct cpufreq_policy *policy, char *buf, size_t count) +{ + int i = 0; + unsigned long volt_cur; + int ret; + char size_cur[16]; + + struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); + + /* find how many actual entries there are */ + i = cpu_clk_g->dvfs->num_freqs; + + for(i--; i >= 0; i--) { + + if(cpu_clk_g->dvfs->freqs[i]/1000000 != 0) { + ret = sscanf(buf, "%lu", &volt_cur); + if (ret != 1) + return -EINVAL; + + /* TODO: need some robustness checks */ + user_mv_table[i] = volt_cur; + pr_info("cpu g user mv tbl[%i]: %lu\n", i, volt_cur); + + /* Non-standard sysfs interface: advance buf */ + ret = sscanf(buf, "%s", size_cur); + buf += (strlen(size_cur)+1); + } + } + /* update dvfs table here */ + cpu_clk_g->dvfs->millivolts = user_mv_table; + + return count; +} +static ssize_t show_lp_UV_mV_table(struct cpufreq_policy *policy, char *buf) +{ + int i = 0; + char *out = buf; + struct clk *cpu_clk_lp = tegra_get_clock_by_name("cpu_lp"); + + /* find how many actual entries there are */ + i = cpu_clk_lp->dvfs->num_freqs; + + for(i--; i >=0; i--) { + out += sprintf(out, "%lumhz: %i mV\n", + cpu_clk_lp->dvfs->freqs[i]/1000000, + cpu_clk_lp->dvfs->millivolts[i]); + } + + return out - buf; +} + +static ssize_t store_lp_UV_mV_table(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + int i = 0; + unsigned long volt_cur; + int ret; + char size_cur[16]; + + struct clk *cpu_clk_lp = tegra_get_clock_by_name("cpu_lp"); + + /* find how many actual entries there are */ + i = cpu_clk_lp->dvfs->num_freqs; + + for(i--; i >= 0; i--) { + + if(cpu_clk_lp->dvfs->freqs[i]/1000000 != 0) { + ret = sscanf(buf, "%lu", &volt_cur); + if (ret != 1) + return -EINVAL; + + /* TODO: need some robustness checks */ + lp_cpu_millivolts[i] = volt_cur; + pr_info("cpu lp mv tbl[%i]: %lu\n", i, volt_cur); + + /* Non-standard sysfs interface: advance buf */ + ret = sscanf(buf, "%s", size_cur); + buf += (strlen(size_cur)+1); + } + } + + return count; +} + +static ssize_t show_emc_UV_mV_table(struct cpufreq_policy *policy, char *buf) +{ + int i = 0; + char *out = buf; + struct clk *clk_emc = tegra_get_clock_by_name("emc"); + + /* find how many actual entries there are */ + i = clk_emc->dvfs->num_freqs; + + for(i--; i >=0; i--) { + out += sprintf(out, "%lumhz: %i mV\n", + clk_emc->dvfs->freqs[i]/1000000, + clk_emc->dvfs->millivolts[i]); + } + + return out - buf; +} + +static ssize_t store_emc_UV_mV_table(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + int i = 0; + unsigned long volt_cur; + int ret; + char size_cur[16]; + + struct clk *clk_emc = tegra_get_clock_by_name("emc"); + + /* find how many actual entries there are */ + i = clk_emc->dvfs->num_freqs; + + for(i--; i >= 0; i--) { + + if(clk_emc->dvfs->freqs[i]/1000000 != 0) { + ret = sscanf(buf, "%lu", &volt_cur); + if (ret != 1) + return -EINVAL; + + /* TODO: need some robustness checks */ + emc_millivolts[i] = volt_cur; + pr_info("emc mv tbl[%i]: %lu\n", i, volt_cur); + + /* Non-standard sysfs interface: advance buf */ + ret = sscanf(buf, "%s", size_cur); + buf += (strlen(size_cur)+1); + } + } + + return count; +} + +static ssize_t show_avp_UV_mV_table(struct cpufreq_policy 
*policy, char *buf) +{ + int i = 0; + char *out = buf; + struct clk *avp_clk = tegra_get_clock_by_name("3d"); + + /* find how many actual entries there are */ + i = avp_clk->dvfs->num_freqs; + + for(i--; i >=0; i--) { + out += sprintf(out, "%lumhz: %i mV\n", + avp_clk->dvfs->freqs[i]/1000000, + avp_clk->dvfs->millivolts[i]); + } + + return out - buf; +} + +static ssize_t store_avp_UV_mV_table(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + int i = 0; + unsigned long volt_cur; + int ret; + char size_cur[16]; + + struct clk *avp_clk = tegra_get_clock_by_name("3d"); + + /* find how many actual entries there are */ + i = avp_clk->dvfs->num_freqs; + + for(i--; i >= 0; i--) { + + if(avp_clk->dvfs->freqs[i]/1000000 != 0) { + ret = sscanf(buf, "%lu", &volt_cur); + if (ret != 1) + return -EINVAL; + + /* TODO: need some robustness checks */ + avp_millivolts[i] = volt_cur; + pr_info("avp mv tbl[%i]: %lu\n", i, volt_cur); + + /* Non-standard sysfs interface: advance buf */ + ret = sscanf(buf, "%s", size_cur); + buf += (strlen(size_cur)+1); + } + } + + return count; +} +#endif + +static ssize_t show_gpu_oc(struct cpufreq_policy *policy, char *buf) +{ + char *c = buf; + struct clk *gpu = tegra_get_clock_by_name("3d"); + unsigned int i = gpu->dvfs->num_freqs; + unsigned long gpu_freq = 0; + + if (i <= 0) + gpu_freq = -1;; + + if (i >= 1) + gpu_freq = gpu->dvfs->freqs[gpu->dvfs->num_freqs-1]/1000000; + + return sprintf(c, "%lu\n", gpu_freq); +} + +static ssize_t store_gpu_oc(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + int ret; + unsigned long gpu_freq = 0; + unsigned int i = 0; + unsigned long new_gpu_freq = 0; + unsigned int new_volt = 0; + + //all the tables that need to be updated with the new frequencies + struct clk *vde = tegra_get_clock_by_name("vde"); + struct clk *mpe = tegra_get_clock_by_name("mpe"); + struct clk *two_d = tegra_get_clock_by_name("2d"); + struct clk *epp = tegra_get_clock_by_name("epp"); + struct clk *three_d = tegra_get_clock_by_name("3d"); + struct clk *three_d2 = tegra_get_clock_by_name("3d2"); + struct clk *se = tegra_get_clock_by_name("se"); + struct clk *cbus = tegra_get_clock_by_name("cbus"); + struct clk *host1x = tegra_get_clock_by_name("host1x"); + struct clk *pll_c = tegra_get_clock_by_name("pll_c"); + struct clk *sbus = tegra_get_clock_by_name("sbus"); + + unsigned int array_size = three_d->dvfs->num_freqs; + + if (array_size <= 0) + return -EINVAL; + + char cur_size[array_size]; + i = array_size; + + ret = sscanf(buf, "%lu", &gpu_freq); + + if (ret == 0) + return -EINVAL; + + new_gpu_freq = gpu_freq*1000000; + + vde->max_rate = new_gpu_freq; + mpe->max_rate = new_gpu_freq; + two_d->max_rate = new_gpu_freq; + epp->max_rate = new_gpu_freq; + three_d->max_rate = new_gpu_freq; + three_d2->max_rate = new_gpu_freq; + se->max_rate = new_gpu_freq; + host1x->max_rate = ( new_gpu_freq / 2 ); + cbus->max_rate = new_gpu_freq; + pll_c->max_rate = ( new_gpu_freq*2 ); + pr_info("NEW PLL_C MAX_RATE: %lu\n", pll_c->max_rate); + sbus->max_rate = (new_gpu_freq/3); + + for (i--; i >= 5; i--) { + mutex_lock(&dvfs_lock); +/* if (gpu_freq < 600) { + new_volt = 1200; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES < 600: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 600 && gpu_freq < 666) { + new_volt = 1400; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES >= 600: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 666 && gpu_freq < 750) { + new_volt = 1500; + vde->dvfs->millivolts[i] = new_volt; + 
pr_info("NEW VOLTAGES > 700: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 700 && gpu_freq < 775) { + new_volt = 1550; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES >= 750: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 750 && gpu_freq < 800) { + new_volt = 1600; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES >= 775: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 800) { + new_volt = 1650; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES >= 800: %d\n", vde->dvfs->millivolts[i]); + } +*/ + + vde->dvfs->freqs[i] = new_gpu_freq; + mpe->dvfs->freqs[i] = new_gpu_freq; + two_d->dvfs->freqs[i] = new_gpu_freq; + epp->dvfs->freqs[i] = new_gpu_freq; + three_d->dvfs->freqs[i] = new_gpu_freq; + three_d2->dvfs->freqs[i] = new_gpu_freq; + se->dvfs->freqs[i] = new_gpu_freq; + host1x->dvfs->freqs[i] = ( new_gpu_freq / 2 ); + cbus->dvfs->freqs[i] = new_gpu_freq; + pll_c->dvfs->freqs[i] = ( new_gpu_freq * 2 ); + pr_info("NEW PLL_C FREQS: %lu\n", pll_c->dvfs->freqs[i]); + sbus->dvfs->freqs[i] = ( new_gpu_freq / 3 ); + mutex_unlock(&dvfs_lock); + } + + ret = sscanf(buf, "%s", cur_size); + + if (ret == 0) + return -EINVAL; + + buf += (strlen(cur_size) + 1); + + return count; +} cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400); cpufreq_freq_attr_ro(cpuinfo_min_freq); cpufreq_freq_attr_ro(cpuinfo_max_freq); @@ -613,6 +957,13 @@ cpufreq_freq_attr_rw(scaling_setspeed); cpufreq_freq_attr_rw(dvfs_test); cpufreq_freq_attr_ro(policy_min_freq); cpufreq_freq_attr_ro(policy_max_freq); +cpufreq_freq_attr_rw(gpu_oc); +#ifdef CONFIG_VOLTAGE_CONTROL +cpufreq_freq_attr_rw(UV_mV_table); +cpufreq_freq_attr_rw(lp_UV_mV_table); +cpufreq_freq_attr_rw(emc_UV_mV_table); +cpufreq_freq_attr_rw(avp_UV_mV_table); +#endif static struct attribute *default_attrs[] = { &cpuinfo_min_freq.attr, @@ -628,7 +979,15 @@ static struct attribute *default_attrs[] = { &scaling_setspeed.attr, &dvfs_test.attr, &policy_min_freq.attr, + &gpu_oc.attr, &policy_max_freq.attr, +#ifdef CONFIG_VOLTAGE_CONTROL + &UV_mV_table.attr, + &lp_UV_mV_table.attr, + &emc_UV_mV_table.attr, + &avp_UV_mV_table.attr, +#endif + NULL }; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 33b56e5c5c1..c97b468ee9f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -120,10 +120,12 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) { - u64 idle_time = get_cpu_idle_time_us(cpu, wall); + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); return idle_time; } diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 0a7b6d1c660..8cd5d736029 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -19,14 +19,15 @@ #include #include #include -#include +#include +#include +#include #include #include #include #include #include #include -#include #include #include #include @@ -35,38 +36,33 @@ #define CREATE_TRACE_POINTS #include -static atomic_t active_count = ATOMIC_INIT(0); +static int active_count; struct cpufreq_interactive_cpuinfo { struct timer_list cpu_timer; - int timer_idlecancel; + struct timer_list cpu_slack_timer; + spinlock_t load_lock; /* protects the next 4 fields */ u64 time_in_idle; - u64 time_in_iowait; - 
u64 idle_exit_time; - u64 timer_run_time; - int idling; - u64 freq_change_time; - u64 freq_change_time_in_idle; - u64 freq_change_time_in_iowait; + u64 time_in_idle_timestamp; + u64 cputime_speedadj; + u64 cputime_speedadj_timestamp; struct cpufreq_policy *policy; struct cpufreq_frequency_table *freq_table; unsigned int target_freq; unsigned int floor_freq; u64 floor_validate_time; + u64 hispeed_validate_time; + struct rw_semaphore enable_sem; int governor_enabled; }; static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo); -/* Workqueues handle frequency scaling */ -static struct task_struct *up_task; -static struct workqueue_struct *down_wq; -static struct work_struct freq_scale_down_work; -static cpumask_t up_cpumask; -static spinlock_t up_cpumask_lock; -static cpumask_t down_cpumask; -static spinlock_t down_cpumask_lock; -static struct mutex set_speed_lock; +/* realtime thread handles frequency scaling */ +static struct task_struct *speedchange_task; +static cpumask_t speedchange_cpumask; +static spinlock_t speedchange_cpumask_lock; +static struct mutex gov_lock; struct cpufreq_interactive_core_lock { struct pm_qos_request_list qos_min_req; @@ -84,51 +80,38 @@ struct cpufreq_interactive_core_lock { static struct cpufreq_interactive_core_lock core_lock; - /* Hi speed to bump to from lo speed when load burst (default max) */ -static u64 hispeed_freq; - -/* Boost frequency by boost_factor when CPU load at or above this value. */ -#define DEFAULT_GO_MAXSPEED_LOAD 85 -static unsigned long go_maxspeed_load; - -/* Go to hispeed_freq when CPU load at or above this value. */ -#define DEFAULT_GO_HISPEED_LOAD 85 -static unsigned long go_hispeed_load; +static unsigned int hispeed_freq; -/* Base of exponential raise to max speed; if 0 - jump to maximum */ -static unsigned long boost_factor; +/* Go to hi speed when CPU load at or above this value. */ +#define DEFAULT_GO_HISPEED_LOAD 99 +static unsigned long go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; -/* Max frequency boost in Hz; if 0 - no max is enforced */ -static unsigned long max_boost; - -/* Consider IO as busy */ -static unsigned long io_is_busy; - -/* - * Targeted sustainable load relatively to current frequency. - * If 0, target is set realtively to the max speed - */ -static unsigned long sustain_load; +/* Target load. Lower values result in higher CPU speeds. */ +#define DEFAULT_TARGET_LOAD 95 +static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; +static spinlock_t target_loads_lock; +static unsigned int *target_loads = default_target_loads; +static int ntarget_loads = ARRAY_SIZE(default_target_loads); /* * The minimum amount of time to spend at a frequency before we can ramp down. */ -#define DEFAULT_MIN_SAMPLE_TIME 30000; -static unsigned long min_sample_time; +#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) +static unsigned long min_sample_time = DEFAULT_MIN_SAMPLE_TIME; /* * The sample rate of the timer used to increase frequency */ -#define DEFAULT_TIMER_RATE 20000; -static unsigned long timer_rate; +#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC) +static unsigned long timer_rate = DEFAULT_TIMER_RATE; /* * Wait this long before raising speed above hispeed, by default a single * timer interval. */ #define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE -static unsigned long above_hispeed_delay_val; +static unsigned long above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; /* * Boost pulse to hispeed on touchscreen input. 
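The per-frequency target_loads table introduced above replaces the earlier go_maxspeed_load/boost_factor/sustain_load tunables (removed later in this patch): entries at even indices are target loads, and entries at odd indices are the frequencies (in kHz) at or above which the following load applies, as implemented by freq_to_targetload() further below. A small self-contained sketch of that lookup with an assumed example table (values and the sysfs path are illustrative):

#include <linux/kernel.h>

/*
 * Example: target 95% load below 1.3 GHz, 85% from 1.3 GHz, 99% from 1.7 GHz.
 * The equivalent tunable write would be roughly:
 *   echo "95 1300000:85 1700000:99" > /sys/devices/system/cpu/cpufreq/interactive/target_loads
 */
static unsigned int example_target_loads[] = { 95, 1300000, 85, 1700000, 99 };

static unsigned int example_freq_to_targetload(unsigned int freq)
{
	int n = ARRAY_SIZE(example_target_loads);
	int i;

	/* advance over freq:load pairs while the listed frequency is <= freq */
	for (i = 0; i < n - 1 && freq >= example_target_loads[i + 1]; i += 2)
		;

	return example_target_loads[i];
}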
@@ -141,11 +124,21 @@ struct cpufreq_interactive_inputopen { }; static struct cpufreq_interactive_inputopen inputopen; +static struct workqueue_struct *inputopen_wq; + +/* Non-zero means indefinite speed boost active */ +static int boost_val; +/* Duration of a boot pulse in usecs */ +static int boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME; +/* End time of boost pulse in ktime converted to usecs */ +static u64 boostpulse_endtime; /* - * Non-zero means longer-term speed boost active. + * Max additional time to wait in idle, beyond timer_rate, at speeds above + * minimum before wakeup to reduce speed, or -1 if unnecessary. */ -static int boost_val; +#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE) +static int timer_slack_val = DEFAULT_TIMER_SLACK; static int cpufreq_governor_interactive(struct cpufreq_policy *policy, unsigned int event); @@ -160,178 +153,210 @@ struct cpufreq_governor cpufreq_gov_interactive = { .owner = THIS_MODULE, }; -static unsigned int cpufreq_interactive_get_target( - int cpu_load, int load_since_change, +static void cpufreq_interactive_timer_resched( struct cpufreq_interactive_cpuinfo *pcpu) { - unsigned int target_freq; + unsigned long expires = jiffies + usecs_to_jiffies(timer_rate); + unsigned long flags; - /* - * Choose greater of short-term load (since last idle timer - * started or timer function re-armed itself) or long-term load - * (since last frequency change). - */ - if (load_since_change > cpu_load) - cpu_load = load_since_change; + mod_timer_pinned(&pcpu->cpu_timer, expires); + if (timer_slack_val >= 0 && pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(timer_slack_val); + mod_timer_pinned(&pcpu->cpu_slack_timer, expires); + } + + spin_lock_irqsave(&pcpu->load_lock, flags); + pcpu->time_in_idle = + get_cpu_idle_time_us(smp_processor_id(), + &pcpu->time_in_idle_timestamp); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + spin_unlock_irqrestore(&pcpu->load_lock, flags); +} - /* Exponential boost policy */ - if (boost_factor) { +static unsigned int freq_to_targetload(unsigned int freq) +{ + int i; + unsigned int ret; + unsigned long flags; - if (cpu_load >= go_maxspeed_load) { - target_freq = pcpu->policy->cur * boost_factor; + spin_lock_irqsave(&target_loads_lock, flags); - if (max_boost && - target_freq > pcpu->policy->cur + max_boost) + for (i = 0; i < ntarget_loads - 1 && freq >= target_loads[i+1]; i += 2) + ; - target_freq = pcpu->policy->cur + max_boost; - } else { + ret = target_loads[i]; + spin_unlock_irqrestore(&target_loads_lock, flags); + return ret; +} - if (!sustain_load) - sustain_load = 100; +/* + * If increasing frequencies never map to a lower target load then + * choose_freq() will find the minimum frequency that does not exceed its + * target load given the current load. 
+ */ - target_freq = - (pcpu->policy->cur * cpu_load / sustain_load); - } +static unsigned int choose_freq( + struct cpufreq_interactive_cpuinfo *pcpu, unsigned int loadadjfreq) +{ + unsigned int freq = pcpu->policy->cur; + unsigned int prevfreq, freqmin, freqmax; + unsigned int tl; + int index; - goto done; - } + freqmin = 0; + freqmax = UINT_MAX; - /* Jump boost policy */ - if (cpu_load >= go_hispeed_load || boost_val) { - if (pcpu->target_freq <= pcpu->policy->min) { - target_freq = hispeed_freq; - } else { - target_freq = pcpu->policy->max * cpu_load / 100; - - if (target_freq < hispeed_freq) - target_freq = hispeed_freq; - - if (pcpu->target_freq == hispeed_freq && - target_freq > hispeed_freq && - cputime64_sub(pcpu->timer_run_time, - pcpu->freq_change_time) - < above_hispeed_delay_val) { - - target_freq = pcpu->target_freq; - trace_cpufreq_interactive_notyet( - smp_processor_id(), - cpu_load, - pcpu->target_freq, - target_freq); + do { + prevfreq = freq; + tl = freq_to_targetload(freq); + + /* + * Find the lowest frequency where the computed load is less + * than or equal to the target load. + */ + + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, loadadjfreq / tl, + CPUFREQ_RELATION_L, &index); + freq = pcpu->freq_table[index].frequency; + + if (freq > prevfreq) { + /* The previous frequency is too low. */ + freqmin = prevfreq; + + if (freq >= freqmax) { + /* + * Find the highest frequency that is less + * than freqmax. + */ + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmax - 1, CPUFREQ_RELATION_H, + &index); + freq = pcpu->freq_table[index].frequency; + + if (freq == freqmin) { + /* + * The first frequency below freqmax + * has already been found to be too + * low. freqmax is the lowest speed + * we found that is fast enough. + */ + freq = freqmax; + break; + } + } + } else if (freq < prevfreq) { + /* The previous frequency is high enough. */ + freqmax = prevfreq; + + if (freq <= freqmin) { + /* + * Find the lowest frequency that is higher + * than freqmin. + */ + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmin + 1, CPUFREQ_RELATION_L, + &index); + freq = pcpu->freq_table[index].frequency; + + /* + * If freqmax is the first frequency above + * freqmin then we have already found that + * this speed is fast enough. + */ + if (freq == freqmax) + break; } } - } else { - target_freq = pcpu->policy->max * cpu_load / 100; - } -done: - target_freq = min(target_freq, pcpu->policy->max); - return target_freq; + /* If same frequency chosen as previous then done. 
*/ + } while (freq != prevfreq); + + return freq; } -static inline cputime64_t get_cpu_iowait_time( - unsigned int cpu, cputime64_t *wall) +static u64 update_load(int cpu) { - u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + u64 now; + u64 now_idle; + unsigned int delta_idle; + unsigned int delta_time; + u64 active_time; - if (iowait_time == -1ULL) - return 0; + now_idle = get_cpu_idle_time_us(cpu, &now); + delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle); + delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp); + active_time = delta_time - delta_idle; + pcpu->cputime_speedadj += active_time * pcpu->policy->cur; - return iowait_time; + pcpu->time_in_idle = now_idle; + pcpu->time_in_idle_timestamp = now; + return now; } static void cpufreq_interactive_timer(unsigned long data) { - unsigned int delta_idle; - unsigned int delta_iowait; + u64 now; unsigned int delta_time; + u64 cputime_speedadj; int cpu_load; - int load_since_change; - u64 time_in_idle; - u64 time_in_iowait; - u64 idle_exit_time; struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, data); - u64 now_idle; - u64 now_iowait; unsigned int new_freq; + unsigned int loadadjfreq; unsigned int index; unsigned long flags; + bool boosted; - smp_rmb(); - + if (!down_read_trylock(&pcpu->enable_sem)) + return; if (!pcpu->governor_enabled) goto exit; - /* - * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, - * this lets idle exit know the current idle time sample has - * been processed, and idle exit can generate a new sample and - * re-arm the timer. This prevents a concurrent idle - * exit on that CPU from writing a new set of info at the same time - * the timer function runs (the timer function can't use that info - * until more time passes). - */ - time_in_idle = pcpu->time_in_idle; - time_in_iowait = pcpu->time_in_iowait; - idle_exit_time = pcpu->idle_exit_time; - now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); - now_iowait = get_cpu_iowait_time(data, NULL); - smp_wmb(); - - /* If we raced with cancelling a timer, skip. */ - if (!idle_exit_time) - goto exit; - - delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle); - delta_iowait = (unsigned int) cputime64_sub(now_iowait, time_in_iowait); - delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, - idle_exit_time); + spin_lock_irqsave(&pcpu->load_lock, flags); + now = update_load(data); + delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp); + cputime_speedadj = pcpu->cputime_speedadj; + spin_unlock_irqrestore(&pcpu->load_lock, flags); - /* - * If timer ran less than 1ms after short-term sample started, retry. 
- */ - if (delta_time < 1000) + if (WARN_ON_ONCE(!delta_time)) goto rearm; - if (delta_idle > delta_time) - cpu_load = 0; - else { - if (io_is_busy && delta_idle >= delta_iowait) - delta_idle -= delta_iowait; + do_div(cputime_speedadj, delta_time); + loadadjfreq = (unsigned int)cputime_speedadj * 100; + cpu_load = loadadjfreq / pcpu->target_freq; + boosted = boost_val || now < boostpulse_endtime; - cpu_load = 100 * (delta_time - delta_idle) / delta_time; + if (cpu_load >= go_hispeed_load || boosted) { + if (pcpu->target_freq < hispeed_freq) { + new_freq = hispeed_freq; + } else { + new_freq = choose_freq(pcpu, loadadjfreq); + + if (new_freq < hispeed_freq) + new_freq = hispeed_freq; + } + } else { + new_freq = choose_freq(pcpu, loadadjfreq); } - delta_idle = (unsigned int) cputime64_sub(now_idle, - pcpu->freq_change_time_in_idle); - delta_iowait = (unsigned int) cputime64_sub(now_iowait, - pcpu->freq_change_time_in_iowait); - delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, - pcpu->freq_change_time); - - if ((delta_time == 0) || (delta_idle > delta_time)) - load_since_change = 0; - else { - if (io_is_busy && delta_idle >= delta_iowait) - delta_idle -= delta_iowait; - - load_since_change = - 100 * (delta_time - delta_idle) / delta_time; + if (pcpu->target_freq >= hispeed_freq && + new_freq > pcpu->target_freq && + now - pcpu->hispeed_validate_time < above_hispeed_delay_val) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + goto rearm; } - /* - * Combine short-term load (since last idle timer started or timer - * function re-armed itself) and long-term load (since last frequency - * change) to determine new target frequency. - * - * This function implements the cpufreq scaling policy - */ - new_freq = cpufreq_interactive_get_target(cpu_load, load_since_change, - pcpu); + pcpu->hispeed_validate_time = now; if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table, - new_freq, CPUFREQ_RELATION_H, + new_freq, CPUFREQ_RELATION_L, &index)) { pr_warn_once("timer %d: cpufreq_frequency_table_target error\n", (int) data); @@ -345,41 +370,42 @@ static void cpufreq_interactive_timer(unsigned long data) * floor frequency for the minimum sample time since last validated. */ if (new_freq < pcpu->floor_freq) { - if (cputime64_sub(pcpu->timer_run_time, - pcpu->floor_validate_time) - < min_sample_time) { - - trace_cpufreq_interactive_notyet(data, cpu_load, - pcpu->target_freq, new_freq); + if (now - pcpu->floor_validate_time < min_sample_time) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); goto rearm; } } - pcpu->floor_freq = new_freq; - pcpu->floor_validate_time = pcpu->timer_run_time; + /* + * Update the timestamp for checking whether speed has been held at + * or above the selected frequency for a minimum of min_sample_time, + * if not boosted to hispeed_freq. If boosted to hispeed_freq then we + * allow the speed to drop as soon as the boostpulse duration expires + * (or the indefinite boost is turned off). 
+ */ + + if (!boosted || new_freq > hispeed_freq) { + pcpu->floor_freq = new_freq; + pcpu->floor_validate_time = now; + } if (pcpu->target_freq == new_freq) { - trace_cpufreq_interactive_already(data, cpu_load, - pcpu->target_freq, new_freq); + trace_cpufreq_interactive_already( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); goto rearm_if_notmax; } trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, - new_freq); - - if (new_freq < pcpu->target_freq) { - pcpu->target_freq = new_freq; - spin_lock_irqsave(&down_cpumask_lock, flags); - cpumask_set_cpu(data, &down_cpumask); - spin_unlock_irqrestore(&down_cpumask_lock, flags); - queue_work(down_wq, &freq_scale_down_work); - } else { - pcpu->target_freq = new_freq; - spin_lock_irqsave(&up_cpumask_lock, flags); - cpumask_set_cpu(data, &up_cpumask); - spin_unlock_irqrestore(&up_cpumask_lock, flags); - wake_up_process(up_task); - } + pcpu->policy->cur, new_freq); + + pcpu->target_freq = new_freq; + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + cpumask_set_cpu(data, &speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + wake_up_process(speedchange_task); rearm_if_notmax: /* @@ -390,31 +416,11 @@ static void cpufreq_interactive_timer(unsigned long data) goto exit; rearm: - if (!timer_pending(&pcpu->cpu_timer)) { - /* - * If already at min: if that CPU is idle, don't set timer. - * Else cancel the timer if that CPU goes idle. We don't - * need to re-evaluate speed until the next idle exit. - */ - if (pcpu->target_freq == pcpu->policy->min) { - smp_rmb(); - - if (pcpu->idling) - goto exit; - - pcpu->timer_idlecancel = 1; - } - - pcpu->time_in_idle = get_cpu_idle_time_us( - data, &pcpu->idle_exit_time); - pcpu->time_in_iowait = get_cpu_iowait_time( - data, NULL); - - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); - } + if (!timer_pending(&pcpu->cpu_timer)) + cpufreq_interactive_timer_resched(pcpu); exit: + up_read(&pcpu->enable_sem); return; } @@ -424,15 +430,16 @@ static void cpufreq_interactive_idle_start(void) &per_cpu(cpuinfo, smp_processor_id()); int pending; - if (!pcpu->governor_enabled) + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); return; + } - pcpu->idling = 1; - smp_wmb(); pending = timer_pending(&pcpu->cpu_timer); if (pcpu->target_freq != pcpu->policy->min) { -#ifdef CONFIG_SMP /* * Entering idle while not at lowest speed. On some * platforms this can hold the other CPU(s) at that speed @@ -441,35 +448,11 @@ static void cpufreq_interactive_idle_start(void) * min indefinitely. This should probably be a quirk of * the CPUFreq driver. */ - if (!pending) { - pcpu->time_in_idle = get_cpu_idle_time_us( - smp_processor_id(), &pcpu->idle_exit_time); - pcpu->time_in_iowait = get_cpu_iowait_time( - smp_processor_id(), NULL); - pcpu->timer_idlecancel = 0; - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); - } -#endif - } else { - /* - * If at min speed and entering idle after load has - * already been evaluated, and a timer has been set just in - * case the CPU suddenly goes busy, cancel that timer. The - * CPU didn't go busy; we'll recheck things upon idle exit. - */ - if (pending && pcpu->timer_idlecancel) { - del_timer(&pcpu->cpu_timer); - /* - * Ensure last timer run time is after current idle - * sample start time, so next idle exit will always - * start a new idle sampling period. 
- */ - pcpu->idle_exit_time = 0; - pcpu->timer_idlecancel = 0; - } + if (!pending) + cpufreq_interactive_timer_resched(pcpu); } + up_read(&pcpu->enable_sem); } static void cpufreq_interactive_idle_end(void) @@ -477,37 +460,26 @@ static void cpufreq_interactive_idle_end(void) struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, smp_processor_id()); - pcpu->idling = 0; - smp_wmb(); + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return; + } - /* - * Arm the timer for 1-2 ticks later if not already, and if the timer - * function has already processed the previous load sampling - * interval. (If the timer is not pending but has not processed - * the previous interval, it is probably racing with us on another - * CPU. Let it compute load based on the previous sample and then - * re-arm the timer for another interval when it's done, rather - * than updating the interval start time to be "now", which doesn't - * give the timer function enough time to make a decision on this - * run.) - */ - if (timer_pending(&pcpu->cpu_timer) == 0 && - pcpu->timer_run_time >= pcpu->idle_exit_time && - pcpu->governor_enabled) { - pcpu->time_in_idle = - get_cpu_idle_time_us(smp_processor_id(), - &pcpu->idle_exit_time); - pcpu->time_in_iowait = - get_cpu_iowait_time(smp_processor_id(), - NULL); - pcpu->timer_idlecancel = 0; - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); + /* Arm the timer for 1-2 ticks later if not already. */ + if (!timer_pending(&pcpu->cpu_timer)) { + cpufreq_interactive_timer_resched(pcpu); + } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) { + del_timer(&pcpu->cpu_timer); + del_timer(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer(smp_processor_id()); } + up_read(&pcpu->enable_sem); } -static int cpufreq_interactive_up_task(void *data) +static int cpufreq_interactive_speedchange_task(void *data) { unsigned int cpu; cpumask_t tmp_mask; @@ -516,34 +488,35 @@ static int cpufreq_interactive_up_task(void *data) while (1) { set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irqsave(&up_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); - if (cpumask_empty(&up_cpumask)) { - spin_unlock_irqrestore(&up_cpumask_lock, flags); + if (cpumask_empty(&speedchange_cpumask)) { + spin_unlock_irqrestore(&speedchange_cpumask_lock, + flags); schedule(); if (kthread_should_stop()) break; - spin_lock_irqsave(&up_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); } set_current_state(TASK_RUNNING); - tmp_mask = up_cpumask; - cpumask_clear(&up_cpumask); - spin_unlock_irqrestore(&up_cpumask_lock, flags); + tmp_mask = speedchange_cpumask; + cpumask_clear(&speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); for_each_cpu(cpu, &tmp_mask) { unsigned int j; unsigned int max_freq = 0; pcpu = &per_cpu(cpuinfo, cpu); - smp_rmb(); - - if (!pcpu->governor_enabled) + if (!down_read_trylock(&pcpu->enable_sem)) continue; - - mutex_lock(&set_speed_lock); + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + continue; + } for_each_cpu(j, pcpu->policy->cpus) { struct cpufreq_interactive_cpuinfo *pjcpu = @@ -553,108 +526,169 @@ static int cpufreq_interactive_up_task(void *data) max_freq = pjcpu->target_freq; } - __cpufreq_driver_target(pcpu->policy, - max_freq, - CPUFREQ_RELATION_H); - mutex_unlock(&set_speed_lock); - - trace_cpufreq_interactive_up(cpu, pcpu->target_freq, - pcpu->policy->cur); + if (max_freq != pcpu->policy->cur) 
+ __cpufreq_driver_target(pcpu->policy, + max_freq, + CPUFREQ_RELATION_H); + trace_cpufreq_interactive_setspeed(cpu, + pcpu->target_freq, + pcpu->policy->cur); - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(cpu, - &pcpu->freq_change_time); - pcpu->freq_change_time_in_iowait = - get_cpu_iowait_time(cpu, NULL); + up_read(&pcpu->enable_sem); } } return 0; } -static void cpufreq_interactive_freq_down(struct work_struct *work) +static void cpufreq_interactive_boost(void) { - unsigned int cpu; - cpumask_t tmp_mask; + int i; + int anyboost = 0; unsigned long flags; struct cpufreq_interactive_cpuinfo *pcpu; - spin_lock_irqsave(&down_cpumask_lock, flags); - tmp_mask = down_cpumask; - cpumask_clear(&down_cpumask); - spin_unlock_irqrestore(&down_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); - for_each_cpu(cpu, &tmp_mask) { - unsigned int j; - unsigned int max_freq = 0; + for_each_online_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); - pcpu = &per_cpu(cpuinfo, cpu); - smp_rmb(); + if (pcpu->target_freq < hispeed_freq) { + pcpu->target_freq = hispeed_freq; + cpumask_set_cpu(i, &speedchange_cpumask); + pcpu->hispeed_validate_time = + ktime_to_us(ktime_get()); + anyboost = 1; + } - if (!pcpu->governor_enabled) - continue; + /* + * Set floor freq and (re)start timer for when last + * validated. + */ - mutex_lock(&set_speed_lock); + pcpu->floor_freq = hispeed_freq; + pcpu->floor_validate_time = ktime_to_us(ktime_get()); + } - for_each_cpu(j, pcpu->policy->cpus) { - struct cpufreq_interactive_cpuinfo *pjcpu = - &per_cpu(cpuinfo, j); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); - if (pjcpu->target_freq > max_freq) - max_freq = pjcpu->target_freq; - } + if (anyboost) + wake_up_process(speedchange_task); +} - __cpufreq_driver_target(pcpu->policy, max_freq, - CPUFREQ_RELATION_H); +static int cpufreq_interactive_notifier( + struct notifier_block *nb, unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpufreq_interactive_cpuinfo *pcpu; + int cpu; + unsigned long flags; - mutex_unlock(&set_speed_lock); + if (val == CPUFREQ_POSTCHANGE) { + pcpu = &per_cpu(cpuinfo, freq->cpu); + if (!down_read_trylock(&pcpu->enable_sem)) + return 0; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return 0; + } - trace_cpufreq_interactive_down(cpu, pcpu->target_freq, - pcpu->policy->cur); + for_each_cpu(cpu, pcpu->policy->cpus) { + struct cpufreq_interactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, cpu); + spin_lock_irqsave(&pjcpu->load_lock, flags); + update_load(cpu); + spin_unlock_irqrestore(&pjcpu->load_lock, flags); + } - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(cpu, - &pcpu->freq_change_time); - pcpu->freq_change_time_in_iowait = - get_cpu_iowait_time(cpu, NULL); + up_read(&pcpu->enable_sem); } + return 0; } -static void cpufreq_interactive_boost(void) +static struct notifier_block cpufreq_notifier_block = { + .notifier_call = cpufreq_interactive_notifier, +}; + +static ssize_t show_target_loads( + struct kobject *kobj, struct attribute *attr, char *buf) { int i; - int anyboost = 0; + ssize_t ret = 0; unsigned long flags; - struct cpufreq_interactive_cpuinfo *pcpu; - spin_lock_irqsave(&up_cpumask_lock, flags); + spin_lock_irqsave(&target_loads_lock, flags); - for_each_online_cpu(i) { - pcpu = &per_cpu(cpuinfo, i); + for (i = 0; i < ntarget_loads; i++) + ret += sprintf(buf + ret, "%u%s", target_loads[i], + i & 0x1 ? 
":" : " "); - if (pcpu->target_freq < hispeed_freq) { - pcpu->target_freq = hispeed_freq; - cpumask_set_cpu(i, &up_cpumask); - anyboost = 1; - } + ret += sprintf(buf + ret, "\n"); + spin_unlock_irqrestore(&target_loads_lock, flags); + return ret; +} - /* Set floor freq and (re)start timer for when last - * validated. - */ +static ssize_t store_target_loads( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) +{ + int ret; + const char *cp; + unsigned int *new_target_loads = NULL; + int ntokens = 1; + int i; + unsigned long flags; - pcpu->floor_freq = hispeed_freq; - pcpu->floor_validate_time = ktime_to_us(ktime_get()); + cp = buf; + while ((cp = strpbrk(cp + 1, " :"))) + ntokens++; + + if (!(ntokens & 0x1)) + goto err_inval; + + new_target_loads = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL); + if (!new_target_loads) { + ret = -ENOMEM; + goto err; } - spin_unlock_irqrestore(&up_cpumask_lock, flags); + cp = buf; + i = 0; + while (i < ntokens) { + if (sscanf(cp, "%u", &new_target_loads[i++]) != 1) + goto err_inval; - if (anyboost) - wake_up_process(up_task); + cp = strpbrk(cp, " :"); + if (!cp) + break; + cp++; + } + + if (i != ntokens) + goto err_inval; + + spin_lock_irqsave(&target_loads_lock, flags); + if (target_loads != default_target_loads) + kfree(target_loads); + target_loads = new_target_loads; + ntarget_loads = ntokens; + spin_unlock_irqrestore(&target_loads_lock, flags); + return count; + +err_inval: + ret = -EINVAL; +err: + kfree(new_target_loads); + return ret; } +static struct global_attr target_loads_attr = + __ATTR(target_loads, S_IRUGO | S_IWUSR, + show_target_loads, store_target_loads); + static void cpufreq_interactive_core_lock_timer(unsigned long data) { - queue_work(down_wq, &core_lock.unlock_work); + queue_work(inputopen_wq, &core_lock.unlock_work); } static void cpufreq_interactive_unlock_cores(struct work_struct *wq) @@ -729,6 +763,7 @@ static void cpufreq_interactive_input_event(struct input_handle *handle, { if (input_boost_val && type == EV_SYN && code == SYN_REPORT) { wake_up_process(core_lock.lock_task); + trace_cpufreq_interactive_boost("input"); cpufreq_interactive_boost(); } } @@ -766,7 +801,7 @@ static int cpufreq_interactive_input_connect(struct input_handler *handler, goto err; inputopen.handle = handle; - queue_work(down_wq, &inputopen.inputopen_work); + queue_work(inputopen_wq, &inputopen.inputopen_work); return 0; err: kfree(handle); @@ -807,115 +842,10 @@ static struct input_handler cpufreq_interactive_input_handler = { .id_table = cpufreq_interactive_ids, }; -static ssize_t show_go_maxspeed_load(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", go_maxspeed_load); -} - -static ssize_t store_go_maxspeed_load(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - go_maxspeed_load = val; - return count; -} - -static struct global_attr go_maxspeed_load_attr = __ATTR(go_maxspeed_load, 0644, - show_go_maxspeed_load, store_go_maxspeed_load); - -static ssize_t show_boost_factor(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", boost_factor); -} - -static ssize_t store_boost_factor(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - boost_factor = val; - return count; -} - -static 
struct global_attr boost_factor_attr = __ATTR(boost_factor, 0644, - show_boost_factor, store_boost_factor); - -static ssize_t show_io_is_busy(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", io_is_busy); -} - -static ssize_t store_io_is_busy(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - if (!strict_strtoul(buf, 0, &io_is_busy)) - return count; - return -EINVAL; -} - -static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644, - show_io_is_busy, store_io_is_busy); - -static ssize_t show_sustain_load(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", sustain_load); -} - -static ssize_t store_sustain_load(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - sustain_load = val; - return count; -} - -static struct global_attr sustain_load_attr = __ATTR(sustain_load, 0644, - show_sustain_load, store_sustain_load); - -static ssize_t show_max_boost(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", max_boost); -} - -static ssize_t store_max_boost(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - max_boost = val; - return count; -} - -static struct global_attr max_boost_attr = __ATTR(max_boost, 0644, - show_max_boost, store_max_boost); - static ssize_t show_hispeed_freq(struct kobject *kobj, struct attribute *attr, char *buf) { - return sprintf(buf, "%llu\n", hispeed_freq); + return sprintf(buf, "%u\n", hispeed_freq); } static ssize_t store_hispeed_freq(struct kobject *kobj, @@ -923,9 +853,9 @@ static ssize_t store_hispeed_freq(struct kobject *kobj, size_t count) { int ret; - u64 val; + long unsigned int val; - ret = strict_strtoull(buf, 0, &val); + ret = strict_strtoul(buf, 0, &val); if (ret < 0) return ret; hispeed_freq = val; @@ -1024,6 +954,29 @@ static ssize_t store_timer_rate(struct kobject *kobj, static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644, show_timer_rate, store_timer_rate); +static ssize_t show_timer_slack( + struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", timer_slack_val); +} + +static ssize_t store_timer_slack( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtol(buf, 10, &val); + if (ret < 0) + return ret; + + timer_slack_val = val; + return count; +} + +define_one_global_rw(timer_slack); + static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -1063,30 +1016,72 @@ static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, boost_val = val; - if (boost_val) + if (boost_val) { + trace_cpufreq_interactive_boost("on"); cpufreq_interactive_boost(); - - if (!boost_val) - trace_cpufreq_interactive_unboost(hispeed_freq); + } else { + trace_cpufreq_interactive_unboost("off"); + } return count; } define_one_global_rw(boost); +static ssize_t store_boostpulse(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + boostpulse_endtime = ktime_to_us(ktime_get()) + boostpulse_duration_val; + trace_cpufreq_interactive_boost("pulse"); + 
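/*
 * Illustrative note on boostpulse (a sketch of intent, not part of the
 * patch): writing any value that kstrtoul() accepts to the boostpulse
 * attribute first extends boostpulse_endtime by boostpulse_duration_val
 * (microseconds) and then raises every online CPU to at least
 * hispeed_freq via the cpufreq_interactive_boost() call that follows.
 * A minimal userspace caller, assuming the attribute group is published
 * at the usual cpufreq_global_kobject path:
 *
 *   #include <fcntl.h>
 *   #include <unistd.h>
 *
 *   static void pulse_boost(void)
 *   {
 *           int fd = open("/sys/devices/system/cpu/cpufreq/interactive/boostpulse",
 *                         O_WRONLY);
 *
 *           if (fd < 0)
 *                   return;
 *           write(fd, "1", 1);
 *           close(fd);
 *   }
 *
 * The per-CPU timers are then expected to keep target_freq at or above
 * the boosted floor until boostpulse_endtime has passed.
 */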
cpufreq_interactive_boost(); + return count; +} + +static struct global_attr boostpulse = + __ATTR(boostpulse, 0200, NULL, store_boostpulse); + +static ssize_t show_boostpulse_duration( + struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", boostpulse_duration_val); +} + +static ssize_t store_boostpulse_duration( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + boostpulse_duration_val = val; + return count; +} + +define_one_global_rw(boostpulse_duration); + static struct attribute *interactive_attributes[] = { - &go_maxspeed_load_attr.attr, - &boost_factor_attr.attr, - &max_boost_attr.attr, - &io_is_busy_attr.attr, - &sustain_load_attr.attr, + &target_loads_attr.attr, &hispeed_freq_attr.attr, &go_hispeed_load_attr.attr, &above_hispeed_delay.attr, &min_sample_time_attr.attr, &timer_rate_attr.attr, + &timer_slack.attr, &input_boost.attr, &boost.attr, + &boostpulse.attr, + &boostpulse_duration.attr, NULL, }; @@ -1095,6 +1090,26 @@ static struct attribute_group interactive_attr_group = { .name = "interactive", }; +static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, + unsigned long val, + void *data) +{ + switch (val) { + case IDLE_START: + cpufreq_interactive_idle_start(); + break; + case IDLE_END: + cpufreq_interactive_idle_end(); + break; + } + + return 0; +} + +static struct notifier_block cpufreq_interactive_idle_nb = { + .notifier_call = cpufreq_interactive_idle_notifier, +}; + static int cpufreq_governor_interactive(struct cpufreq_policy *policy, unsigned int event) { @@ -1108,76 +1123,90 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, if (!cpu_online(policy->cpu)) return -EINVAL; + mutex_lock(&gov_lock); + freq_table = cpufreq_frequency_get_table(policy->cpu); + if (!hispeed_freq) + hispeed_freq = policy->max; for_each_cpu(j, policy->cpus) { + unsigned long expires; + pcpu = &per_cpu(cpuinfo, j); pcpu->policy = policy; pcpu->target_freq = policy->cur; pcpu->freq_table = freq_table; - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(j, - &pcpu->freq_change_time); - pcpu->time_in_idle = pcpu->freq_change_time_in_idle; - pcpu->idle_exit_time = pcpu->freq_change_time; - pcpu->freq_change_time_in_iowait = - get_cpu_iowait_time(j, NULL); - pcpu->time_in_iowait = pcpu->freq_change_time_in_iowait; - - pcpu->timer_idlecancel = 1; pcpu->floor_freq = pcpu->target_freq; pcpu->floor_validate_time = - pcpu->freq_change_time; + ktime_to_us(ktime_get()); + pcpu->hispeed_validate_time = + pcpu->floor_validate_time; + down_write(&pcpu->enable_sem); + expires = jiffies + usecs_to_jiffies(timer_rate); + pcpu->cpu_timer.expires = expires; + add_timer_on(&pcpu->cpu_timer, j); + if (timer_slack_val >= 0) { + expires += usecs_to_jiffies(timer_slack_val); + pcpu->cpu_slack_timer.expires = expires; + add_timer_on(&pcpu->cpu_slack_timer, j); + } pcpu->governor_enabled = 1; - smp_wmb(); + up_write(&pcpu->enable_sem); } - if (!hispeed_freq) - hispeed_freq = policy->max; - /* * Do not register the idle hook and create sysfs * entries if we have already done so. 
*/ - if (atomic_inc_return(&active_count) > 1) + if (++active_count > 1) { + mutex_unlock(&gov_lock); return 0; + } rc = sysfs_create_group(cpufreq_global_kobject, &interactive_attr_group); - if (rc) + if (rc) { + mutex_unlock(&gov_lock); return rc; + } rc = input_register_handler(&cpufreq_interactive_input_handler); if (rc) pr_warn("%s: failed to register input handler\n", __func__); + idle_notifier_register(&cpufreq_interactive_idle_nb); + cpufreq_register_notifier( + &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); + mutex_unlock(&gov_lock); break; case CPUFREQ_GOV_STOP: + mutex_lock(&gov_lock); for_each_cpu(j, policy->cpus) { pcpu = &per_cpu(cpuinfo, j); + down_write(&pcpu->enable_sem); pcpu->governor_enabled = 0; - smp_wmb(); del_timer_sync(&pcpu->cpu_timer); - - /* - * Reset idle exit time since we may cancel the timer - * before it can run after the last idle exit time, - * to avoid tripping the check in idle exit for a timer - * that is trying to run. - */ - pcpu->idle_exit_time = 0; + del_timer_sync(&pcpu->cpu_slack_timer); + up_write(&pcpu->enable_sem); } - flush_work(&freq_scale_down_work); - if (atomic_dec_return(&active_count) > 0) + flush_work(&inputopen.inputopen_work); + + if (--active_count > 0) { + mutex_unlock(&gov_lock); return 0; + } + cpufreq_unregister_notifier( + &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); + idle_notifier_unregister(&cpufreq_interactive_idle_nb); input_unregister_handler(&cpufreq_interactive_input_handler); sysfs_remove_group(cpufreq_global_kobject, &interactive_attr_group); + mutex_unlock(&gov_lock); break; @@ -1193,67 +1222,44 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, return 0; } -static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, - unsigned long val, - void *data) +static void cpufreq_interactive_nop_timer(unsigned long data) { - switch (val) { - case IDLE_START: - cpufreq_interactive_idle_start(); - break; - case IDLE_END: - cpufreq_interactive_idle_end(); - break; - } - - return 0; } -static struct notifier_block cpufreq_interactive_idle_nb = { - .notifier_call = cpufreq_interactive_idle_notifier, -}; - static int __init cpufreq_interactive_init(void) { unsigned int i; struct cpufreq_interactive_cpuinfo *pcpu; struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD; - go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; - min_sample_time = DEFAULT_MIN_SAMPLE_TIME; - above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; - timer_rate = DEFAULT_TIMER_RATE; - /* Initalize per-cpu timers */ for_each_possible_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - init_timer(&pcpu->cpu_timer); + init_timer_deferrable(&pcpu->cpu_timer); pcpu->cpu_timer.function = cpufreq_interactive_timer; pcpu->cpu_timer.data = i; + init_timer(&pcpu->cpu_slack_timer); + pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer; + spin_lock_init(&pcpu->load_lock); + init_rwsem(&pcpu->enable_sem); } - up_task = kthread_create(cpufreq_interactive_up_task, NULL, - "kinteractiveup"); - if (IS_ERR(up_task)) - return PTR_ERR(up_task); - - sched_setscheduler_nocheck(up_task, SCHED_FIFO, ¶m); - get_task_struct(up_task); - - /* No rescuer thread, bind to CPU queuing the work for possibly - warm cache (probably doesn't matter much). 
*/ - down_wq = alloc_workqueue("knteractive_down", 0, 1); + spin_lock_init(&target_loads_lock); + spin_lock_init(&speedchange_cpumask_lock); + mutex_init(&gov_lock); + speedchange_task = + kthread_create(cpufreq_interactive_speedchange_task, NULL, + "cfinteractive"); + if (IS_ERR(speedchange_task)) + return PTR_ERR(speedchange_task); - if (!down_wq) - goto err_freeuptask; + sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); + get_task_struct(speedchange_task); + + inputopen_wq = create_workqueue("cfinteractive"); - INIT_WORK(&freq_scale_down_work, - cpufreq_interactive_freq_down); - - spin_lock_init(&up_cpumask_lock); - spin_lock_init(&down_cpumask_lock); - mutex_init(&set_speed_lock); + if (!inputopen_wq) + goto err_freetask; pm_qos_add_request(&core_lock.qos_min_req, PM_QOS_MIN_ONLINE_CPUS, PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); @@ -1278,13 +1284,17 @@ static int __init cpufreq_interactive_init(void) sched_setscheduler_nocheck(core_lock.lock_task, SCHED_FIFO, ¶m); get_task_struct(core_lock.lock_task); - idle_notifier_register(&cpufreq_interactive_idle_nb); + INIT_WORK(&inputopen.inputopen_work, cpufreq_interactive_input_open); INIT_WORK(&core_lock.unlock_work, cpufreq_interactive_unlock_cores); - return cpufreq_register_governor(&cpufreq_gov_interactive); -err_freeuptask: - put_task_struct(up_task); + /* NB: wake up so the thread does not look hung to the freezer */ + wake_up_process(speedchange_task); + + return cpufreq_register_governor(&cpufreq_gov_interactive); + +err_freetask: + put_task_struct(speedchange_task); return -ENOMEM; } @@ -1297,9 +1307,9 @@ module_init(cpufreq_interactive_init); static void __exit cpufreq_interactive_exit(void) { cpufreq_unregister_governor(&cpufreq_gov_interactive); - kthread_stop(up_task); - put_task_struct(up_task); - destroy_workqueue(down_wq); + kthread_stop(speedchange_task); + put_task_struct(speedchange_task); + destroy_workqueue(inputopen_wq); pm_qos_remove_request(&core_lock.qos_min_req); pm_qos_remove_request(&core_lock.qos_max_req); diff --git a/drivers/cpufreq/cpufreq_lulzactive.c b/drivers/cpufreq/cpufreq_lulzactive.c new file mode 100644 index 00000000000..0f2257a1582 --- /dev/null +++ b/drivers/cpufreq/cpufreq_lulzactive.c @@ -0,0 +1,1011 @@ +/* + * drivers/cpufreq/cpufreq_lulzactive.c + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Mike Chan (mike@android.com) + * Edited: Tegrak (luciferanna@gmail.com) + * + * Driver values in /sys/devices/system/cpu/cpufreq/lulzactive + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LULZACTIVE_VERSION (2) +#define LULZACTIVE_AUTHOR "tegrak" + +// if you changed some codes for optimization, just write your name here. 
+#define LULZACTIVE_TUNER "motley" + +static atomic_t active_count = ATOMIC_INIT(0); + +struct cpufreq_lulzactive_cpuinfo { + struct timer_list cpu_timer; + int timer_idlecancel; + u64 time_in_idle; + u64 idle_exit_time; + u64 timer_run_time; + int idling; + u64 freq_change_time; + u64 freq_change_time_in_idle; + struct cpufreq_policy *policy; + struct cpufreq_frequency_table *freq_table; + struct cpufreq_frequency_table lulzfreq_table[32]; + unsigned int lulzfreq_table_size; + unsigned int target_freq; + int governor_enabled; +}; + +static DEFINE_PER_CPU(struct cpufreq_lulzactive_cpuinfo, cpuinfo); + +/* Workqueues handle frequency scaling */ +static struct task_struct *up_task; +static struct workqueue_struct *down_wq; +static struct work_struct freq_scale_down_work; +static cpumask_t up_cpumask; +static spinlock_t up_cpumask_lock; +static cpumask_t down_cpumask; +static spinlock_t down_cpumask_lock; +static struct mutex set_speed_lock; + +/* + * The minimum amount of time to spend at a frequency before we can step up. + */ +#define DEFAULT_UP_SAMPLE_TIME 24 * USEC_PER_MSEC +static unsigned long up_sample_time; + +/* + * The minimum amount of time to spend at a frequency before we can step down. + */ +#define DEFAULT_DOWN_SAMPLE_TIME 49 * USEC_PER_MSEC +static unsigned long down_sample_time; + +/* + * CPU freq will be increased if measured load > inc_cpu_load; + */ +#define DEFAULT_INC_CPU_LOAD 60 +static unsigned long inc_cpu_load; + +/* + * CPU freq will be decreased if measured load < dec_cpu_load; + * not implemented yet. + */ +#define DEFAULT_DEC_CPU_LOAD 30 +static unsigned long dec_cpu_load; + +/* + * Increasing frequency table index + * zero disables and causes to always jump straight to max frequency. + */ +#define DEFAULT_PUMP_UP_STEP 1 +static unsigned long pump_up_step; + +/* + * Decreasing frequency table index + * zero disables and will calculate frequency according to load heuristic. + */ +#define DEFAULT_PUMP_DOWN_STEP 1 +static unsigned long pump_down_step; + +/* + * Use minimum frequency while suspended. 
+ */ +static unsigned int early_suspended; + +#define SCREEN_OFF_LOWEST_STEP (7) +#define DEFAULT_SCREEN_OFF_MIN_STEP (SCREEN_OFF_LOWEST_STEP) +static unsigned long screen_off_min_step; + +static int cpufreq_governor_lulzactive(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_LULZACTIVE +static +#endif +struct cpufreq_governor cpufreq_gov_lulzactive = { + .name = "lulzactive", + .governor = cpufreq_governor_lulzactive, + .max_transition_latency = 10000000, + .owner = THIS_MODULE, +}; + +static unsigned int get_lulzfreq_table_size(struct cpufreq_lulzactive_cpuinfo *pcpu) { + unsigned int size = 0, i; + for (i = 0; (pcpu->freq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + unsigned int freq = pcpu->freq_table[i].frequency; + if (freq == CPUFREQ_ENTRY_INVALID) continue; + pcpu->lulzfreq_table[size].index = i; //in case we need it later -gm + pcpu->lulzfreq_table[size].frequency = freq; + size++; + } + pcpu->lulzfreq_table[size].index = 0; + pcpu->lulzfreq_table[size].frequency = CPUFREQ_TABLE_END; + return size; +} + +static inline void fix_screen_off_min_step(struct cpufreq_lulzactive_cpuinfo *pcpu) { + if (pcpu->lulzfreq_table_size <= 0) { + screen_off_min_step = 0; + return; + } + + if (DEFAULT_SCREEN_OFF_MIN_STEP == screen_off_min_step) + for(screen_off_min_step=0; + pcpu->lulzfreq_table[screen_off_min_step].frequency != 500000; + screen_off_min_step++); + + if (screen_off_min_step >= pcpu->lulzfreq_table_size) + for(screen_off_min_step=0; + pcpu->lulzfreq_table[screen_off_min_step].frequency != 500000; + screen_off_min_step++); +} + +static inline unsigned int adjust_screen_off_freq( + struct cpufreq_lulzactive_cpuinfo *pcpu, unsigned int freq) { + + if (early_suspended && freq > pcpu->lulzfreq_table[screen_off_min_step].frequency) { + freq = pcpu->lulzfreq_table[screen_off_min_step].frequency; + pcpu->target_freq = pcpu->policy->cur; + + if (freq > pcpu->policy->max) + freq = pcpu->policy->max; + if (freq < pcpu->policy->min) + freq = pcpu->policy->min; + } + + return freq; +} + +static void cpufreq_lulzactive_timer(unsigned long data) +{ + unsigned int delta_idle; + unsigned int delta_time; + int cpu_load; + int load_since_change; + u64 time_in_idle; + u64 idle_exit_time; + struct cpufreq_lulzactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, data); + u64 now_idle; + unsigned int new_freq; + unsigned int index; + unsigned long flags; + int ret; + + smp_rmb(); + + if (!pcpu->governor_enabled) + goto exit; + + /* + * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, + * this lets idle exit know the current idle time sample has + * been processed, and idle exit can generate a new sample and + * re-arm the timer. This prevents a concurrent idle + * exit on that CPU from writing a new set of info at the same time + * the timer function runs (the timer function can't use that info + * until more time passes). + */ + time_in_idle = pcpu->time_in_idle; + idle_exit_time = pcpu->idle_exit_time; + now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); + smp_wmb(); + + /* If we raced with cancelling a timer, skip. */ + if (!idle_exit_time) + goto exit; + + delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle); + delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, + idle_exit_time); + + /* + * If timer ran less than 1ms after short-term sample started, retry. 
+ */ + if (delta_time < 1000) + goto rearm; + + if (delta_idle > delta_time) + cpu_load = 0; + else + cpu_load = 100 * (delta_time - delta_idle) / delta_time; + + delta_idle = (unsigned int) cputime64_sub(now_idle, + pcpu->freq_change_time_in_idle); + delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, + pcpu->freq_change_time); + + if ((delta_time == 0) || (delta_idle > delta_time)) + load_since_change = 0; + else + load_since_change = + 100 * (delta_time - delta_idle) / delta_time; + + /* + * Choose greater of short-term load (since last idle timer + * started or timer function re-armed itself) or long-term load + * (since last frequency change). + */ + if (load_since_change > cpu_load) + cpu_load = load_since_change; + + /* + * START lulzactive algorithm section + */ + if (cpu_load >= inc_cpu_load) { + if (pump_up_step && pcpu->policy->cur < pcpu->policy->max) { + ret = cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + pcpu->policy->cur, CPUFREQ_RELATION_H, + &index); + if (ret < 0) { + goto rearm; + } + + // apply pump_up_step by tegrak + index -= pump_up_step; + if (index < 0) + index = 0; + + new_freq = pcpu->lulzfreq_table[index].frequency; + } + else { + new_freq = pcpu->policy->max; + } + } + else { + if (pump_down_step) { + ret = cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + pcpu->policy->cur, CPUFREQ_RELATION_H, + &index); + if (ret < 0) { + goto rearm; + } + + // apply pump_down_step by tegrak + index += pump_down_step; + if (index >= pcpu->lulzfreq_table_size) { + index = pcpu->lulzfreq_table_size - 1; + } + + new_freq = (pcpu->policy->cur > pcpu->policy->min) ? + (pcpu->lulzfreq_table[index].frequency) : + (pcpu->policy->min); + } + else { + new_freq = pcpu->policy->max * cpu_load / 100; + ret = cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + new_freq, CPUFREQ_RELATION_H, + &index); + if (ret < 0) { + goto rearm; + } + new_freq = pcpu->lulzfreq_table[index].frequency; + } + } + + // adjust freq when screen off + new_freq = adjust_screen_off_freq(pcpu, new_freq); + + if (pcpu->target_freq == new_freq) + goto rearm_if_notmax; + + /* + * Do not scale down unless we have been at this frequency for the + * minimum sample time. + */ + if (new_freq < pcpu->target_freq) { + if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time) + < down_sample_time) + goto rearm; + } + else { + if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time) < + up_sample_time) { + /* don't reset timer */ + goto rearm; + } + } + + if (new_freq < pcpu->target_freq) { + pcpu->target_freq = new_freq; + spin_lock_irqsave(&down_cpumask_lock, flags); + cpumask_set_cpu(data, &down_cpumask); + spin_unlock_irqrestore(&down_cpumask_lock, flags); + queue_work(down_wq, &freq_scale_down_work); + } else { + pcpu->target_freq = new_freq; + spin_lock_irqsave(&up_cpumask_lock, flags); + cpumask_set_cpu(data, &up_cpumask); + spin_unlock_irqrestore(&up_cpumask_lock, flags); + wake_up_process(up_task); + } + +rearm_if_notmax: + /* + * Already set max speed and don't see a need to change that, + * wait until next idle to re-evaluate, don't need timer. + */ + if (pcpu->target_freq == pcpu->policy->max) + goto exit; + +rearm: + if (!timer_pending(&pcpu->cpu_timer)) { + /* + * If already at min: if that CPU is idle, don't set timer. + * Else cancel the timer if that CPU goes idle. We don't + * need to re-evaluate speed until the next idle exit. 
+ */ + if (pcpu->target_freq == pcpu->policy->min) { + smp_rmb(); + + if (pcpu->idling) + goto exit; + + pcpu->timer_idlecancel = 1; + } + + pcpu->time_in_idle = get_cpu_idle_time_us( + data, &pcpu->idle_exit_time); + mod_timer(&pcpu->cpu_timer, + jiffies + 4); + } + +exit: + return; +} + +static void cpufreq_lulzactive_idle_start(void) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, smp_processor_id()); + int pending; + + if (!pcpu->governor_enabled) + return; + + pcpu->idling = 1; + smp_wmb(); + pending = timer_pending(&pcpu->cpu_timer); + + if (pcpu->target_freq != pcpu->policy->min) { +#ifdef CONFIG_SMP + /* + * Entering idle while not at lowest speed. On some + * platforms this can hold the other CPU(s) at that speed + * even though the CPU is idle. Set a timer to re-evaluate + * speed so this idle CPU doesn't hold the other CPUs above + * min indefinitely. This should probably be a quirk of + * the CPUFreq driver. + */ + if (!pending) { + pcpu->time_in_idle = get_cpu_idle_time_us( + smp_processor_id(), &pcpu->idle_exit_time); + pcpu->timer_idlecancel = 0; + mod_timer(&pcpu->cpu_timer, + jiffies + 4); + } +#endif + } else { + /* + * If at min speed and entering idle after load has + * already been evaluated, and a timer has been set just in + * case the CPU suddenly goes busy, cancel that timer. The + * CPU didn't go busy; we'll recheck things upon idle exit. + */ + if (pending && pcpu->timer_idlecancel) { + del_timer(&pcpu->cpu_timer); + /* + * Ensure last timer run time is after current idle + * sample start time, so next idle exit will always + * start a new idle sampling period. + */ + pcpu->idle_exit_time = 0; + pcpu->timer_idlecancel = 0; + } + } + +} + +static void cpufreq_lulzactive_idle_end(void) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, smp_processor_id()); + + pcpu->idling = 0; + smp_wmb(); + + /* + * Arm the timer for 1-2 ticks later if not already, and if the timer + * function has already processed the previous load sampling + * interval. (If the timer is not pending but has not processed + * the previous interval, it is probably racing with us on another + * CPU. Let it compute load based on the previous sample and then + * re-arm the timer for another interval when it's done, rather + * than updating the interval start time to be "now", which doesn't + * give the timer function enough time to make a decision on this + * run.) 
+ */ + if (timer_pending(&pcpu->cpu_timer) == 0 && + pcpu->timer_run_time >= pcpu->idle_exit_time && + pcpu->governor_enabled) { + pcpu->time_in_idle = + get_cpu_idle_time_us(smp_processor_id(), + &pcpu->idle_exit_time); + pcpu->timer_idlecancel = 0; + mod_timer(&pcpu->cpu_timer, + jiffies + 4); + } + +} + +static int cpufreq_lulzactive_up_task(void *data) +{ + unsigned int cpu; + cpumask_t tmp_mask; + unsigned long flags; + struct cpufreq_lulzactive_cpuinfo *pcpu; + + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&up_cpumask_lock, flags); + + if (cpumask_empty(&up_cpumask)) { + spin_unlock_irqrestore(&up_cpumask_lock, flags); + schedule(); + + if (kthread_should_stop()) + break; + + spin_lock_irqsave(&up_cpumask_lock, flags); + } + + set_current_state(TASK_RUNNING); + tmp_mask = up_cpumask; + cpumask_clear(&up_cpumask); + spin_unlock_irqrestore(&up_cpumask_lock, flags); + + for_each_cpu(cpu, &tmp_mask) { + unsigned int j; + unsigned int max_freq = 0; + + pcpu = &per_cpu(cpuinfo, cpu); + smp_rmb(); + + if (!pcpu->governor_enabled) + continue; + + mutex_lock(&set_speed_lock); + + for_each_cpu(j, pcpu->policy->cpus) { + struct cpufreq_lulzactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, j); + + if (pjcpu->target_freq > max_freq) + max_freq = pjcpu->target_freq; + } + + if (max_freq != pcpu->policy->cur) + __cpufreq_driver_target(pcpu->policy, + max_freq, + CPUFREQ_RELATION_H); + mutex_unlock(&set_speed_lock); + } + } + + return 0; +} + +static void cpufreq_lulzactive_freq_down(struct work_struct *work) +{ + unsigned int cpu; + cpumask_t tmp_mask; + unsigned long flags; + struct cpufreq_lulzactive_cpuinfo *pcpu; + + spin_lock_irqsave(&down_cpumask_lock, flags); + tmp_mask = down_cpumask; + cpumask_clear(&down_cpumask); + spin_unlock_irqrestore(&down_cpumask_lock, flags); + + for_each_cpu(cpu, &tmp_mask) { + unsigned int j; + unsigned int max_freq = 0; + + pcpu = &per_cpu(cpuinfo, cpu); + smp_rmb(); + + if (!pcpu->governor_enabled) + continue; + + mutex_lock(&set_speed_lock); + + for_each_cpu(j, pcpu->policy->cpus) { + struct cpufreq_lulzactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, j); + + if (pjcpu->target_freq > max_freq) + max_freq = pjcpu->target_freq; + } + + if (max_freq != pcpu->policy->cur) + __cpufreq_driver_target(pcpu->policy, max_freq, + CPUFREQ_RELATION_H); + + mutex_unlock(&set_speed_lock); + } +} + +// inc_cpu_load +static ssize_t show_inc_cpu_load(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", inc_cpu_load); +} + +static ssize_t store_inc_cpu_load(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + if(strict_strtoul(buf, 0, &inc_cpu_load)==-EINVAL) return -EINVAL; + + if (inc_cpu_load > 100) { + inc_cpu_load = 100; + } + else if (inc_cpu_load < 10) { + inc_cpu_load = 10; + } + return count; +} + +static struct global_attr inc_cpu_load_attr = __ATTR(inc_cpu_load, 0666, + show_inc_cpu_load, store_inc_cpu_load); + +// down_sample_time +static ssize_t show_down_sample_time(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", down_sample_time); +} + +static ssize_t store_down_sample_time(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + if(strict_strtoul(buf, 0, &down_sample_time)==-EINVAL) return -EINVAL; + return count; +} + +static struct global_attr down_sample_time_attr = __ATTR(down_sample_time, 0666, + show_down_sample_time, store_down_sample_time); + +// up_sample_time +static ssize_t 
show_up_sample_time(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", up_sample_time); +} + +static ssize_t store_up_sample_time(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + if(strict_strtoul(buf, 0, &up_sample_time)==-EINVAL) return -EINVAL; + return count; +} + +static struct global_attr up_sample_time_attr = __ATTR(up_sample_time, 0666, + show_up_sample_time, store_up_sample_time); + +// debug_mode +static ssize_t show_debug_mode(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "0\n"); +} + +static ssize_t store_debug_mode(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + return count; +} + +static struct global_attr debug_mode_attr = __ATTR(debug_mode, 0666, + show_debug_mode, store_debug_mode); + +// pump_up_step +static ssize_t show_pump_up_step(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", pump_up_step); +} + +static ssize_t store_pump_up_step(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + if(strict_strtoul(buf, 0, &pump_up_step)==-EINVAL) return -EINVAL; + return count; +} + +static struct global_attr pump_up_step_attr = __ATTR(pump_up_step, 0666, + show_pump_up_step, store_pump_up_step); + +// pump_down_step +static ssize_t show_pump_down_step(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", pump_down_step); +} + +static ssize_t store_pump_down_step(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu; + + if(strict_strtoul(buf, 0, &pump_down_step)==-EINVAL) return -EINVAL; + + pcpu = &per_cpu(cpuinfo, 0); + // fix out of bound + if (pcpu->lulzfreq_table_size <= pump_down_step) { + pump_down_step = pcpu->lulzfreq_table_size - 1; + } + return count; +} + +static struct global_attr pump_down_step_attr = __ATTR(pump_down_step, 0666, + show_pump_down_step, store_pump_down_step); + +// screen_off_min_step +static ssize_t show_screen_off_min_step(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu; + + pcpu = &per_cpu(cpuinfo, 0); + fix_screen_off_min_step(pcpu); + + return sprintf(buf, "%lu\n", screen_off_min_step); +} + +static ssize_t store_screen_off_min_step(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu; + + if(strict_strtoul(buf, 0, &screen_off_min_step)==-EINVAL) return -EINVAL; + + pcpu = &per_cpu(cpuinfo, 0); + fix_screen_off_min_step(pcpu); + + return count; +} + +static struct global_attr screen_off_min_step_attr = __ATTR(screen_off_min_step, 0666, + show_screen_off_min_step, store_screen_off_min_step); + +// author +static ssize_t show_author(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", LULZACTIVE_AUTHOR); +} + +static struct global_attr author_attr = __ATTR(author, 0444, + show_author, NULL); + +// tuner +static ssize_t show_tuner(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", LULZACTIVE_TUNER); +} + +static struct global_attr tuner_attr = __ATTR(tuner, 0444, + show_tuner, NULL); + +// version +static ssize_t show_version(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", LULZACTIVE_VERSION); +} + +static struct global_attr version_attr = __ATTR(version, 0444, + 
show_version, NULL); + +// freq_table +static ssize_t show_freq_table(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu; + char temp[64]; + int i; + + pcpu = &per_cpu(cpuinfo, 0); + + for (i = 0; i < pcpu->lulzfreq_table_size; i++) { + sprintf(temp, "%u\n", pcpu->lulzfreq_table[i].frequency); + strcat(buf, temp); + } + + return strlen(buf); +} + +static struct global_attr freq_table_attr = __ATTR(freq_table, 0444, + show_freq_table, NULL); + +static struct attribute *lulzactive_attributes[] = { + &inc_cpu_load_attr.attr, + &up_sample_time_attr.attr, + &down_sample_time_attr.attr, + &pump_up_step_attr.attr, + &pump_down_step_attr.attr, + &screen_off_min_step_attr.attr, + &debug_mode_attr.attr, + &author_attr.attr, + &tuner_attr.attr, + &version_attr.attr, + &freq_table_attr.attr, + NULL, +}; + +void start_lulzactive(void); +void stop_lulzactive(void); + +static struct attribute_group lulzactive_attr_group = { + .attrs = lulzactive_attributes, + .name = "lulzactive", +}; + +static int cpufreq_governor_lulzactive(struct cpufreq_policy *policy, + unsigned int event) +{ + int rc; + unsigned int j; + struct cpufreq_lulzactive_cpuinfo *pcpu; + struct cpufreq_frequency_table *freq_table; + + switch (event) { + case CPUFREQ_GOV_START: + if (!cpu_online(policy->cpu)) + return -EINVAL; + + freq_table = + cpufreq_frequency_get_table(policy->cpu); + + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + pcpu->policy = policy; + pcpu->target_freq = policy->cur; + pcpu->freq_table = freq_table; + pcpu->freq_change_time_in_idle = + get_cpu_idle_time_us(j, + &pcpu->freq_change_time); + pcpu->governor_enabled = 1; + smp_wmb(); + pcpu->lulzfreq_table_size = get_lulzfreq_table_size(pcpu); + + // fix invalid screen_off_min_step + fix_screen_off_min_step(pcpu); + } + + /* + * Do not register the idle hook and create sysfs + * entries if we have already done so. + */ + if (atomic_inc_return(&active_count) > 1) + return 0; + start_lulzactive(); + + rc = sysfs_create_group(cpufreq_global_kobject, + &lulzactive_attr_group); + if (rc) + return rc; + + break; + + case CPUFREQ_GOV_STOP: + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + pcpu->governor_enabled = 0; + smp_wmb(); + del_timer_sync(&pcpu->cpu_timer); + + /* + * Reset idle exit time since we may cancel the timer + * before it can run after the last idle exit time, + * to avoid tripping the check in idle exit for a timer + * that is trying to run. 
+ */ + pcpu->idle_exit_time = 0; + } + + flush_work(&freq_scale_down_work); + if (atomic_dec_return(&active_count) > 0) + return 0; + + sysfs_remove_group(cpufreq_global_kobject, + &lulzactive_attr_group); + stop_lulzactive(); + break; + + case CPUFREQ_GOV_LIMITS: + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, + policy->min, CPUFREQ_RELATION_L); + break; + } + return 0; +} + +static int cpufreq_lulzactive_idle_notifier(struct notifier_block *nb, + unsigned long val, + void *data) +{ + switch (val) { + case IDLE_START: + cpufreq_lulzactive_idle_start(); + break; + case IDLE_END: + cpufreq_lulzactive_idle_end(); + break; + } + + return 0; +} + +static struct notifier_block cpufreq_lulzactive_idle_nb = { + .notifier_call = cpufreq_lulzactive_idle_notifier, +}; + +static void lulzactive_early_suspend(struct early_suspend *handler) { + early_suspended = 1; +} + +static void lulzactive_late_resume(struct early_suspend *handler) { + early_suspended = 0; +} + +static struct early_suspend lulzactive_power_suspend = { + .suspend = lulzactive_early_suspend, + .resume = lulzactive_late_resume, + .level = EARLY_SUSPEND_LEVEL_DISABLE_FB + 1, +}; + +void start_lulzactive(void) +{ + //it is more appropriate to start the up_task thread after starting the governor -gm + unsigned int i, index500, index800; + struct cpufreq_lulzactive_cpuinfo *pcpu; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + + if( pump_up_step == 0 ) + { + pcpu = &per_cpu(cpuinfo, 0); + cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + 500000, CPUFREQ_RELATION_H, + &index500); + cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + 800000, CPUFREQ_RELATION_H, + &index800); + for(i=index800;i<index500;i++) + { + if(pcpu->lulzfreq_table[i].frequency==CPUFREQ_ENTRY_INVALID) continue; + pump_up_step++; + } + } + if( pump_down_step == 0 ) + { + pump_down_step = pump_up_step; + } + + up_task = kthread_create(cpufreq_lulzactive_up_task, NULL, + "klulzactiveup"); + + sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param); + get_task_struct(up_task); + + idle_notifier_register(&cpufreq_lulzactive_idle_nb); + register_early_suspend(&lulzactive_power_suspend); +} + +void stop_lulzactive(void) +{ + //cleanup the thread after stopping the governor -gm + kthread_stop(up_task); + put_task_struct(up_task); + + idle_notifier_unregister(&cpufreq_lulzactive_idle_nb); + unregister_early_suspend(&lulzactive_power_suspend); + pump_up_step = DEFAULT_PUMP_UP_STEP; + pump_down_step = DEFAULT_PUMP_DOWN_STEP; +} + +static int __init cpufreq_lulzactive_init(void) +{ + unsigned int i; + struct cpufreq_lulzactive_cpuinfo *pcpu; + up_sample_time = DEFAULT_UP_SAMPLE_TIME; + down_sample_time = DEFAULT_DOWN_SAMPLE_TIME; + inc_cpu_load = DEFAULT_INC_CPU_LOAD; + dec_cpu_load = DEFAULT_DEC_CPU_LOAD; + pump_up_step = DEFAULT_PUMP_UP_STEP; + pump_down_step = DEFAULT_PUMP_DOWN_STEP; + early_suspended = 0; + screen_off_min_step = DEFAULT_SCREEN_OFF_MIN_STEP; + + + /* Initalize per-cpu timers */ + for_each_possible_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); + init_timer(&pcpu->cpu_timer); + pcpu->cpu_timer.function = cpufreq_lulzactive_timer; + pcpu->cpu_timer.data = i; + } + + /* No rescuer thread, bind to CPU queuing the work for possibly + warm cache (probably doesn't matter much).
*/ + down_wq = alloc_workqueue("knteractive_down", 0, 1); + + if (!down_wq) + goto err_freeuptask; + + INIT_WORK(&freq_scale_down_work, + cpufreq_lulzactive_freq_down); + + spin_lock_init(&up_cpumask_lock); + spin_lock_init(&down_cpumask_lock); + mutex_init(&set_speed_lock); + + return cpufreq_register_governor(&cpufreq_gov_lulzactive); + +err_freeuptask: + put_task_struct(up_task); + return -ENOMEM; +} + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_LULZACTIVE +fs_initcall(cpufreq_lulzactive_init); +#else +module_init(cpufreq_lulzactive_init); +#endif + +static void __exit cpufreq_lulzactive_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_lulzactive); + kthread_stop(up_task); + put_task_struct(up_task); + destroy_workqueue(down_wq); +} + +module_exit(cpufreq_lulzactive_exit); + +MODULE_AUTHOR("Tegrak "); +MODULE_DESCRIPTION("'lulzactive' - improved interactive governor inspired by smartass"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a9442a349bb..76f73de99c4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -22,6 +22,15 @@ #include #include #include +#include +#include +#include +#include +#include +#include + +#include "../../arch/arm/mach-tegra/clock.h" +#include "../../arch/arm/mach-tegra/pm.h" /* * dbs is used in this file as a shortform for demandbased switching @@ -29,14 +38,18 @@ */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_UP_THRESHOLD (90) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) -#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define MICRO_FREQUENCY_UP_THRESHOLD (90) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) +#define DEF_SAMPLING_RATE (50000) +#define DEF_IO_IS_BUSY (1) +#define DEF_UI_DYNAMIC_SAMPLING_RATE (30000) +#define DEF_UI_COUNTER (5) /* * The polling frequency of this governor depends on the capability of @@ -51,6 +64,7 @@ #define MIN_SAMPLING_RATE_RATIO (2) static unsigned int min_sampling_rate; +static unsigned int def_sampling_rate; #define LATENCY_MULTIPLIER (1000) #define MIN_LATENCY_MULTIPLIER (100) @@ -60,6 +74,10 @@ static void do_dbs_timer(struct work_struct *work); static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); +/* lpcpu variables */ +static struct clk *cpu_lp_clk; +static unsigned int idle_top_freq; + #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND static #endif @@ -94,9 +112,11 @@ struct cpu_dbs_info_s { */ struct mutex timer_mutex; }; + static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); static unsigned int dbs_enable; /* number of CPUs using this policy */ +static unsigned int g_ui_counter = 0; /* * dbs_mutex protects dbs_enable in governor start/stop. 
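/*
 * Rough sketch of the two mechanisms this ondemand patch adds (an
 * illustration of intent, not a drop-in implementation):
 *
 * 1) Two-phase ramp.  On a load spike the governor first targets
 *    two_phase_freq (or the LP cluster's idle_top_freq while only the
 *    low-power cluster is running) and commits to policy->max only
 *    after a few consecutive busy samples, roughly:
 *
 *        if (busy_samples <= 2)
 *                target = is_lp_cluster() ? idle_top_freq
 *                                         : dbs_tuners_ins.two_phase_freq;
 *        else
 *                target = policy->max;
 *
 *    (busy_samples is a stand-in name for the static counter used in
 *    dbs_check_cpu() further down.)
 *
 * 2) UI sampling window.  The dbs_refresh_callback() work item sets
 *    g_ui_counter = ui_counter and switches sampling_rate to
 *    ui_sampling_rate; each sample decrements the counter and, once it
 *    reaches zero, sampling_rate is restored from origin_sampling_rate.
 */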
@@ -111,12 +131,19 @@ static struct dbs_tuners { unsigned int sampling_down_factor; unsigned int powersave_bias; unsigned int io_is_busy; + unsigned int two_phase_freq; + unsigned int origin_sampling_rate; + unsigned int ui_sampling_rate; + unsigned int ui_counter; } dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, + .two_phase_freq = 1200000, + .ui_sampling_rate = DEF_UI_DYNAMIC_SAMPLING_RATE, + .ui_counter = DEF_UI_COUNTER, }; static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, @@ -144,10 +171,12 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) { - u64 idle_time = get_cpu_idle_time_us(cpu, wall); + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); return idle_time; } @@ -253,9 +282,12 @@ show_one(sampling_rate, sampling_rate); show_one(io_is_busy, io_is_busy); show_one(up_threshold, up_threshold); show_one(sampling_down_factor, sampling_down_factor); +show_one(down_differential, down_differential); show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); - +show_one(two_phase_freq, two_phase_freq); +show_one(ui_sampling_rate, ui_sampling_rate); +show_one(ui_counter, ui_counter); static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -265,6 +297,35 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; + return count; +} + +static ssize_t store_two_phase_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.two_phase_freq = input; + + return count; +} + +static ssize_t store_ui_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.ui_sampling_rate = max(input, min_sampling_rate); + return count; } @@ -296,6 +357,20 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, return count; } +static ssize_t store_down_differential(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if(ret != 1) { + return -EINVAL; + } + dbs_tuners_ins.down_differential = input; + return count; +} + static ssize_t store_sampling_down_factor(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -367,21 +442,43 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, return count; } +static ssize_t store_ui_counter(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if(ret != 1) + return -EINVAL; + + dbs_tuners_ins.ui_counter = input; + return count; +} + define_one_global_rw(sampling_rate); define_one_global_rw(io_is_busy); define_one_global_rw(up_threshold); +define_one_global_rw(down_differential); 
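/*
 * Note on the newly exported down_differential knob (illustrative): it
 * sets the hysteresis for scaling down.  The governor ramps up once load
 * exceeds up_threshold percent of the current frequency and only
 * considers stepping down after load falls below roughly
 * (up_threshold - down_differential) percent.  With the defaults this
 * patch uses (up_threshold = 90, down_differential = 10) that means
 * ramping up above ~90% load and backing off below ~80%.
 */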
define_one_global_rw(sampling_down_factor); define_one_global_rw(ignore_nice_load); define_one_global_rw(powersave_bias); +define_one_global_rw(two_phase_freq); +define_one_global_rw(ui_sampling_rate); +define_one_global_rw(ui_counter); static struct attribute *dbs_attributes[] = { &sampling_rate_min.attr, &sampling_rate.attr, &up_threshold.attr, + &down_differential.attr, &sampling_down_factor.attr, &ignore_nice_load.attr, &powersave_bias.attr, &io_is_busy.attr, + &two_phase_freq.attr, + &ui_sampling_rate.attr, + &ui_counter.attr, NULL }; @@ -396,19 +493,30 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) { if (dbs_tuners_ins.powersave_bias) freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H); - else if (p->cur == p->max) - return; + //else if (p->cur == p->max) + // return; __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ? CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); } +int set_two_phase_freq(int cpufreq) +{ + dbs_tuners_ins.two_phase_freq = cpufreq; + return 0; +} + static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) { unsigned int max_load_freq; + unsigned int debug_freq; + unsigned int debug_load; + unsigned int debug_iowait; struct cpufreq_policy *policy; unsigned int j; + static unsigned int phase = 0; + static unsigned int counter = 0; this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; @@ -489,20 +597,57 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) freq_avg = policy->cur; load_freq = load * freq_avg; - if (load_freq > max_load_freq) + if (load_freq > max_load_freq) { max_load_freq = load_freq; + debug_load = load; + debug_iowait = 100 * iowait_time / wall_time; + } + } + + if (g_ui_counter > 0){ + g_ui_counter--; + if(g_ui_counter == 0) + dbs_tuners_ins.sampling_rate = dbs_tuners_ins.origin_sampling_rate; } /* Check for frequency increase */ if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { /* If switching to max speed, apply sampling_down_factor */ - if (policy->cur < policy->max) - this_dbs_info->rate_mult = - dbs_tuners_ins.sampling_down_factor; - dbs_freq_increase(policy, policy->max); + if (counter < 5) { + counter++; + if (counter > 2) { + /* change to busy phase */ + phase = 1; + } + } + if (dbs_tuners_ins.two_phase_freq != 0 && phase == 0) { + debug_freq = dbs_tuners_ins.two_phase_freq; + /* idle phase + * limit the frequency to max lpcpu if only 1 cpu is online + * this should avoid fast "peak"-switching out of lpcpu */ + if (!is_lp_cluster()) + dbs_freq_increase(policy, dbs_tuners_ins.two_phase_freq); + else + dbs_freq_increase(policy, idle_top_freq); + } else { + /* busy phase */ + if (policy->cur < policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; + debug_freq = policy->max; + dbs_freq_increase(policy, policy->max); + } return; } + if (counter > 0) { + counter--; + if (counter == 0) { + /* change to idle phase */ + phase = 0; + } + } + /* Check for frequency decrease */ /* if we cannot reduce the frequency anymore, break out early */ if (policy->cur == policy->min) @@ -528,11 +673,13 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) freq_next = policy->min; if (!dbs_tuners_ins.powersave_bias) { + debug_freq = freq_next; __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); } else { int freq = powersave_bias_target(policy, freq_next, CPUFREQ_RELATION_L); + debug_freq = freq; __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); } @@ -616,9 +763,76 @@ static int should_io_be_busy(void) 
boot_cpu_data.x86_model >= 15) return 1; #endif - return 0; + return DEF_IO_IS_BUSY; } +#define AID_SYSTEM (1000) +static void dbs_chown(void) +{ + int ret; + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/ignore_nice_load", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown ignore_nice_load error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/io_is_busy", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown io_is_busy error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/powersave_bias", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown powersave_bias error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/sampling_down_factor", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown sampling_down_factor error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/sampling_rate", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown sampling_rate error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/two_phase_freq", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown two_phase_freq error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/up_threshold", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown up_threshold error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/down_differential", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown down_differential error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/ui_sampling_rate", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown ui_sampling_rate error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/ui_counter", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown ui_counter error: %d", ret); +} + +static void dbs_refresh_callback(struct work_struct *unused) +{ + struct cpufreq_policy *policy; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int cpu = smp_processor_id(); + + if (lock_policy_rwsem_write(cpu) < 0) + return; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + policy = this_dbs_info->cur_policy; + + g_ui_counter = dbs_tuners_ins.ui_counter; + if(dbs_tuners_ins.ui_counter > 0) + dbs_tuners_ins.sampling_rate = dbs_tuners_ins.ui_sampling_rate; + + unlock_policy_rwsem_write(cpu); +} + +static DECLARE_WORK(dbs_refresh_work, dbs_refresh_callback); + static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { @@ -666,6 +880,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return rc; } + dbs_chown(); + /* policy latency is in nS. 
Convert it to uS first */ latency = policy->cpuinfo.transition_latency / 1000; if (latency == 0) @@ -676,8 +892,12 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_tuners_ins.sampling_rate = max(min_sampling_rate, latency * LATENCY_MULTIPLIER); + if (def_sampling_rate) + dbs_tuners_ins.sampling_rate = def_sampling_rate; + dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; dbs_tuners_ins.io_is_busy = should_io_be_busy(); } + mutex_unlock(&dbs_mutex); mutex_init(&this_dbs_info->timer_mutex); @@ -690,6 +910,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_mutex); mutex_destroy(&this_dbs_info->timer_mutex); dbs_enable--; + if (!dbs_enable) sysfs_remove_group(cpufreq_global_kobject, &dbs_attr_group); @@ -716,6 +937,9 @@ static int __init cpufreq_gov_dbs_init(void) u64 idle_time; int cpu = get_cpu(); + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + idle_time = get_cpu_idle_time_us(cpu, &wall); put_cpu(); if (idle_time != -1ULL) { @@ -734,6 +958,7 @@ static int __init cpufreq_gov_dbs_init(void) min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); } + def_sampling_rate = DEF_SAMPLING_RATE; return cpufreq_register_governor(&cpufreq_gov_ondemand); } diff --git a/drivers/cpufreq/cpufreq_pegasusq.c b/drivers/cpufreq/cpufreq_pegasusq.c new file mode 100644 index 00000000000..dbca0a62288 --- /dev/null +++ b/drivers/cpufreq/cpufreq_pegasusq.c @@ -0,0 +1,1413 @@ +/* + * drivers/cpufreq/cpufreq_pegasusq.c + * + * Copyright (C) 2011 Samsung Electronics co. ltd + * ByungChang Cha + * + * Based on ondemand governor + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HAS_EARLYSUSPEND +#include +#endif + +/* + * runqueue average + */ + +#define RQ_AVG_TIMER_RATE 10 + +struct runqueue_data { + unsigned int nr_run_avg; + unsigned int update_rate; + int64_t last_time; + int64_t total_time; + struct delayed_work work; + struct workqueue_struct *nr_run_wq; + spinlock_t lock; +}; + +static struct runqueue_data *rq_data; +static void rq_work_fn(struct work_struct *work); + +static void start_rq_work(void) +{ + rq_data->nr_run_avg = 0; + rq_data->last_time = 0; + rq_data->total_time = 0; + if (rq_data->nr_run_wq == NULL) + rq_data->nr_run_wq = + create_singlethread_workqueue("nr_run_avg"); + + queue_delayed_work(rq_data->nr_run_wq, &rq_data->work, + msecs_to_jiffies(rq_data->update_rate)); + return; +} + +static void stop_rq_work(void) +{ + if (rq_data->nr_run_wq) + cancel_delayed_work(&rq_data->work); + return; +} + +static int __init init_rq_avg(void) +{ + rq_data = kzalloc(sizeof(struct runqueue_data), GFP_KERNEL); + if (rq_data == NULL) { + pr_err("%s cannot allocate memory\n", __func__); + return -ENOMEM; + } + spin_lock_init(&rq_data->lock); + rq_data->update_rate = RQ_AVG_TIMER_RATE; + INIT_DELAYED_WORK_DEFERRABLE(&rq_data->work, rq_work_fn); + + return 0; +} + +static void rq_work_fn(struct work_struct *work) +{ + int64_t time_diff = 0; + int64_t nr_run = 0; + unsigned long flags = 0; + int64_t cur_time = ktime_to_ns(ktime_get()); + + spin_lock_irqsave(&rq_data->lock, flags); + + if (rq_data->last_time == 0) + rq_data->last_time = cur_time; + if (rq_data->nr_run_avg == 0) + rq_data->total_time = 0; + + nr_run = nr_running() * 100; + time_diff = cur_time - rq_data->last_time; + do_div(time_diff, 1000 * 1000); + + if (time_diff != 0 && rq_data->total_time != 0) { + nr_run = (nr_run * time_diff) + + (rq_data->nr_run_avg * rq_data->total_time); + do_div(nr_run, rq_data->total_time + time_diff); + } + rq_data->nr_run_avg = nr_run; + rq_data->total_time += time_diff; + rq_data->last_time = cur_time; + + if (rq_data->update_rate != 0) + queue_delayed_work(rq_data->nr_run_wq, &rq_data->work, + msecs_to_jiffies(rq_data->update_rate)); + + spin_unlock_irqrestore(&rq_data->lock, flags); +} + +static unsigned int get_nr_run_avg(void) +{ + unsigned int nr_run_avg; + unsigned long flags = 0; + + spin_lock_irqsave(&rq_data->lock, flags); + nr_run_avg = rq_data->nr_run_avg; + rq_data->nr_run_avg = 0; + spin_unlock_irqrestore(&rq_data->lock, flags); + + return nr_run_avg; +} + + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_SAMPLING_DOWN_FACTOR (2) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) +#define DEF_SAMPLING_RATE (50000) +#define MIN_SAMPLING_RATE (10000) +#define MAX_HOTPLUG_RATE (40u) + +#define DEF_MAX_CPU_LOCK (0) +#define DEF_UP_NR_CPUS (1) +#define DEF_CPU_UP_RATE (10) +#define DEF_CPU_DOWN_RATE (20) +#define DEF_FREQ_STEP (40) +#define DEF_START_DELAY (0) + +#define UP_THRESHOLD_AT_MIN_FREQ (40) +#define FREQ_FOR_RESPONSIVENESS (500000) + +#define HOTPLUG_DOWN_INDEX (0) +#define HOTPLUG_UP_INDEX (1) + +#ifdef CONFIG_MACH_MIDAS +static int hotplug_rq[4][2] = { + {0, 
200}, {200, 300}, {300, 400}, {400, 0} +}; + +static int hotplug_freq[4][2] = { + {0, 500000}, + {400000, 500000}, + {400000, 800000}, + {600000, 0} +}; +#else +static int hotplug_rq[4][2] = { + {0, 100}, {100, 200}, {200, 300}, {300, 0} +}; + +static int hotplug_freq[4][2] = { + {0, 500000}, + {200000, 500000}, + {200000, 500000}, + {200000, 0} +}; +#endif + +static unsigned int min_sampling_rate; + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ +static +#endif +struct cpufreq_governor cpufreq_gov_pegasusq = { + .name = "pegasusq", + .governor = cpufreq_governor_dbs, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct work_struct up_work; + struct work_struct down_work; + struct cpufreq_frequency_table *freq_table; + unsigned int rate_mult; + int cpu; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); + +struct workqueue_struct *dvfs_workqueue; + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects dbs_enable in governor start/stop. + */ +static DEFINE_MUTEX(dbs_mutex); + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int sampling_down_factor; + unsigned int io_is_busy; + /* pegasusq tuners */ + unsigned int freq_step; + unsigned int cpu_up_rate; + unsigned int cpu_down_rate; + unsigned int up_nr_cpus; + unsigned int max_cpu_lock; + atomic_t hotplug_lock; + unsigned int dvfs_debug; + unsigned int max_freq; + unsigned int min_freq; +#ifdef CONFIG_HAS_EARLYSUSPEND + int early_suspend; +#endif + unsigned int up_threshold_at_min_freq; + unsigned int freq_for_responsiveness; +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .ignore_nice = 0, + .freq_step = DEF_FREQ_STEP, + .cpu_up_rate = DEF_CPU_UP_RATE, + .cpu_down_rate = DEF_CPU_DOWN_RATE, + .up_nr_cpus = DEF_UP_NR_CPUS, + .max_cpu_lock = DEF_MAX_CPU_LOCK, + .hotplug_lock = ATOMIC_INIT(0), + .dvfs_debug = 0, +#ifdef CONFIG_HAS_EARLYSUSPEND + .early_suspend = -1, +#endif + .up_threshold_at_min_freq = UP_THRESHOLD_AT_MIN_FREQ, + .freq_for_responsiveness = FREQ_FOR_RESPONSIVENESS, +}; + + +/* + * CPU hotplug lock interface + */ + +static atomic_t g_hotplug_count = ATOMIC_INIT(0); +static atomic_t g_hotplug_lock = ATOMIC_INIT(0); + +static void apply_hotplug_lock(void) +{ + int online, possible, lock, flag; + struct work_struct *work; + struct cpu_dbs_info_s *dbs_info; + + /* do turn_on/off cpus */ + dbs_info = &per_cpu(od_cpu_dbs_info, 0); /* from CPU0 */ + online = num_online_cpus(); + possible = num_possible_cpus(); + lock = atomic_read(&g_hotplug_lock); + flag = lock - online; + + if (flag == 0) + return; + + work = flag > 0 ? 
&dbs_info->up_work : &dbs_info->down_work; + + pr_debug("%s online %d possible %d lock %d flag %d %d\n", + __func__, online, possible, lock, flag, (int)abs(flag)); + + queue_work_on(dbs_info->cpu, dvfs_workqueue, work); +} + +int cpufreq_pegasusq_cpu_lock(int num_core) +{ + int prev_lock; + + if (num_core < 1 || num_core > num_possible_cpus()) + return -EINVAL; + + prev_lock = atomic_read(&g_hotplug_lock); + + if (prev_lock != 0 && prev_lock < num_core) + return -EINVAL; + else if (prev_lock == num_core) + atomic_inc(&g_hotplug_count); + + atomic_set(&g_hotplug_lock, num_core); + atomic_set(&g_hotplug_count, 1); + apply_hotplug_lock(); + + return 0; +} + +int cpufreq_pegasusq_cpu_unlock(int num_core) +{ + int prev_lock = atomic_read(&g_hotplug_lock); + + if (prev_lock < num_core) + return 0; + else if (prev_lock == num_core) + atomic_dec(&g_hotplug_count); + + if (atomic_read(&g_hotplug_count) == 0) + atomic_set(&g_hotplug_lock, 0); + + return 0; +} + + +/* + * History of CPU usage + */ +struct cpu_usage { + unsigned int freq; + unsigned int load[NR_CPUS]; + unsigned int rq_avg; +}; + +struct cpu_usage_history { + struct cpu_usage usage[MAX_HOTPLUG_RATE]; + unsigned int num_hist; +}; + +struct cpu_usage_history *hotplug_history; + +static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, + cputime64_t *wall) +{ + cputime64_t idle_time; + cputime64_t cur_wall_time; + cputime64_t busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, + kstat_cpu(cpu).cpustat.system); + + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); + + idle_time = cputime64_sub(cur_wall_time, busy_time); + if (wall) + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); + + return (cputime64_t)jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + + return idle_time; +} + +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, + cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + +define_one_global_ro(sampling_rate_min); + +/* cpufreq_pegasusq Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); +show_one(up_threshold, up_threshold); +show_one(sampling_down_factor, sampling_down_factor); +show_one(ignore_nice_load, ignore_nice); +show_one(down_differential, down_differential); +show_one(freq_step, freq_step); +show_one(cpu_up_rate, cpu_up_rate); +show_one(cpu_down_rate, cpu_down_rate); +show_one(up_nr_cpus, up_nr_cpus); +show_one(max_cpu_lock, max_cpu_lock); +show_one(dvfs_debug, dvfs_debug); +show_one(up_threshold_at_min_freq, up_threshold_at_min_freq); 
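+/*
+ * For reference, each show_one() invocation here expands to a one-line sysfs
+ * show handler. show_one(sampling_rate, sampling_rate), for instance, is
+ * equivalent to:
+ *
+ *	static ssize_t show_sampling_rate(struct kobject *kobj,
+ *					  struct attribute *attr, char *buf)
+ *	{
+ *		return sprintf(buf, "%u\n", dbs_tuners_ins.sampling_rate);
+ *	}
+ */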
+show_one(freq_for_responsiveness, freq_for_responsiveness); +static ssize_t show_hotplug_lock(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", atomic_read(&g_hotplug_lock)); +} + +#define show_hotplug_param(file_name, num_core, up_down) \ +static ssize_t show_##file_name##_##num_core##_##up_down \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", file_name[num_core - 1][up_down]); \ +} + +#define store_hotplug_param(file_name, num_core, up_down) \ +static ssize_t store_##file_name##_##num_core##_##up_down \ +(struct kobject *kobj, struct attribute *attr, \ + const char *buf, size_t count) \ +{ \ + unsigned int input; \ + int ret; \ + ret = sscanf(buf, "%u", &input); \ + if (ret != 1) \ + return -EINVAL; \ + file_name[num_core - 1][up_down] = input; \ + return count; \ +} + +show_hotplug_param(hotplug_freq, 1, 1); +show_hotplug_param(hotplug_freq, 2, 0); +show_hotplug_param(hotplug_freq, 2, 1); +show_hotplug_param(hotplug_freq, 3, 0); +show_hotplug_param(hotplug_freq, 3, 1); +show_hotplug_param(hotplug_freq, 4, 0); + +show_hotplug_param(hotplug_rq, 1, 1); +show_hotplug_param(hotplug_rq, 2, 0); +show_hotplug_param(hotplug_rq, 2, 1); +show_hotplug_param(hotplug_rq, 3, 0); +show_hotplug_param(hotplug_rq, 3, 1); +show_hotplug_param(hotplug_rq, 4, 0); + +store_hotplug_param(hotplug_freq, 1, 1); +store_hotplug_param(hotplug_freq, 2, 0); +store_hotplug_param(hotplug_freq, 2, 1); +store_hotplug_param(hotplug_freq, 3, 0); +store_hotplug_param(hotplug_freq, 3, 1); +store_hotplug_param(hotplug_freq, 4, 0); + +store_hotplug_param(hotplug_rq, 1, 1); +store_hotplug_param(hotplug_rq, 2, 0); +store_hotplug_param(hotplug_rq, 2, 1); +store_hotplug_param(hotplug_rq, 3, 0); +store_hotplug_param(hotplug_rq, 3, 1); +store_hotplug_param(hotplug_rq, 4, 0); + +define_one_global_rw(hotplug_freq_1_1); +define_one_global_rw(hotplug_freq_2_0); +define_one_global_rw(hotplug_freq_2_1); +define_one_global_rw(hotplug_freq_3_0); +define_one_global_rw(hotplug_freq_3_1); +define_one_global_rw(hotplug_freq_4_0); + +define_one_global_rw(hotplug_rq_1_1); +define_one_global_rw(hotplug_rq_2_0); +define_one_global_rw(hotplug_rq_2_1); +define_one_global_rw(hotplug_rq_3_0); +define_one_global_rw(hotplug_rq_3_1); +define_one_global_rw(hotplug_rq_4_0); + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.io_is_busy = !!input; + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + dbs_tuners_ins.up_threshold = input; + return count; +} + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + 
dbs_tuners_ins.sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->rate_mult = 1; + } + return count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = + get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + } + return count; +} + +static ssize_t store_down_differential(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.down_differential = min(input, 100u); + return count; +} + +static ssize_t store_freq_step(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.freq_step = min(input, 100u); + return count; +} + +static ssize_t store_cpu_up_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_up_rate = min(input, MAX_HOTPLUG_RATE); + return count; +} + +static ssize_t store_cpu_down_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_down_rate = min(input, MAX_HOTPLUG_RATE); + return count; +} + + +static ssize_t store_up_nr_cpus(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.up_nr_cpus = min(input, num_possible_cpus()); + return count; +} + +static ssize_t store_max_cpu_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.max_cpu_lock = min(input, num_possible_cpus()); + return count; +} + +static ssize_t store_hotplug_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + int prev_lock; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + input = min(input, num_possible_cpus()); + prev_lock = atomic_read(&dbs_tuners_ins.hotplug_lock); + + if (prev_lock) + cpufreq_pegasusq_cpu_unlock(prev_lock); + + if (input == 0) { + atomic_set(&dbs_tuners_ins.hotplug_lock, 0); + return count; + } + + ret = cpufreq_pegasusq_cpu_lock(input); + if (ret) { + printk(KERN_ERR "[HOTPLUG] already locked with smaller value %d < %d\n", + atomic_read(&g_hotplug_lock), input); + return ret; + } + + atomic_set(&dbs_tuners_ins.hotplug_lock, input); + + return count; +} + +static ssize_t store_dvfs_debug(struct 
kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.dvfs_debug = input > 0; + return count; +} + +static ssize_t store_up_threshold_at_min_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + dbs_tuners_ins.up_threshold_at_min_freq = input; + return count; +} + +static ssize_t store_freq_for_responsiveness(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.freq_for_responsiveness = input; + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(io_is_busy); +define_one_global_rw(up_threshold); +define_one_global_rw(sampling_down_factor); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(down_differential); +define_one_global_rw(freq_step); +define_one_global_rw(cpu_up_rate); +define_one_global_rw(cpu_down_rate); +define_one_global_rw(up_nr_cpus); +define_one_global_rw(max_cpu_lock); +define_one_global_rw(hotplug_lock); +define_one_global_rw(dvfs_debug); +define_one_global_rw(up_threshold_at_min_freq); +define_one_global_rw(freq_for_responsiveness); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_min.attr, + &sampling_rate.attr, + &up_threshold.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &io_is_busy.attr, + &down_differential.attr, + &freq_step.attr, + &cpu_up_rate.attr, + &cpu_down_rate.attr, + &up_nr_cpus.attr, + /* priority: hotplug_lock > max_cpu_lock */ + &max_cpu_lock.attr, + &hotplug_lock.attr, + &dvfs_debug.attr, + &hotplug_freq_1_1.attr, + &hotplug_freq_2_0.attr, + &hotplug_freq_2_1.attr, + &hotplug_freq_3_0.attr, + &hotplug_freq_3_1.attr, + &hotplug_freq_4_0.attr, + &hotplug_rq_1_1.attr, + &hotplug_rq_2_0.attr, + &hotplug_rq_2_1.attr, + &hotplug_rq_3_0.attr, + &hotplug_rq_3_1.attr, + &hotplug_rq_4_0.attr, + &up_threshold_at_min_freq.attr, + &freq_for_responsiveness.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "pegasusq", +}; + +/************************** sysfs end ************************/ + +static void cpu_up_work(struct work_struct *work) +{ + int cpu; + int online = num_online_cpus(); + int nr_up = dbs_tuners_ins.up_nr_cpus; + int hotplug_lock = atomic_read(&g_hotplug_lock); + if (hotplug_lock) + nr_up = hotplug_lock - online; + + if (online == 1) { + printk(KERN_ERR "CPU_UP 3\n"); + cpu_up(num_possible_cpus() - 1); + nr_up -= 1; + } + + for_each_cpu_not(cpu, cpu_online_mask) { + if (nr_up-- == 0) + break; + if (cpu == 0) + continue; + printk(KERN_ERR "CPU_UP %d\n", cpu); + cpu_up(cpu); + } +} + +static void cpu_down_work(struct work_struct *work) +{ + int cpu; + int online = num_online_cpus(); + int nr_down = 1; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock) + nr_down = online - hotplug_lock; + + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + printk(KERN_ERR "CPU_DOWN %d\n", cpu); + cpu_down(cpu); + if (--nr_down == 0) + break; + } +} + +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ +#ifndef CONFIG_ARCH_EXYNOS4 + if (p->cur == p->max) + return; +#endif + + __cpufreq_driver_target(p, 
freq, CPUFREQ_RELATION_L); +} + +/* + * print hotplug debugging info. + * which 1 : UP, 0 : DOWN + */ +static void debug_hotplug_check(int which, int rq_avg, int freq, + struct cpu_usage *usage) +{ + int cpu; + printk(KERN_ERR "CHECK %s rq %d.%02d freq %d [", which ? "up" : "down", + rq_avg / 100, rq_avg % 100, freq); + for_each_online_cpu(cpu) { + printk(KERN_ERR "(%d, %d), ", cpu, usage->load[cpu]); + } + printk(KERN_ERR "]\n"); +} + +static int check_up(void) +{ + int num_hist = hotplug_history->num_hist; + struct cpu_usage *usage; + int freq, rq_avg; + int i; + int up_rate = dbs_tuners_ins.cpu_up_rate; + int up_freq, up_rq; + int min_freq = INT_MAX; + int min_rq_avg = INT_MAX; + int online; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock > 0) + return 0; + + online = num_online_cpus(); + up_freq = hotplug_freq[online - 1][HOTPLUG_UP_INDEX]; + up_rq = hotplug_rq[online - 1][HOTPLUG_UP_INDEX]; + + if (online == num_possible_cpus()) + return 0; + if (dbs_tuners_ins.max_cpu_lock != 0 + && online >= dbs_tuners_ins.max_cpu_lock) + return 0; + + if (num_hist == 0 || num_hist % up_rate) + return 0; + + for (i = num_hist - 1; i >= num_hist - up_rate; --i) { + usage = &hotplug_history->usage[i]; + + freq = usage->freq; + rq_avg = usage->rq_avg; + + min_freq = min(min_freq, freq); + min_rq_avg = min(min_rq_avg, rq_avg); + + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(1, rq_avg, freq, usage); + } + + if (min_freq >= up_freq && min_rq_avg > up_rq) { + printk(KERN_ERR "[HOTPLUG IN] %s %d>=%d && %d>%d\n", + __func__, min_freq, up_freq, min_rq_avg, up_rq); + hotplug_history->num_hist = 0; + return 1; + } + return 0; +} + +static int check_down(void) +{ + int num_hist = hotplug_history->num_hist; + struct cpu_usage *usage; + int freq, rq_avg; + int i; + int down_rate = dbs_tuners_ins.cpu_down_rate; + int down_freq, down_rq; + int max_freq = 0; + int max_rq_avg = 0; + int online; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock > 0) + return 0; + + online = num_online_cpus(); + down_freq = hotplug_freq[online - 1][HOTPLUG_DOWN_INDEX]; + down_rq = hotplug_rq[online - 1][HOTPLUG_DOWN_INDEX]; + + if (online == 1) + return 0; + + if (dbs_tuners_ins.max_cpu_lock != 0 + && online > dbs_tuners_ins.max_cpu_lock) + return 1; + + if (num_hist == 0 || num_hist % down_rate) + return 0; + + for (i = num_hist - 1; i >= num_hist - down_rate; --i) { + usage = &hotplug_history->usage[i]; + + freq = usage->freq; + rq_avg = usage->rq_avg; + + max_freq = max(max_freq, freq); + max_rq_avg = max(max_rq_avg, rq_avg); + + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(0, rq_avg, freq, usage); + } + + if (max_freq <= down_freq && max_rq_avg <= down_rq) { + printk(KERN_ERR "[HOTPLUG OUT] %s %d<=%d && %d<%d\n", + __func__, max_freq, down_freq, max_rq_avg, down_rq); + hotplug_history->num_hist = 0; + return 1; + } + + return 0; +} + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int max_load_freq; + + struct cpufreq_policy *policy; + unsigned int j; + int num_hist = hotplug_history->num_hist; + int max_hotplug_rate = max(dbs_tuners_ins.cpu_up_rate, + dbs_tuners_ins.cpu_down_rate); + int up_threshold = dbs_tuners_ins.up_threshold; + + policy = this_dbs_info->cur_policy; + + hotplug_history->usage[num_hist].freq = policy->cur; + hotplug_history->usage[num_hist].rq_avg = get_nr_run_avg(); + ++hotplug_history->num_hist; + + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + + for_each_cpu(j, policy->cpus) { + struct 
cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + cputime64_t prev_wall_time, prev_idle_time, prev_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + unsigned int load, load_freq; + int freq_avg; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + prev_wall_time = j_dbs_info->prev_cpu_wall; + prev_idle_time = j_dbs_info->prev_cpu_idle; + prev_iowait_time = j_dbs_info->prev_cpu_iowait; + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + prev_wall_time); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + prev_idle_time); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + prev_iowait_time); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, + j_dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + hotplug_history->usage[num_hist].load[j] = load; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) + max_load_freq = load_freq; + } + + /* Check for CPU hotplug */ + if (check_up()) { + queue_work_on(this_dbs_info->cpu, dvfs_workqueue, + &this_dbs_info->up_work); + } else if (check_down()) { + queue_work_on(this_dbs_info->cpu, dvfs_workqueue, + &this_dbs_info->down_work); + } + if (hotplug_history->num_hist == max_hotplug_rate) + hotplug_history->num_hist = 0; + + /* Check for frequency increase */ + if (policy->cur < dbs_tuners_ins.freq_for_responsiveness) { + up_threshold = dbs_tuners_ins.up_threshold_at_min_freq; + } + + if (max_load_freq > up_threshold * policy->cur) { + int inc = (policy->max * dbs_tuners_ins.freq_step) / 100; + int target = min(policy->max, policy->cur + inc); + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max && target == policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; + dbs_freq_increase(policy, target); + return; + } + + /* Check for frequency decrease */ +#ifndef CONFIG_ARCH_EXYNOS4 + /* if we cannot reduce the frequency anymore, break out early */ + if (policy->cur == policy->min) + return; +#endif + + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus DOWN_DIFFERENTIAL points under + * the threshold. 
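+ *
+ * Worked example with assumed numbers and the defaults above
+ * (up_threshold = 80, down_differential = 5): a CPU running at
+ * 1,000,000 kHz whose busiest load is 30% gives
+ * max_load_freq = 30 * 1,000,000, which is below 75 * 1,000,000, so
+ * freq_next = 30,000,000 / 75 = 400,000 kHz. Since that is under
+ * freq_for_responsiveness (500,000 kHz by default) and
+ * 30,000,000 / 400,000 = 75 exceeds
+ * up_threshold_at_min_freq - down_differential = 35, the check below then
+ * raises the target back to 500,000 kHz.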
+ */ + if (max_load_freq < + (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + policy->cur) { + unsigned int freq_next; + unsigned int down_thres; + + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + dbs_tuners_ins.down_differential); + + /* No longer fully busy, reset rate_mult */ + this_dbs_info->rate_mult = 1; + + if (freq_next < policy->min) + freq_next = policy->min; + + + down_thres = dbs_tuners_ins.up_threshold_at_min_freq + - dbs_tuners_ins.down_differential; + + if (freq_next < dbs_tuners_ins.freq_for_responsiveness + && (max_load_freq / freq_next) > down_thres) + freq_next = dbs_tuners_ins.freq_for_responsiveness; + + if (policy->cur == freq_next) + return; + + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int delay; + + mutex_lock(&dbs_info->timer_mutex); + + dbs_check_cpu(dbs_info); + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate + * dbs_info->rate_mult); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + queue_delayed_work_on(cpu, dvfs_workqueue, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(DEF_START_DELAY * 1000 * 1000 + + dbs_tuners_ins.sampling_rate); + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + INIT_WORK(&dbs_info->up_work, cpu_up_work); + INIT_WORK(&dbs_info->down_work, cpu_down_work); + + queue_delayed_work_on(dbs_info->cpu, dvfs_workqueue, + &dbs_info->work, delay + 2 * HZ); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + cancel_delayed_work_sync(&dbs_info->work); + cancel_work_sync(&dbs_info->up_work); + cancel_work_sync(&dbs_info->down_work); +} + +static int pm_notifier_call(struct notifier_block *this, + unsigned long event, void *ptr) +{ + static unsigned int prev_hotplug_lock; + switch (event) { + case PM_SUSPEND_PREPARE: + prev_hotplug_lock = atomic_read(&g_hotplug_lock); + atomic_set(&g_hotplug_lock, 1); + apply_hotplug_lock(); + pr_debug("%s enter suspend\n", __func__); + return NOTIFY_OK; + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + atomic_set(&g_hotplug_lock, prev_hotplug_lock); + if (prev_hotplug_lock) + apply_hotplug_lock(); + prev_hotplug_lock = 0; + pr_debug("%s exit suspend\n", __func__); + return NOTIFY_OK; + } + return NOTIFY_DONE; +} + +static struct notifier_block pm_notifier = { + .notifier_call = pm_notifier_call, +}; + +static int reboot_notifier_call(struct notifier_block *this, + unsigned long code, void *_cmd) +{ + atomic_set(&g_hotplug_lock, 1); + return NOTIFY_DONE; +} + +static struct notifier_block reboot_notifier = { + .notifier_call = reboot_notifier_call, +}; + +#ifdef CONFIG_HAS_EARLYSUSPEND +static struct early_suspend early_suspend; +unsigned int prev_freq_step; +unsigned int prev_sampling_rate; +static void cpufreq_pegasusq_early_suspend(struct early_suspend *h) +{ + dbs_tuners_ins.early_suspend = + atomic_read(&g_hotplug_lock); + prev_freq_step = dbs_tuners_ins.freq_step; + prev_sampling_rate = dbs_tuners_ins.sampling_rate; + dbs_tuners_ins.freq_step = 20; + dbs_tuners_ins.sampling_rate *= 4; + 
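+ /*
+ * While the screen is off the tunables saved above are replaced by more
+ * conservative ones; below, the hotplug lock is additionally pinned to a
+ * single online core and the runqueue-average worker is stopped.
+ * cpufreq_pegasusq_late_resume() restores the previous settings.
+ */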
atomic_set(&g_hotplug_lock, 1); + apply_hotplug_lock(); + stop_rq_work(); +} +static void cpufreq_pegasusq_late_resume(struct early_suspend *h) +{ + atomic_set(&g_hotplug_lock, dbs_tuners_ins.early_suspend); + dbs_tuners_ins.early_suspend = -1; + dbs_tuners_ins.freq_step = prev_freq_step; + dbs_tuners_ins.sampling_rate = prev_sampling_rate; + apply_hotplug_lock(); + start_rq_work(); +} +#endif + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + dbs_tuners_ins.max_freq = policy->max; + dbs_tuners_ins.min_freq = policy->min; + hotplug_history->num_hist = 0; + start_rq_work(); + + mutex_lock(&dbs_mutex); + + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kstat_cpu(j).cpustat.nice; + } + } + this_dbs_info->cpu = cpu; + this_dbs_info->rate_mult = 1; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + min_sampling_rate = MIN_SAMPLING_RATE; + dbs_tuners_ins.sampling_rate = DEF_SAMPLING_RATE; + dbs_tuners_ins.io_is_busy = 0; + } + mutex_unlock(&dbs_mutex); + + register_reboot_notifier(&reboot_notifier); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + +#ifdef CONFIG_HAS_EARLYSUSPEND + register_early_suspend(&early_suspend); +#endif + break; + + case CPUFREQ_GOV_STOP: +#ifdef CONFIG_HAS_EARLYSUSPEND + unregister_early_suspend(&early_suspend); +#endif + + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + mutex_destroy(&this_dbs_info->timer_mutex); + + unregister_reboot_notifier(&reboot_notifier); + + dbs_enable--; + mutex_unlock(&dbs_mutex); + + stop_rq_work(); + + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, + CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, + CPUFREQ_RELATION_L); + + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +static int __init cpufreq_gov_dbs_init(void) +{ + int ret; + + ret = init_rq_avg(); + if (ret) + return ret; + + hotplug_history = kzalloc(sizeof(struct cpu_usage_history), GFP_KERNEL); + if (!hotplug_history) { + pr_err("%s cannot create hotplug history array\n", __func__); + ret = -ENOMEM; + goto err_hist; + } + + dvfs_workqueue = create_workqueue("kpegasusq"); + if (!dvfs_workqueue) { + pr_err("%s cannot create workqueue\n", __func__); + ret = -ENOMEM; + goto err_queue; + } + + ret = cpufreq_register_governor(&cpufreq_gov_pegasusq); + if (ret) + goto err_reg; + +#ifdef CONFIG_HAS_EARLYSUSPEND + early_suspend.level = EARLY_SUSPEND_LEVEL_DISABLE_FB; + early_suspend.suspend = cpufreq_pegasusq_early_suspend; + 
early_suspend.resume = cpufreq_pegasusq_late_resume; +#endif + + return ret; + +err_reg: + destroy_workqueue(dvfs_workqueue); +err_queue: + kfree(hotplug_history); +err_hist: + kfree(rq_data); + return ret; +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_pegasusq); + destroy_workqueue(dvfs_workqueue); + kfree(hotplug_history); + kfree(rq_data); +} + +MODULE_AUTHOR("ByungChang Cha "); +MODULE_DESCRIPTION("'cpufreq_pegasusq' - A dynamic cpufreq/cpuhotplug governor"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index f13a8a9af6a..b89ed16f588 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -15,12 +15,27 @@ #include #include +#include +static struct pm_qos_request_list perf_core_lock_min; +static struct pm_qos_request_list perf_core_lock_max; + static int cpufreq_governor_performance(struct cpufreq_policy *policy, unsigned int event) { switch (event) { case CPUFREQ_GOV_START: + pm_qos_update_request(&perf_core_lock_min, + (s32)4); + pm_qos_update_request(&perf_core_lock_max, + (s32)4); + break; + case CPUFREQ_GOV_STOP: + pm_qos_update_request(&perf_core_lock_min, + (s32)PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + pm_qos_update_request(&perf_core_lock_max, + (s32)PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); + break; case CPUFREQ_GOV_LIMITS: pr_debug("setting to %u kHz because of event %u\n", policy->max, event); @@ -45,12 +60,18 @@ struct cpufreq_governor cpufreq_gov_performance = { static int __init cpufreq_gov_performance_init(void) { + pm_qos_add_request(&perf_core_lock_min, PM_QOS_MIN_ONLINE_CPUS, + PM_QOS_DEFAULT_VALUE); + pm_qos_add_request(&perf_core_lock_max, PM_QOS_MAX_ONLINE_CPUS, + PM_QOS_DEFAULT_VALUE); return cpufreq_register_governor(&cpufreq_gov_performance); } static void __exit cpufreq_gov_performance_exit(void) { + pm_qos_remove_request(&perf_core_lock_min); + pm_qos_remove_request(&perf_core_lock_max); cpufreq_unregister_governor(&cpufreq_gov_performance); } diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c new file mode 100644 index 00000000000..2b56894521c --- /dev/null +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -0,0 +1,1161 @@ +/* + * drivers/cpufreq/cpufreq_touchdemand.c + * + * Touch-Demand + * - Cpu scaling governor based on ondemand + * - Modified for Tegra 3, the LP core, and touchscreen response + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../arch/arm/mach-tegra/clock.h" +#include "../../arch/arm/mach-tegra/pm.h" + +#include +static struct pm_qos_request_list touch_min_cpu_req; +unsigned int min_cpus_lock; + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (90) +#define DEF_SAMPLING_DOWN_FACTOR (4) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define MICRO_FREQUENCY_UP_THRESHOLD (90) +#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) +#define DEF_SAMPLING_RATE (20000) +#define DEF_IO_IS_BUSY (1) + +/* + * The polling frequency of this governor depends on the capability of + * the processor. Default polling frequency is 1000 times the transition + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling + * rate. + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. + * All times here are in uS. + */ +#define MIN_SAMPLING_RATE_RATIO (2) + +static unsigned int min_sampling_rate; +static unsigned int def_sampling_rate; + +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +/* lpcpu variables */ +static struct clk *cpu_lp_clk; +static unsigned int idle_top_freq; + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND +static +#endif +struct cpufreq_governor cpufreq_gov_touchdemand = { + .name = "touchdemand", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; + unsigned int rate_mult; + int cpu; + unsigned int sample_type:1; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; +}; + +static DEFINE_PER_CPU(struct cpu_dbs_info_s, td_cpu_dbs_info); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects dbs_enable in governor start/stop. 
+ */ +static DEFINE_MUTEX(dbs_mutex); + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int sampling_down_factor; + unsigned int powersave_bias; + unsigned int io_is_busy; + unsigned int touch_floor_freq; + unsigned int touch_floor_time; + unsigned int touch_min_cores; + unsigned int touch_factor; + unsigned int touch_poke; + unsigned int origin_sampling_rate; +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .ignore_nice = 0, + .powersave_bias = 0, + .touch_floor_freq = 475000, + .touch_floor_time = 1000, + .touch_min_cores = 0, + .touch_factor = 4, + .touch_poke = 1, +}; + +static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, + cputime64_t *wall) +{ + cputime64_t idle_time; + cputime64_t cur_wall_time; + cputime64_t busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, + kstat_cpu(cpu).cpustat.system); + + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); + + idle_time = cputime64_sub(cur_wall_time, busy_time); + if (wall) + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); + + return (cputime64_t)jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + + return idle_time; +} + +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +/* + * Find right freq to be set now with powersave_bias on. + * Returns the freq_hi to be used right now and will set freq_hi_jiffies, + * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. 
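+ *
+ * Illustrative example with assumed table entries: powersave_bias = 100
+ * (i.e. 10%) and a requested 1,000,000 kHz give freq_avg = 900,000 kHz.
+ * If the frequency table brackets that with freq_lo = 800,000 and
+ * freq_hi = 1,000,000, jiffies_hi works out to half of the sampling
+ * window, so the CPU spends half the window at freq_hi and the rest at
+ * freq_lo, averaging out near freq_avg.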
+ */ +static unsigned int powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, + unsigned int relation) +{ + unsigned int freq_req, freq_reduc, freq_avg; + unsigned int freq_hi, freq_lo; + unsigned int index = 0; + unsigned int jiffies_total, jiffies_hi, jiffies_lo; + struct cpu_dbs_info_s *dbs_info = &per_cpu(td_cpu_dbs_info, + policy->cpu); + + if (!dbs_info->freq_table) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_next; + } + + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, + relation, &index); + freq_req = dbs_info->freq_table[index].frequency; + freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; + freq_avg = freq_req - freq_reduc; + + /* Find freq bounds for freq_avg in freq_table */ + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = dbs_info->freq_table[index].frequency; + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = dbs_info->freq_table[index].frequency; + + /* Find out how long we have to be in hi and lo freqs */ + if (freq_hi == freq_lo) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_lo; + } + jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + jiffies_hi = (freq_avg - freq_lo) * jiffies_total; + jiffies_hi += ((freq_hi - freq_lo) / 2); + jiffies_hi /= (freq_hi - freq_lo); + jiffies_lo = jiffies_total - jiffies_hi; + dbs_info->freq_lo = freq_lo; + dbs_info->freq_lo_jiffies = jiffies_lo; + dbs_info->freq_hi_jiffies = jiffies_hi; + return freq_hi; +} + +static void touchdemand_powersave_bias_init_cpu(int cpu) +{ + struct cpu_dbs_info_s *dbs_info = &per_cpu(td_cpu_dbs_info, cpu); + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; +} + +static void touchdemand_powersave_bias_init(void) +{ + int i; + for_each_online_cpu(i) { + touchdemand_powersave_bias_init_cpu(i); + } +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + +define_one_global_ro(sampling_rate_min); + +/* cpufreq_touchdemand Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); +show_one(up_threshold, up_threshold); +show_one(sampling_down_factor, sampling_down_factor); +show_one(down_differential, down_differential); +show_one(ignore_nice_load, ignore_nice); +show_one(powersave_bias, powersave_bias); +show_one(touch_floor_freq, touch_floor_freq); +show_one(touch_floor_time, touch_floor_time); +show_one(touch_min_cores, touch_min_cores); +show_one(touch_factor, touch_factor); +show_one(touch_poke, touch_poke); + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; + return count; +} + +static ssize_t store_touch_floor_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + 
unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.touch_floor_freq = input; + + return count; +} + +static ssize_t store_touch_floor_time(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.touch_floor_time = input; + + return count; +} + +static ssize_t store_touch_min_cores(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 4) + input = 4; + + dbs_tuners_ins.touch_min_cores = input; + + /* Make sure touch lock gets reset */ + pm_qos_update_request(&touch_min_cpu_req, + (s32)PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + min_cpus_lock = 0; + + return count; +} + +static ssize_t store_touch_factor(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.touch_factor = input; + + return count; +} + +static unsigned int Touch_poke_attr[4] = {1300000, 1200000, 1000000, 0}; +static unsigned int Touch_poke_boost = 1; +static unsigned long Touch_poke_boost_till_jiffies = 0; + +static ssize_t store_touch_poke(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + int ret; + ret = sscanf(buf, "%u,%u,%u,%u,%u", &Touch_poke_attr[0], &Touch_poke_attr[1], + &Touch_poke_attr[2], &Touch_poke_attr[3], &Touch_poke_boost); + if (ret < 4) + return -EINVAL; + + if (ret != 5) + Touch_poke_boost = 0; + + if(Touch_poke_attr[0] == 0) + dbs_tuners_ins.touch_poke = 0; + else + dbs_tuners_ins.touch_poke = 1; + + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.io_is_busy = !!input; + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + dbs_tuners_ins.up_threshold = input; + return count; +} + +static ssize_t store_down_differential(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if(ret != 1) { + return -EINVAL; + } + dbs_tuners_ins.down_differential = input; + return count; +} + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, const char *buf, size_t count) +{ + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + dbs_tuners_ins.sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(td_cpu_dbs_info, j); + dbs_info->rate_mult = 1; + } + return count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if 
(input > 1) + input = 1; + + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(td_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + + } + return count; +} + +static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 1000) + input = 1000; + + dbs_tuners_ins.powersave_bias = input; + touchdemand_powersave_bias_init(); + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(io_is_busy); +define_one_global_rw(up_threshold); +define_one_global_rw(down_differential); +define_one_global_rw(sampling_down_factor); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(powersave_bias); +define_one_global_rw(touch_floor_freq); +define_one_global_rw(touch_floor_time); +define_one_global_rw(touch_min_cores); +define_one_global_rw(touch_factor); +define_one_global_rw(touch_poke); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_min.attr, + &sampling_rate.attr, + &up_threshold.attr, + &down_differential.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &powersave_bias.attr, + &io_is_busy.attr, + &touch_floor_freq.attr, + &touch_floor_time.attr, + &touch_min_cores.attr, + &touch_factor.attr, + &touch_poke.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "touchdemand", +}; + +/************************** sysfs end ************************/ + +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ + if (dbs_tuners_ins.powersave_bias) + freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H); + //else if (p->cur == p->max) + // return; + + __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ? + CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); +} + +int set_touch_floor_freq(int cpufreq) +{ + dbs_tuners_ins.touch_floor_freq = cpufreq; + return 0; +} + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int max_load_freq; + unsigned int debug_freq; + unsigned int debug_load; + unsigned int debug_iowait; + unsigned int down_diff; + + struct cpufreq_policy *policy; + unsigned int j; + + this_dbs_info->freq_lo = 0; + policy = this_dbs_info->cur_policy; + + /* + * keep freq for touch boost + */ +// if (Touch_poke_boost_till_jiffies > jiffies) +// return; + + if ((dbs_tuners_ins.touch_min_cores >= 2) && (Touch_poke_boost_till_jiffies < jiffies) && (min_cpus_lock == 1)) { + min_cpus_lock = 0; + pm_qos_update_request(&touch_min_cpu_req, + (s32)PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + } + + /* + * Every sampling_rate, we check, if current idle time is less + * than 20% (default), then we try to increase frequency + * Every sampling_rate, we look for a the lowest + * frequency which can sustain the load while keeping idle time over + * 30%. If such a frequency exist, we try to decrease to this frequency. + * + * Any frequency increase takes it to the maximum frequency. 
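+ *
+ * (The percentages above are inherited from the ondemand comment; with
+ * this governor's defaults of up_threshold = 90 and down_differential = 10
+ * the actual trigger points differ.)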
+ * Frequency reduction happens at minimum steps of + * 5% (default) of current frequency + */ + + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + unsigned int load, load_freq; + int freq_avg; + + j_dbs_info = &per_cpu(td_cpu_dbs_info, j); + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + j_dbs_info->prev_cpu_idle); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + j_dbs_info->prev_cpu_iowait); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, + j_dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + /* + * For the purpose of touchdemand, waiting for disk IO is an + * indication that you're performance critical, and not that + * the system is actually idle. So subtract the iowait time + * from the cpu idle time. + */ + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) { + max_load_freq = load_freq; + debug_load = load; + debug_iowait = 100 * iowait_time / wall_time; + } + } + + /* Check for frequency increase */ + if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; + debug_freq = policy->max; + dbs_freq_increase(policy, policy->max); + return; + } + + /* Check for frequency decrease */ + /* if we cannot reduce the frequency anymore, break out early */ + + if (policy->cur == policy->min) + return; + + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus 10 points under the threshold. 
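+ *
+ * Worked example with the defaults above (up_threshold = 90,
+ * down_differential = 10, touch_factor = 4): outside a touch boost the
+ * governor scales down once max_load_freq drops below
+ * (90 - 10) = 80 times policy->cur; while Touch_poke_boost_till_jiffies is
+ * still in the future the margin widens to (90 - 10 * 4) = 50 times
+ * policy->cur, so the frequency is held high for longer after a touch
+ * event.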
+ */ + if (Touch_poke_boost_till_jiffies > jiffies) { + down_diff = (dbs_tuners_ins.down_differential * dbs_tuners_ins.touch_factor); + } else { + down_diff = dbs_tuners_ins.down_differential; + } + + if (max_load_freq < + (dbs_tuners_ins.up_threshold - down_diff) * + policy->cur) { + unsigned int freq_next; + unsigned int freq_min; + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + down_diff); + + /* No longer fully busy, reset rate_mult */ + this_dbs_info->rate_mult = 1; + + if (Touch_poke_boost_till_jiffies > jiffies) { + if (is_lp_cluster()) { + freq_min = idle_top_freq; + } else { + if (dbs_tuners_ins.touch_floor_freq >= policy->min) { + freq_min = dbs_tuners_ins.touch_floor_freq; + } else { + freq_min = policy->min; + } + } + } else { + freq_min = policy->min; + } + + if (freq_next < freq_min) + freq_next = freq_min; + + if (!dbs_tuners_ins.powersave_bias) { + debug_freq = freq_next; + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } else { + int freq = powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + debug_freq = freq; + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int sample_type = dbs_info->sample_type; + + int delay; + + mutex_lock(&dbs_info->timer_mutex); + + /* Common NORMAL_SAMPLE setup */ + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + if (!dbs_tuners_ins.powersave_bias || + sample_type == DBS_NORMAL_SAMPLE) { + dbs_check_cpu(dbs_info); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = DBS_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; + } else { + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate + * dbs_info->rate_mult); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + } + } else { + __cpufreq_driver_target(dbs_info->cur_policy, + dbs_info->freq_lo, CPUFREQ_RELATION_H); + delay = dbs_info->freq_lo_jiffies; + } + schedule_delayed_work_on(cpu, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + cancel_delayed_work_sync(&dbs_info->work); +} + +/* + * Not all CPUs want IO time to be accounted as busy; this dependson how + * efficient idling at a higher frequency/voltage is. + * Pavel Machek says this is not so for various generations of AMD and old + * Intel systems. + * Mike Chan (androidlcom) calis this is also not true for ARM. + * Because of this, whitelist specific known (series) of CPUs by default, and + * leave all others up to the user. + */ +static int should_io_be_busy(void) +{ +#if defined(CONFIG_X86) + /* + * For Intel, Core 2 (model 15) andl later have an efficient idle. 
+ */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 15) + return 1; +#endif + return DEF_IO_IS_BUSY; +} + +#define AID_SYSTEM (1000) +static void dbs_chown(void) +{ + int ret; + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/ignore_nice_load", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown ignore_nice_load error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/io_is_busy", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown io_is_busy error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/powersave_bias", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown powersave_bias error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/sampling_down_factor", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown sampling_down_factor error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/sampling_rate", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown sampling_rate error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_floor_freq", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_floor_freq error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_floor_time", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_floor_time error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_min_cores", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_min_cores error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_factor", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_factor error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/up_threshold", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown up_threshold error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/down_differential", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown down_differential error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_poke", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_poke error: %d", ret); +} + +static void dbs_refresh_callback(struct work_struct *unused) +{ + struct cpufreq_policy *policy; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int nr_cpus; + unsigned int touch_poke_freq; + unsigned int cpu = smp_processor_id(); + + if (lock_policy_rwsem_write(cpu) < 0) + return; + + this_dbs_info = &per_cpu(td_cpu_dbs_info, cpu); + policy = this_dbs_info->cur_policy; + + if (Touch_poke_boost) + Touch_poke_boost_till_jiffies = + jiffies + msecs_to_jiffies(dbs_tuners_ins.touch_floor_time); + + /* We poke the frequency base on the online cpu number */ + nr_cpus = num_online_cpus(); + + if (!is_lp_cluster()) + touch_poke_freq = Touch_poke_attr[nr_cpus-1]; + else + touch_poke_freq = idle_top_freq; + + if(touch_poke_freq == 0 || policy->cur >= touch_poke_freq){ + unlock_policy_rwsem_write(cpu); + return; + } + + __cpufreq_driver_target(policy, touch_poke_freq, + CPUFREQ_RELATION_L); + this_dbs_info->prev_cpu_idle = get_cpu_idle_time(cpu, + &this_dbs_info->prev_cpu_wall); + + if ((dbs_tuners_ins.touch_min_cores >= 2) && 
(!is_lp_cluster()) && (min_cpus_lock == 0)) { + pm_qos_update_request(&touch_min_cpu_req, + (s32)dbs_tuners_ins.touch_min_cores); + min_cpus_lock = 1; + } + + unlock_policy_rwsem_write(cpu); +} + +static DECLARE_WORK(dbs_refresh_work, dbs_refresh_callback); + +static void dbs_input_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + if (dbs_tuners_ins.touch_poke) + schedule_work(&dbs_refresh_work); +} + +static int input_dev_filter(const char* input_dev_name) +{ + int ret = 0; + if (strstr(input_dev_name, "touchscreen") || + strstr(input_dev_name, "-keypad") || + strstr(input_dev_name, "-nav") || + strstr(input_dev_name, "-oj")) { + } + else { + ret = 1; + } + return ret; +} + + +static int dbs_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + struct input_handle *handle; + int error; + + /* filter out those input_dev that we don't care */ + if (input_dev_filter(dev->name)) + return 0; + + handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = "cpufreq"; + + error = input_register_handle(handle); + if (error) + goto err2; + + error = input_open_device(handle); + if (error) + goto err1; + + return 0; +err1: + input_unregister_handle(handle); +err2: + kfree(handle); + return error; +} + +static void dbs_input_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static const struct input_device_id dbs_ids[] = { + { .driver_info = 1 }, + { }, +}; +static struct input_handler dbs_input_handler = { + .event = dbs_input_event, + .connect = dbs_input_connect, + .disconnect = dbs_input_disconnect, + .name = "cpufreq_ond", + .id_table = dbs_ids, +}; + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(td_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + mutex_lock(&dbs_mutex); + + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(td_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kstat_cpu(j).cpustat.nice; + } + } + this_dbs_info->cpu = cpu; + this_dbs_info->rate_mult = 1; + touchdemand_powersave_bias_init_cpu(cpu); + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + unsigned int latency; + + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + dbs_chown(); + + /* policy latency is in nS. 
Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); + if (def_sampling_rate) + dbs_tuners_ins.sampling_rate = def_sampling_rate; + dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; + dbs_tuners_ins.io_is_busy = should_io_be_busy(); + } + if (!cpu) + rc = input_register_handler(&dbs_input_handler); + + mutex_unlock(&dbs_mutex); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + break; + + case CPUFREQ_GOV_STOP: + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + mutex_destroy(&this_dbs_info->timer_mutex); + dbs_enable--; + + if (!cpu) + input_unregister_handler(&dbs_input_handler); + + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + mutex_unlock(&dbs_mutex); + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, CPUFREQ_RELATION_L); + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +static int __init cpufreq_gov_dbs_init(void) +{ + cputime64_t wall; + u64 idle_time; + int cpu = get_cpu(); + + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + + pm_qos_add_request(&touch_min_cpu_req, PM_QOS_MIN_ONLINE_CPUS, + PM_QOS_DEFAULT_VALUE); + + idle_time = get_cpu_idle_time_us(cpu, &wall); + put_cpu(); + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. Use finer thresholds */ + dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_tuners_ins.down_differential = + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + /* + * In no_hz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. + */ + min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + /* For correct statistics, we need 10 ticks for each measure */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + } + def_sampling_rate = DEF_SAMPLING_RATE; + + return cpufreq_register_governor(&cpufreq_gov_touchdemand); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + pm_qos_remove_request(&touch_min_cpu_req); + cpufreq_unregister_governor(&cpufreq_gov_touchdemand); +} + + +MODULE_AUTHOR("Venkatesh Pallipadi "); +MODULE_AUTHOR("Alexey Starikovskiy "); +MODULE_DESCRIPTION("'cpufreq_touchdemand' - A dynamic cpufreq governor " + "based on ondemand modified for Tegra 3 by Metallice"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/drivers/cpuquiet/Kconfig b/drivers/cpuquiet/Kconfig new file mode 100644 index 00000000000..844cd34a69b --- /dev/null +++ b/drivers/cpuquiet/Kconfig @@ -0,0 +1,11 @@ +menu "CPUQUIET Framework" + +config CPUQUIET_FRAMEWORK + bool "Cpuquiet framework" + default n + help + Cpuquiet implements pluggable policies for forcing cpu cores into a + quiescent state. 
Appropriate policies will save power without hurting + performance. + +endmenu diff --git a/drivers/cpuquiet/Makefile b/drivers/cpuquiet/Makefile new file mode 100644 index 00000000000..e438defaacd --- /dev/null +++ b/drivers/cpuquiet/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_CPUQUIET_FRAMEWORK) += cpuquiet.o driver.o sysfs.o cpuquiet_attribute.o governor.o governors/ diff --git a/drivers/cpuquiet/cpuquiet.c b/drivers/cpuquiet/cpuquiet.c new file mode 100644 index 00000000000..d902af26c8d --- /dev/null +++ b/drivers/cpuquiet/cpuquiet.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include +#include +#include +#include +#include "cpuquiet.h" + +DEFINE_MUTEX(cpuquiet_lock); + +static int __init cpuquiet_init(void) +{ + return cpuquiet_add_class_sysfs(&cpu_sysdev_class); +} + +core_initcall(cpuquiet_init); diff --git a/drivers/cpuquiet/cpuquiet.h b/drivers/cpuquiet/cpuquiet.h new file mode 100644 index 00000000000..fa61946ff11 --- /dev/null +++ b/drivers/cpuquiet/cpuquiet.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#ifndef __DRIVER_CPUQUIET_H +#define __DRIVER_CPUQUIET_H + +#include + +extern struct mutex cpuquiet_lock; +extern struct cpuquiet_governor *cpuquiet_curr_governor; +extern struct list_head cpuquiet_governors; +int cpuquiet_add_class_sysfs(struct sysdev_class *cls); +struct cpuquiet_governor *cpuquiet_find_governor(const char *str); +int cpuquiet_switch_governor(struct cpuquiet_governor *gov); +struct cpuquiet_governor *cpuquiet_get_first_governor(void); +struct cpuquiet_driver *cpuquiet_get_driver(void); +void cpuquiet_add_dev(struct sys_device *sys_dev, unsigned int cpu); +void cpuquiet_remove_dev(unsigned int cpu); +int cpuquiet_cpu_kobject_init(struct kobject *kobj, struct kobj_type *type, + char *name, int cpu); +#endif diff --git a/drivers/cpuquiet/cpuquiet_attribute.c b/drivers/cpuquiet/cpuquiet_attribute.c new file mode 100644 index 00000000000..9f1aa430149 --- /dev/null +++ b/drivers/cpuquiet/cpuquiet_attribute.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include + +ssize_t show_int_attribute(struct cpuquiet_attribute *cattr, char *buf) +{ + return sprintf(buf, "%d\n", *((int *)cattr->param)); +} + +ssize_t store_int_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count) +{ + int err, val; + + err = kstrtoint(buf, 0, &val); + if (err < 0) + return err; + + *((int *)(cattr->param)) = val; + + if (cattr->store_callback) + cattr->store_callback(cattr); + + return count; +} + +ssize_t show_bool_attribute(struct cpuquiet_attribute *cattr, char *buf) +{ + return sprintf(buf, "%d\n", *((bool *)cattr->param)); +} + +ssize_t store_bool_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count) +{ + int err, val; + + err = kstrtoint(buf, 0, &val); + if (err < 0) + return err; + + if (val < 0 || val > 1) + return -EINVAL; + + *((bool *)(cattr->param)) = val; + + if (cattr->store_callback) + cattr->store_callback(cattr); + + return count; +} + +ssize_t show_uint_attribute(struct cpuquiet_attribute *cattr, char *buf) +{ + return sprintf(buf, "%u\n", *((unsigned int *)cattr->param)); +} + +ssize_t store_uint_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count) +{ + int err; + unsigned int val; + + err = kstrtouint(buf, 0, &val); + if (err < 0) + return err; + + *((unsigned int *)(cattr->param)) = val; + + if (cattr->store_callback) + cattr->store_callback(cattr); + + return count; +} + +ssize_t store_ulong_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count) +{ + int err; + unsigned long val; + + err = kstrtoul(buf, 0, &val); + if (err < 0) + return err; + + *((unsigned long *)(cattr->param)) = val; + + if (cattr->store_callback) + cattr->store_callback(cattr); + + return count; +} + +ssize_t show_ulong_attribute(struct cpuquiet_attribute *cattr, + char *buf) +{ + return sprintf(buf, "%lu\n", *((unsigned long *)cattr->param)); +} + +ssize_t cpuquiet_auto_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpuquiet_attribute *cattr = + container_of(attr, struct cpuquiet_attribute, attr); + + if (cattr->store) + return cattr->store(cattr, buf, count); + + return -EINVAL; +} + +ssize_t cpuquiet_auto_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpuquiet_attribute *cattr = + container_of(attr, struct cpuquiet_attribute, attr); + + return cattr->show(cattr, buf); +} diff --git a/drivers/cpuquiet/driver.c b/drivers/cpuquiet/driver.c new file mode 100644 index 00000000000..d9dbea76994 --- /dev/null +++ b/drivers/cpuquiet/driver.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cpuquiet.h" + +struct cpuquiet_cpu_stat { + cputime64_t time_up_total; + u64 last_update; + unsigned int up_down_count; + struct kobject cpu_kobject; +}; + +struct cpu_attribute { + struct attribute attr; + enum { up_down_count, time_up_total } type; +}; + +static struct cpuquiet_driver *cpuquiet_curr_driver; +struct cpuquiet_cpu_stat *stats; + +#define CPU_ATTRIBUTE(_name) \ + static struct cpu_attribute _name ## _attr = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .type = _name, \ +} + +CPU_ATTRIBUTE(up_down_count); +CPU_ATTRIBUTE(time_up_total); + +static struct attribute *cpu_attributes[] = { + &up_down_count_attr.attr, + &time_up_total_attr.attr, + NULL, +}; + +static void stats_update(struct cpuquiet_cpu_stat *stat, bool up) +{ + u64 cur_jiffies = get_jiffies_64(); + bool was_up = stat->up_down_count & 0x1; + + if (was_up) + stat->time_up_total = cputime64_add(stat->time_up_total, + cputime64_sub(cur_jiffies, stat->last_update)); + + if (was_up != up) + stat->up_down_count++; + + stat->last_update = cur_jiffies; +} + +int cpuquiet_quiesence_cpu(unsigned int cpunumber) +{ + int err = -EPERM; + + if (cpuquiet_curr_driver && cpuquiet_curr_driver->quiesence_cpu) + err = cpuquiet_curr_driver->quiesence_cpu(cpunumber); + + if (!err) + stats_update(stats + cpunumber, 0); + + return err; +} +EXPORT_SYMBOL(cpuquiet_quiesence_cpu); + +int cpuquiet_wake_cpu(unsigned int cpunumber) +{ + int err = -EPERM; + + if (cpuquiet_curr_driver && cpuquiet_curr_driver->wake_cpu) + err = cpuquiet_curr_driver->wake_cpu(cpunumber); + + if (!err) + stats_update(stats + cpunumber, 1); + + return err; +} +EXPORT_SYMBOL(cpuquiet_wake_cpu); + +static ssize_t stats_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpu_attribute *cattr = + container_of(attr, struct cpu_attribute, attr); + struct cpuquiet_cpu_stat *stat = + container_of(kobj, struct cpuquiet_cpu_stat, cpu_kobject); + ssize_t len = 0; + bool was_up = stat->up_down_count & 0x1; + + stats_update(stat, was_up); + + switch (cattr->type) { + case up_down_count: + len = sprintf(buf, "%u\n", stat->up_down_count); + break; + case time_up_total: + len = sprintf(buf, "%llu\n", stat->time_up_total); + break; + } + + return len; +} + +static const struct sysfs_ops stats_sysfs_ops = { + .show = stats_sysfs_show, +}; + +static struct kobj_type ktype_cpu_stats = { + .sysfs_ops = &stats_sysfs_ops, + .default_attrs = cpu_attributes, +}; + +int cpuquiet_register_driver(struct cpuquiet_driver *drv) +{ + int err = -EBUSY; + unsigned int cpu; + struct sys_device *sys_dev; + u64 cur_jiffies; + + if (!drv) + return -EINVAL; + + stats = kzalloc(nr_cpu_ids * sizeof(*stats), GFP_KERNEL); + if (!stats) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + cur_jiffies 
= get_jiffies_64(); + stats[cpu].last_update = cur_jiffies; + if (cpu_online(cpu)) + stats[cpu].up_down_count = 1; + sys_dev = get_cpu_sysdev(cpu); + if (sys_dev) { + cpuquiet_add_dev(sys_dev, cpu); + cpuquiet_cpu_kobject_init(&stats[cpu].cpu_kobject, + &ktype_cpu_stats, "stats", cpu); + } + } + + mutex_lock(&cpuquiet_lock); + if (!cpuquiet_curr_driver) { + err = 0; + cpuquiet_curr_driver = drv; + cpuquiet_switch_governor(cpuquiet_get_first_governor()); + } + mutex_unlock(&cpuquiet_lock); + + return err; +} +EXPORT_SYMBOL(cpuquiet_register_driver); + +struct cpuquiet_driver *cpuquiet_get_driver(void) +{ + return cpuquiet_curr_driver; +} + +void cpuquiet_unregister_driver(struct cpuquiet_driver *drv) +{ + unsigned int cpu; + + if (drv != cpuquiet_curr_driver) { + WARN(1, "invalid cpuquiet_unregister_driver(%s)\n", + drv->name); + return; + } + + /* stop current governor first */ + cpuquiet_switch_governor(NULL); + + mutex_lock(&cpuquiet_lock); + cpuquiet_curr_driver = NULL; + + for_each_possible_cpu(cpu) { + kobject_put(&stats[cpu].cpu_kobject); + cpuquiet_remove_dev(cpu); + } + + mutex_unlock(&cpuquiet_lock); +} +EXPORT_SYMBOL(cpuquiet_unregister_driver); diff --git a/drivers/cpuquiet/governor.c b/drivers/cpuquiet/governor.c new file mode 100644 index 00000000000..176ba3bd705 --- /dev/null +++ b/drivers/cpuquiet/governor.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include +#include +#include + +#include "cpuquiet.h" + +LIST_HEAD(cpuquiet_governors); +struct cpuquiet_governor *cpuquiet_curr_governor; + +struct cpuquiet_governor *cpuquiet_get_first_governor(void) +{ + if (!list_empty(&cpuquiet_governors)) + return list_entry(cpuquiet_governors.next, + struct cpuquiet_governor, + governor_list); + else + return NULL; +} + +struct cpuquiet_governor *cpuquiet_find_governor(const char *str) +{ + struct cpuquiet_governor *gov; + + list_for_each_entry(gov, &cpuquiet_governors, governor_list) + if (!strnicmp(str, gov->name, CPUQUIET_NAME_LEN)) + return gov; + + return NULL; +} + +int cpuquiet_switch_governor(struct cpuquiet_governor *gov) +{ + int err = 0; + + if (cpuquiet_curr_governor) { + if (cpuquiet_curr_governor->stop) + cpuquiet_curr_governor->stop(); + module_put(cpuquiet_curr_governor->owner); + } + + cpuquiet_curr_governor = gov; + + if (gov) { + if (!try_module_get(cpuquiet_curr_governor->owner)) + return -EINVAL; + if (gov->start) + err = gov->start(); + if (!err) + cpuquiet_curr_governor = gov; + } + + return err; +} + +int cpuquiet_register_governor(struct cpuquiet_governor *gov) +{ + int ret = -EEXIST; + + if (!gov) + return -EINVAL; + + mutex_lock(&cpuquiet_lock); + if (cpuquiet_find_governor(gov->name) == NULL) { + ret = 0; + list_add_tail(&gov->governor_list, &cpuquiet_governors); + if (!cpuquiet_curr_governor && cpuquiet_get_driver()) + cpuquiet_switch_governor(gov); + } + mutex_unlock(&cpuquiet_lock); + + return ret; +} + +void cpuquiet_unregister_governor(struct cpuquiet_governor *gov) +{ + if (!gov) + return; + + mutex_lock(&cpuquiet_lock); + if (cpuquiet_curr_governor == gov) + cpuquiet_switch_governor(NULL); + list_del(&gov->governor_list); + mutex_unlock(&cpuquiet_lock); +} + +void cpuquiet_device_busy(void) +{ + if (cpuquiet_curr_governor && + cpuquiet_curr_governor->device_busy_notification) + cpuquiet_curr_governor->device_busy_notification(); +} + +void cpuquiet_device_free(void) +{ + if (cpuquiet_curr_governor && + cpuquiet_curr_governor->device_free_notification) + cpuquiet_curr_governor->device_free_notification(); +} diff --git a/drivers/cpuquiet/governors/Makefile b/drivers/cpuquiet/governors/Makefile new file mode 100644 index 00000000000..94c78991f14 --- /dev/null +++ b/drivers/cpuquiet/governors/Makefile @@ -0,0 +1 @@ +obj-y += balanced.o userspace.o runnable_threads.o diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c new file mode 100644 index 00000000000..b0f88f9a042 --- /dev/null +++ b/drivers/cpuquiet/governors/balanced.c @@ -0,0 +1,505 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CPUNAMELEN 8 + +typedef enum { + CPU_SPEED_BALANCED, + CPU_SPEED_BIASED, + CPU_SPEED_SKEWED, +} CPU_SPEED_BALANCE; + +typedef enum { + IDLE, + DOWN, + UP, +} BALANCED_STATE; + +struct idle_info { + u64 idle_last; + u64 last_timestamp; + u64 idle_current; + u64 timestamp; +}; + +static DEFINE_PER_CPU(struct idle_info, idleinfo); +static DEFINE_PER_CPU(unsigned int, cpu_load); + +static struct timer_list load_timer; +static bool load_timer_active; + +/* configurable parameters */ +static unsigned int balance_level = 60; +static unsigned int idle_bottom_freq; +static unsigned int idle_top_freq; +static unsigned long up_delay; +static unsigned long down_delay; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD +static unsigned long last_change_time; +static unsigned int load_sample_rate = 20; /* msec */ +#endif +static struct workqueue_struct *balanced_wq; +static struct delayed_work balanced_work; +static BALANCED_STATE balanced_state; +static struct kobject *balanced_kobject; + +static void calculate_load_timer(unsigned long data) +{ + int i; + u64 idle_time, elapsed_time; + + if (!load_timer_active) + return; + + for_each_online_cpu(i) { + struct idle_info *iinfo = &per_cpu(idleinfo, i); + unsigned int *load = &per_cpu(cpu_load, i); + + iinfo->idle_last = iinfo->idle_current; + iinfo->last_timestamp = iinfo->timestamp; + iinfo->idle_current = + get_cpu_idle_time_us(i, &iinfo->timestamp); + elapsed_time = iinfo->timestamp - iinfo->last_timestamp; + + idle_time = iinfo->idle_current - iinfo->idle_last; + idle_time *= 100; + do_div(idle_time, elapsed_time); + *load = 100 - idle_time; + } +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + mod_timer(&load_timer, jiffies + msecs_to_jiffies(load_sample_rate)); +#else + mod_timer(&load_timer, jiffies + msecs_to_jiffies(100)); +#endif +} + +static void start_load_timer(void) +{ + int i; + + if (load_timer_active) + return; + + load_timer_active = true; + + for_each_online_cpu(i) { + struct idle_info *iinfo = &per_cpu(idleinfo, i); + + iinfo->idle_current = + get_cpu_idle_time_us(i, &iinfo->timestamp); + } + mod_timer(&load_timer, jiffies + msecs_to_jiffies(100)); +} + +static void stop_load_timer(void) +{ + if (!load_timer_active) + return; + + load_timer_active = false; + del_timer(&load_timer); +} + +static unsigned int get_slowest_cpu_n(void) +{ + unsigned int cpu = nr_cpu_ids; + unsigned long minload = ULONG_MAX; + int i; + + for_each_online_cpu(i) { + unsigned int *load = &per_cpu(cpu_load, i); + + if ((i > 0) && (minload > *load)) { + cpu = i; + minload = *load; + } + } + + return cpu; +} + +static unsigned int cpu_highest_speed(void) +{ + unsigned int maxload = 0; + int i; + + for_each_online_cpu(i) { + unsigned int *load = &per_cpu(cpu_load, i); + + maxload = max(maxload, *load); + } + + return maxload; +} + +static unsigned int count_slow_cpus(unsigned int limit) +{ + unsigned int cnt = 0; + int i; + + for_each_online_cpu(i) { + unsigned int *load = &per_cpu(cpu_load, i); + + if (*load <= limit) + cnt++; + } + + return cnt; +} +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD +#define NR_FSHIFT 2 +static unsigned int nr_run_thresholds[] = { +/* 1, 2, 3, 4 - on-line cpus target */ + 5, 9, 10, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ +}; +static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ +static unsigned int nr_run_last; +#endif + +static CPU_SPEED_BALANCE balanced_speed_balance(void) +{ + unsigned long 
highest_speed = cpu_highest_speed(); + unsigned long balanced_speed = highest_speed * balance_level / 100; + unsigned long skewed_speed = balanced_speed / 2; + unsigned int nr_cpus = num_online_cpus(); + unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + unsigned int avg_nr_run = avg_nr_running(); + unsigned int nr_run; +#endif + + /* balanced: freq targets for all CPUs are above 50% of highest speed + biased: freq target for at least one CPU is below 50% threshold + skewed: freq targets for at least 2 CPUs are below 25% threshold */ +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { + unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; + if (nr_run_last <= nr_run) + nr_threshold += nr_run_hysteresis; + if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT))) + break; + } + nr_run_last = nr_run; + + if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus || + nr_run < nr_cpus) +#else + if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus) +#endif + return CPU_SPEED_SKEWED; + +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus || + nr_run <= nr_cpus) +#else + if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus) +#endif + return CPU_SPEED_BIASED; + + return CPU_SPEED_BALANCED; +} + +static void balanced_work_func(struct work_struct *work) +{ + bool up = false; + unsigned int cpu = nr_cpu_ids; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + unsigned long now = jiffies; +#endif + + CPU_SPEED_BALANCE balance; + + switch (balanced_state) { + case IDLE: + break; + case DOWN: + cpu = get_slowest_cpu_n(); + if (cpu < nr_cpu_ids) { + up = false; + queue_delayed_work(balanced_wq, +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + &balanced_work, up_delay); +#else + &balanced_work, down_delay); +#endif + } else + stop_load_timer(); + break; + case UP: + balance = balanced_speed_balance(); + switch (balance) { + + /* cpu speed is up and balanced - one more on-line */ + case CPU_SPEED_BALANCED: + cpu = cpumask_next_zero(0, cpu_online_mask); + if (cpu < nr_cpu_ids) + up = true; + break; + /* cpu speed is up, but skewed - remove one core */ + case CPU_SPEED_SKEWED: + cpu = get_slowest_cpu_n(); + if (cpu < nr_cpu_ids) + up = false; + break; + /* cpu speed is up, but under-utilized - do nothing */ + case CPU_SPEED_BIASED: + default: + break; + } + queue_delayed_work( + balanced_wq, &balanced_work, up_delay); + break; + default: + pr_err("%s: invalid cpuquiet balanced governor state %d\n", + __func__, balanced_state); + } + +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + if (!up && ((now - last_change_time) < down_delay)) + cpu = nr_cpu_ids; +#endif + + if (cpu < nr_cpu_ids) { +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + last_change_time = now; +#endif + if (up) + cpuquiet_wake_cpu(cpu); + else + cpuquiet_quiesence_cpu(cpu); + } +} + +static int balanced_cpufreq_transition(struct notifier_block *nb, + unsigned long state, void *data) +{ + struct cpufreq_freqs *freqs = data; + unsigned long cpu_freq; + + if (state == CPUFREQ_POSTCHANGE || state == CPUFREQ_RESUMECHANGE) { + cpu_freq = freqs->new; + + switch (balanced_state) { + case IDLE: + if (cpu_freq >= idle_top_freq) { + balanced_state = UP; + queue_delayed_work( + balanced_wq, &balanced_work, up_delay); + start_load_timer(); + } else if (cpu_freq <= idle_bottom_freq) { + balanced_state = DOWN; + queue_delayed_work( + balanced_wq, &balanced_work, + down_delay); + start_load_timer(); + } + break; 
+ case DOWN: + if (cpu_freq >= idle_top_freq) { + balanced_state = UP; + queue_delayed_work( + balanced_wq, &balanced_work, up_delay); + start_load_timer(); + } + break; + case UP: + if (cpu_freq <= idle_bottom_freq) { + balanced_state = DOWN; + queue_delayed_work(balanced_wq, +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + &balanced_work, up_delay); +#else + &balanced_work, down_delay); +#endif + start_load_timer(); + } + break; + default: + pr_err("%s: invalid cpuquiet balanced governor " + "state %d\n", __func__, balanced_state); + } + } + + return NOTIFY_OK; +} + +static struct notifier_block balanced_cpufreq_nb = { + .notifier_call = balanced_cpufreq_transition, +}; + +static void delay_callback(struct cpuquiet_attribute *attr) +{ + unsigned long val; + + if (attr) { + val = (*((unsigned long *)(attr->param))); + (*((unsigned long *)(attr->param))) = msecs_to_jiffies(val); + } +} + +CPQ_BASIC_ATTRIBUTE(balance_level, 0644, uint); +CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint); +CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint); +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD +CPQ_BASIC_ATTRIBUTE(load_sample_rate, 0644, uint); +#endif +CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback); + +static struct attribute *balanced_attributes[] = { + &balance_level_attr.attr, + &idle_bottom_freq_attr.attr, + &idle_top_freq_attr.attr, + &up_delay_attr.attr, + &down_delay_attr.attr, +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + &load_sample_rate_attr.attr, +#endif + NULL, +}; + +static const struct sysfs_ops balanced_sysfs_ops = { + .show = cpuquiet_auto_sysfs_show, + .store = cpuquiet_auto_sysfs_store, +}; + +static struct kobj_type ktype_balanced = { + .sysfs_ops = &balanced_sysfs_ops, + .default_attrs = balanced_attributes, +}; + +static int balanced_sysfs(void) +{ + int err; + + balanced_kobject = kzalloc(sizeof(*balanced_kobject), + GFP_KERNEL); + + if (!balanced_kobject) + return -ENOMEM; + + err = cpuquiet_kobject_init(balanced_kobject, &ktype_balanced, + "balanced"); + + if (err) + kfree(balanced_kobject); + + return err; +} + +static void balanced_stop(void) +{ + /* + first unregister the notifiers. 
This ensures the governor state + can't be modified by a cpufreq transition + */ + cpufreq_unregister_notifier(&balanced_cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); + + /* now we can force the governor to be idle */ + balanced_state = IDLE; + cancel_delayed_work_sync(&balanced_work); + destroy_workqueue(balanced_wq); + del_timer(&load_timer); + + kobject_put(balanced_kobject); +} + +static int balanced_start(void) +{ + int err, count; + struct cpufreq_frequency_table *table; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + struct cpufreq_freqs initial_freq; +#endif + + err = balanced_sysfs(); + if (err) + return err; + + balanced_wq = alloc_workqueue("cpuquiet-balanced", + WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1); + if (!balanced_wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&balanced_work, balanced_work_func); + +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + up_delay = msecs_to_jiffies(100); + down_delay = msecs_to_jiffies(500); +#else + up_delay = msecs_to_jiffies(1000); + down_delay = msecs_to_jiffies(2000); +#endif + + table = cpufreq_frequency_get_table(0); + for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++); + + idle_top_freq = table[(count / 2) - 1].frequency; + idle_bottom_freq = table[(count / 2) - 2].frequency; + + cpufreq_register_notifier(&balanced_cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); + + init_timer(&load_timer); + load_timer.function = calculate_load_timer; + +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + /*FIXME: Kick start the state machine by faking a freq notification*/ + initial_freq.new = cpufreq_get(0); + if (initial_freq.new != 0) + balanced_cpufreq_transition(NULL, CPUFREQ_RESUMECHANGE, + &initial_freq); +#endif + return 0; +} + +struct cpuquiet_governor balanced_governor = { + .name = "balanced", + .start = balanced_start, + .stop = balanced_stop, + .owner = THIS_MODULE, +}; + +static int __init init_balanced(void) +{ + return cpuquiet_register_governor(&balanced_governor); +} + +static void __exit exit_balanced(void) +{ + cpuquiet_unregister_governor(&balanced_governor); +} + +MODULE_LICENSE("GPL"); +module_init(init_balanced); +module_exit(exit_balanced); + diff --git a/drivers/cpuquiet/governors/runnable_threads.c b/drivers/cpuquiet/governors/runnable_threads.c new file mode 100644 index 00000000000..fe1df969420 --- /dev/null +++ b/drivers/cpuquiet/governors/runnable_threads.c @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum { + DISABLED, + IDLE, + DOWN, + UP, +} RUNNABLES_STATE; + +static struct delayed_work runnables_work; +static struct kobject *runnables_kobject; + +/* configurable parameters */ +static unsigned int sample_rate = 20; /* msec */ + +static RUNNABLES_STATE runnables_state; +static struct workqueue_struct *runnables_wq; + +#define NR_FSHIFT_EXP 3 +#define NR_FSHIFT (1 << NR_FSHIFT_EXP) +/* avg run threads * 8 (e.g., 11 = 1.375 threads) */ +static unsigned int default_thresholds[] = { + 9, 17, 25, UINT_MAX +}; + +static unsigned int nr_run_last; +static unsigned int nr_run_hysteresis = 4; /* 1 / 4 thread */ +static unsigned int default_threshold_level = 4; /* 1 / 4 thread */ +static unsigned int nr_run_thresholds[NR_CPUS]; + +DEFINE_MUTEX(runnables_work_lock); + +static void update_runnables_state(void) +{ + unsigned int nr_cpus = num_online_cpus(); + int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; + int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + unsigned int avg_nr_run = avg_nr_running(); + unsigned int nr_run; + + if (runnables_state == DISABLED) + return; + + for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { + unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; + if (nr_run_last <= nr_run) + nr_threshold += NR_FSHIFT / nr_run_hysteresis; + if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT_EXP))) + break; + } + nr_run_last = nr_run; + + if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus) { + runnables_state = DOWN; + } else if (nr_cpus < min_cpus || nr_run > nr_cpus) { + runnables_state = UP; + } else { + runnables_state = IDLE; + } +} + +static unsigned int get_lightest_loaded_cpu_n(void) +{ + unsigned long min_avg_runnables = ULONG_MAX; + unsigned int cpu = nr_cpu_ids; + int i; + + for_each_online_cpu(i) { + unsigned int nr_runnables = get_avg_nr_running(i); + + if (i > 0 && min_avg_runnables > nr_runnables) { + cpu = i; + min_avg_runnables = nr_runnables; + } + } + + return cpu; +} + +static void runnables_work_func(struct work_struct *work) +{ + bool up = false; + bool sample = false; + unsigned int cpu = nr_cpu_ids; + + mutex_lock(&runnables_work_lock); + + update_runnables_state(); + + switch (runnables_state) { + case DISABLED: + break; + case IDLE: + sample = true; + break; + case UP: + cpu = cpumask_next_zero(0, cpu_online_mask); + up = true; + sample = true; + break; + case DOWN: + cpu = get_lightest_loaded_cpu_n(); + sample = true; + break; + default: + pr_err("%s: invalid cpuquiet runnable governor state %d\n", + __func__, runnables_state); + break; + } + + if (sample) + queue_delayed_work(runnables_wq, &runnables_work, + msecs_to_jiffies(sample_rate)); + + if (cpu < nr_cpu_ids) { + if (up) + cpuquiet_wake_cpu(cpu); + else + cpuquiet_quiesence_cpu(cpu); + } + + mutex_unlock(&runnables_work_lock); +} + +CPQ_BASIC_ATTRIBUTE(sample_rate, 0644, uint); +CPQ_BASIC_ATTRIBUTE(nr_run_hysteresis, 0644, uint); + +static struct attribute *runnables_attributes[] = { + &sample_rate_attr.attr, + &nr_run_hysteresis_attr.attr, + NULL, +}; + +static const struct sysfs_ops runnables_sysfs_ops = { + .show = cpuquiet_auto_sysfs_show, + .store = cpuquiet_auto_sysfs_store, +}; + +static struct kobj_type ktype_runnables = { + .sysfs_ops = &runnables_sysfs_ops, + .default_attrs = runnables_attributes, +}; + +static int runnables_sysfs(void) +{ + int err; + + runnables_kobject = kzalloc(sizeof(*runnables_kobject), + 
GFP_KERNEL); + + if (!runnables_kobject) + return -ENOMEM; + + err = cpuquiet_kobject_init(runnables_kobject, &ktype_runnables, + "runnable_threads"); + + if (err) + kfree(runnables_kobject); + + return err; +} + +static void runnables_device_busy(void) +{ + if (runnables_state != DISABLED) { + runnables_state = DISABLED; + cancel_delayed_work_sync(&runnables_work); + } +} + +static void runnables_device_free(void) +{ + if (runnables_state == DISABLED) { + runnables_state = IDLE; + runnables_work_func(NULL); + } +} + +static void runnables_stop(void) +{ + runnables_state = DISABLED; + cancel_delayed_work_sync(&runnables_work); + destroy_workqueue(runnables_wq); + kobject_put(runnables_kobject); +} + +static int runnables_start(void) +{ + int err, i; + + err = runnables_sysfs(); + if (err) + return err; + + runnables_wq = alloc_workqueue("cpuquiet-runnables", + WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1); + if (!runnables_wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&runnables_work, runnables_work_func); + + for(i = 0; i < ARRAY_SIZE(nr_run_thresholds); ++i) { + if (i < ARRAY_SIZE(default_thresholds)) + nr_run_thresholds[i] = default_thresholds[i]; + else if (i == (ARRAY_SIZE(nr_run_thresholds) - 1)) + nr_run_thresholds[i] = UINT_MAX; + else + nr_run_thresholds[i] = i + 1 + + NR_FSHIFT / default_threshold_level; + } + + runnables_state = IDLE; + runnables_work_func(NULL); + + return 0; +} + +struct cpuquiet_governor runnables_governor = { + .name = "runnable", + .start = runnables_start, + .device_free_notification = runnables_device_free, + .device_busy_notification = runnables_device_busy, + .stop = runnables_stop, + .owner = THIS_MODULE, +}; + +static int __init init_runnables(void) +{ + return cpuquiet_register_governor(&runnables_governor); +} + +static void __exit exit_runnables(void) +{ + cpuquiet_unregister_governor(&runnables_governor); +} + +MODULE_LICENSE("GPL"); +module_init(init_runnables); +module_exit(exit_runnables); diff --git a/drivers/cpuquiet/governors/userspace.c b/drivers/cpuquiet/governors/userspace.c new file mode 100644 index 00000000000..470056c5e32 --- /dev/null +++ b/drivers/cpuquiet/governors/userspace.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include +#include +#include +#include + +static DEFINE_MUTEX(userspace_mutex); + +static int governor_set(unsigned int cpu, bool active) +{ + mutex_lock(&userspace_mutex); + if (active) + cpuquiet_wake_cpu(cpu); + else + cpuquiet_quiesence_cpu(cpu); + mutex_unlock(&userspace_mutex); + + return 0; +} + +struct cpuquiet_governor userspace_governor = { + .name = "userspace", + .store_active = governor_set, + .owner = THIS_MODULE, +}; + +static int __init init_usermode(void) +{ + return cpuquiet_register_governor(&userspace_governor); +} + +static void __exit exit_usermode(void) +{ + cpuquiet_unregister_governor(&userspace_governor); +} + +MODULE_LICENSE("GPL"); +module_init(init_usermode); +module_exit(exit_usermode); diff --git a/drivers/cpuquiet/sysfs.c b/drivers/cpuquiet/sysfs.c new file mode 100644 index 00000000000..0d63eee37dc --- /dev/null +++ b/drivers/cpuquiet/sysfs.c @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include +#include +#include +#include + +#include "cpuquiet.h" + +struct cpuquiet_dev { + unsigned int cpu; + struct kobject kobj; +}; + +struct cpuquiet_sysfs_attr { + struct attribute attr; + ssize_t (*show)(char *); + ssize_t (*store)(const char *, size_t count); +}; + +static struct kobject *cpuquiet_global_kobject; +struct cpuquiet_dev *cpuquiet_cpu_devices[CONFIG_NR_CPUS]; + +static ssize_t show_current_governor(char *buf) +{ + ssize_t ret; + + mutex_lock(&cpuquiet_lock); + + if (cpuquiet_curr_governor) + ret = sprintf(buf, "%s\n", cpuquiet_curr_governor->name); + else + ret = sprintf(buf, "none\n"); + + mutex_unlock(&cpuquiet_lock); + + return ret; + +} + +static ssize_t store_current_governor(const char *buf, size_t count) +{ + char name[CPUQUIET_NAME_LEN]; + struct cpuquiet_governor *gov; + int len = count, ret = -EINVAL; + + if (!len || len >= sizeof(name)) + return -EINVAL; + + memcpy(name, buf, count); + name[len] = '\0'; + if (name[len - 1] == '\n') + name[--len] = '\0'; + + mutex_lock(&cpuquiet_lock); + gov = cpuquiet_find_governor(name); + mutex_unlock(&cpuquiet_lock); + + if (gov) + ret = cpuquiet_switch_governor(gov); + + if (ret) + return ret; + else + return count; +} + +static ssize_t available_governors_show(char *buf) +{ + ssize_t ret = 0, len; + struct cpuquiet_governor *gov; + + mutex_lock(&cpuquiet_lock); + if (!list_empty(&cpuquiet_governors)) { + list_for_each_entry(gov, &cpuquiet_governors, governor_list) { + len = sprintf(buf, "%s ", gov->name); + buf += len; + ret += len; + } + buf--; + *buf = '\n'; + } else + ret = sprintf(buf, "none\n"); + + mutex_unlock(&cpuquiet_lock); + + return ret; +} + +struct cpuquiet_sysfs_attr attr_current_governor = __ATTR(current_governor, + 0644, show_current_governor, store_current_governor); +struct cpuquiet_sysfs_attr attr_governors = __ATTR_RO(available_governors); + + 
+static struct attribute *cpuquiet_default_attrs[] = { + &attr_current_governor.attr, + &attr_governors.attr, + NULL +}; + +static ssize_t cpuquiet_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpuquiet_sysfs_attr *cattr = + container_of(attr, struct cpuquiet_sysfs_attr, attr); + + return cattr->show(buf); +} + +static ssize_t cpuquiet_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpuquiet_sysfs_attr *cattr = + container_of(attr, struct cpuquiet_sysfs_attr, attr); + + if (cattr->store) + return cattr->store(buf, count); + + return -EINVAL; +} + +static const struct sysfs_ops cpuquiet_sysfs_ops = { + .show = cpuquiet_sysfs_show, + .store = cpuquiet_sysfs_store, +}; + +static struct kobj_type ktype_cpuquiet_sysfs = { + .sysfs_ops = &cpuquiet_sysfs_ops, + .default_attrs = cpuquiet_default_attrs, +}; + +int cpuquiet_add_group(struct attribute_group *attrs) +{ + return sysfs_create_group(cpuquiet_global_kobject, attrs); +} + +void cpuquiet_remove_group(struct attribute_group *attrs) +{ + sysfs_remove_group(cpuquiet_global_kobject, attrs); +} + +int cpuquiet_kobject_init(struct kobject *kobj, struct kobj_type *type, + char *name) +{ + int err; + + err = kobject_init_and_add(kobj, type, cpuquiet_global_kobject, name); + if (!err) + kobject_uevent(kobj, KOBJ_ADD); + + return err; +} + +int cpuquiet_cpu_kobject_init(struct kobject *kobj, struct kobj_type *type, + char *name, int cpu) +{ + int err; + + err = kobject_init_and_add(kobj, type, &cpuquiet_cpu_devices[cpu]->kobj, + name); + if (!err) + kobject_uevent(kobj, KOBJ_ADD); + + return err; +} + +int cpuquiet_add_class_sysfs(struct sysdev_class *cls) +{ + int err; + + cpuquiet_global_kobject = kzalloc(sizeof(*cpuquiet_global_kobject), + GFP_KERNEL); + if (!cpuquiet_global_kobject) + return -ENOMEM; + + err = kobject_init_and_add(cpuquiet_global_kobject, + &ktype_cpuquiet_sysfs, &cls->kset.kobj, "cpuquiet"); + if (!err) + kobject_uevent(cpuquiet_global_kobject, KOBJ_ADD); + + return err; +} + + +struct cpuquiet_attr { + struct attribute attr; + ssize_t (*show)(unsigned int, char *); + ssize_t (*store)(unsigned int, const char *, size_t count); +}; + + +static ssize_t cpuquiet_state_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpuquiet_attr *cattr = container_of(attr, + struct cpuquiet_attr, attr); + struct cpuquiet_dev *dev = container_of(kobj, + struct cpuquiet_dev, kobj); + + return cattr->show(dev->cpu, buf); +} + +static ssize_t cpuquiet_state_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpuquiet_attr *cattr = container_of(attr, + struct cpuquiet_attr, attr); + struct cpuquiet_dev *dev = container_of(kobj, + struct cpuquiet_dev, kobj); + + if (cattr->store) + return cattr->store(dev->cpu, buf, count); + + return -EINVAL; +} + +static ssize_t show_active(unsigned int cpu, char *buf) +{ + return sprintf(buf, "%u\n", cpu_online(cpu)); +} + +static ssize_t store_active(unsigned int cpu, const char *value, size_t count) +{ + unsigned int active; + int ret; + + if (!cpuquiet_curr_governor->store_active) + return -EINVAL; + + ret = sscanf(value, "%u", &active); + if (ret != 1) + return -EINVAL; + + cpuquiet_curr_governor->store_active(cpu, active); + + return count; +} + +struct cpuquiet_attr attr_active = __ATTR(active, 0644, show_active, + store_active); + +static struct attribute *cpuquiet_default_cpu_attrs[] = { + &attr_active.attr, + NULL +}; + +static const struct 
sysfs_ops cpuquiet_cpu_sysfs_ops = { + .show = cpuquiet_state_show, + .store = cpuquiet_state_store, +}; + +static struct kobj_type ktype_cpuquiet = { + .sysfs_ops = &cpuquiet_cpu_sysfs_ops, + .default_attrs = cpuquiet_default_cpu_attrs, +}; + +void cpuquiet_add_dev(struct sys_device *sys_dev, unsigned int cpu) +{ + struct cpuquiet_dev *dev; + int err; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + dev->cpu = cpu; + cpuquiet_cpu_devices[cpu] = dev; + err = kobject_init_and_add(&dev->kobj, &ktype_cpuquiet, + &sys_dev->kobj, "cpuquiet"); + if (!err) + kobject_uevent(&dev->kobj, KOBJ_ADD); +} + +void cpuquiet_remove_dev(unsigned int cpu) +{ + if (cpu < CONFIG_NR_CPUS && cpuquiet_cpu_devices[cpu]) + kobject_put(&cpuquiet_cpu_devices[cpu]->kobj); +} diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 1d9ce6501af..1ae4a398626 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -572,9 +572,12 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) } i2c_writel(i2c_dev, status, I2C_INT_STATUS); + i2c_readl(i2c_dev, I2C_INT_STATUS); - if (i2c_dev->is_dvc) + if (i2c_dev->is_dvc) { dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); + dvc_readl(i2c_dev, DVC_STATUS); + } /* * ensure that the writes above post prior to leaving the interrupt @@ -618,13 +621,16 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) I2C_INT_RX_FIFO_DATA_REQ | I2C_INT_TX_FIFO_OVERFLOW); i2c_writel(i2c_dev, status, I2C_INT_STATUS); + i2c_readl(i2c_dev, I2C_INT_STATUS); /* An error occured, mask dvc interrupt */ if (i2c_dev->is_dvc) dvc_i2c_mask_irq(i2c_dev, DVC_CTRL_REG3_I2C_DONE_INTR_EN); - if (i2c_dev->is_dvc) + if (i2c_dev->is_dvc) { dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); + dvc_readl(i2c_dev, DVC_STATUS); + } /* * ensure that the writes above post prior to leaving the interrupt diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index c48c81f0308..b5993acd48b 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -40,7 +40,6 @@ int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots) dev->mtsize = num_slots; input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0); input_set_abs_params(dev, ABS_MT_TRACKING_ID, 0, TRKID_MAX, 0, 0); - input_set_events_per_packet(dev, 6 * num_slots); /* Mark slots as 'unused' */ for (i = 0; i < num_slots; i++) diff --git a/drivers/input/input.c b/drivers/input/input.c index da38d97a51b..e7c716baead 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -47,6 +47,8 @@ static DEFINE_MUTEX(input_mutex); static struct input_handler *input_table[8]; +static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; + static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { @@ -69,42 +71,102 @@ static int input_defuzz_abs_event(int value, int old_val, int fuzz) return value; } +static void input_start_autorepeat(struct input_dev *dev, int code) +{ + if (test_bit(EV_REP, dev->evbit) && + dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && + dev->timer.data) { + dev->repeat_key = code; + mod_timer(&dev->timer, + jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); + } +} + +static void input_stop_autorepeat(struct input_dev *dev) +{ + del_timer(&dev->timer); +} + /* * Pass event first through all filters and then, if event has not been * filtered out, through all open handles. This function is called with * dev->event_lock held and interrupts disabled. 
*/ -static void input_pass_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) +static unsigned int input_to_handler(struct input_handle *handle, + struct input_value *vals, unsigned int count) +{ + struct input_handler *handler = handle->handler; + struct input_value *end = vals; + struct input_value *v; + + for (v = vals; v != vals + count; v++) { + if (handler->filter && + handler->filter(handle, v->type, v->code, v->value)) + continue; + if (end != v) + *end = *v; + end++; + } + + count = end - vals; + if (!count) + return 0; + + if (handler->events) + handler->events(handle, vals, count); + else if (handler->event) + for (v = vals; v != end; v++) + handler->event(handle, v->type, v->code, v->value); + + return count; +} + +/* + * Pass values first through all filters and then, if event has not been + * filtered out, through all open handles. This function is called with + * dev->event_lock held and interrupts disabled. + */ +static void input_pass_values(struct input_dev *dev, + struct input_value *vals, unsigned int count) { - struct input_handler *handler; struct input_handle *handle; + struct input_value *v; + + if (!count) + return; rcu_read_lock(); handle = rcu_dereference(dev->grab); - if (handle) - handle->handler->event(handle, type, code, value); - else { - bool filtered = false; - - list_for_each_entry_rcu(handle, &dev->h_list, d_node) { - if (!handle->open) - continue; + if (handle) { + count = input_to_handler(handle, vals, count); + } else { + list_for_each_entry_rcu(handle, &dev->h_list, d_node) + if (handle->open) + count = input_to_handler(handle, vals, count); + } - handler = handle->handler; - if (!handler->filter) { - if (filtered) - break; + rcu_read_unlock(); - handler->event(handle, type, code, value); + add_input_randomness(vals->type, vals->code, vals->value); - } else if (handler->filter(handle, type, code, value)) - filtered = true; + /* trigger auto repeat for key events */ + for (v = vals; v != vals + count; v++) { + if (v->type == EV_KEY && v->value != 2) { + if (v->value) + input_start_autorepeat(dev, v->code); + else + input_stop_autorepeat(dev); } } +} - rcu_read_unlock(); +static void input_pass_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + struct input_value vals[] = { { type, code, value } }; + + input_pass_values(dev, vals, ARRAY_SIZE(vals)); } /* @@ -121,18 +183,12 @@ static void input_repeat_key(unsigned long data) if (test_bit(dev->repeat_key, dev->key) && is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) { + struct input_value vals[] = { + { EV_KEY, dev->repeat_key, 2 }, + input_value_sync + }; - input_pass_event(dev, EV_KEY, dev->repeat_key, 2); - - if (dev->sync) { - /* - * Only send SYN_REPORT if we are not in a middle - * of driver parsing a new hardware packet. - * Otherwise assume that the driver will send - * SYN_REPORT once it's done. 
- */ - input_pass_event(dev, EV_SYN, SYN_REPORT, 1); - } + input_pass_values(dev, vals, ARRAY_SIZE(vals)); if (dev->rep[REP_PERIOD]) mod_timer(&dev->timer, jiffies + @@ -142,25 +198,11 @@ static void input_repeat_key(unsigned long data) spin_unlock_irqrestore(&dev->event_lock, flags); } -static void input_start_autorepeat(struct input_dev *dev, int code) -{ - if (test_bit(EV_REP, dev->evbit) && - dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && - dev->timer.data) { - dev->repeat_key = code; - mod_timer(&dev->timer, - jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); - } -} - -static void input_stop_autorepeat(struct input_dev *dev) -{ - del_timer(&dev->timer); -} - #define INPUT_IGNORE_EVENT 0 #define INPUT_PASS_TO_HANDLERS 1 #define INPUT_PASS_TO_DEVICE 2 +#define INPUT_SLOT 4 +#define INPUT_FLUSH 8 #define INPUT_PASS_TO_ALL (INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE) static int input_handle_abs_event(struct input_dev *dev, @@ -207,14 +249,14 @@ static int input_handle_abs_event(struct input_dev *dev, /* Flush pending "slot" event */ if (is_mt_event && dev->slot != input_abs_get_val(dev, ABS_MT_SLOT)) { input_abs_set_val(dev, ABS_MT_SLOT, dev->slot); - input_pass_event(dev, EV_ABS, ABS_MT_SLOT, dev->slot); + return INPUT_PASS_TO_HANDLERS | INPUT_SLOT; } return INPUT_PASS_TO_HANDLERS; } -static void input_handle_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) +static int input_get_disposition(struct input_dev *dev, + unsigned int type, unsigned int code, int value) { int disposition = INPUT_IGNORE_EVENT; @@ -227,13 +269,9 @@ static void input_handle_event(struct input_dev *dev, break; case SYN_REPORT: - if (!dev->sync) { - dev->sync = true; - disposition = INPUT_PASS_TO_HANDLERS; - } + disposition = INPUT_PASS_TO_HANDLERS | INPUT_FLUSH; break; case SYN_MT_REPORT: - dev->sync = false; disposition = INPUT_PASS_TO_HANDLERS; break; } @@ -317,14 +355,48 @@ static void input_handle_event(struct input_dev *dev, break; } - if (disposition != INPUT_IGNORE_EVENT && type != EV_SYN) - dev->sync = false; + return disposition; +} + +static void input_handle_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + int disposition; + + disposition = input_get_disposition(dev, type, code, value); if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) dev->event(dev, type, code, value); - if (disposition & INPUT_PASS_TO_HANDLERS) - input_pass_event(dev, type, code, value); + if (!dev->vals) + return; + + if (disposition & INPUT_PASS_TO_HANDLERS) { + struct input_value *v; + + if (disposition & INPUT_SLOT) { + v = &dev->vals[dev->num_vals++]; + v->type = EV_ABS; + v->code = ABS_MT_SLOT; + v->value = dev->slot; + } + + v = &dev->vals[dev->num_vals++]; + v->type = type; + v->code = code; + v->value = value; + } + + if (disposition & INPUT_FLUSH) { + if (dev->num_vals >= 2) + input_pass_values(dev, dev->vals, dev->num_vals); + dev->num_vals = 0; + } else if (dev->num_vals >= dev->max_vals - 2) { + dev->vals[dev->num_vals++] = input_value_sync; + input_pass_values(dev, dev->vals, dev->num_vals); + dev->num_vals = 0; + } + } /** @@ -352,7 +424,6 @@ void input_event(struct input_dev *dev, if (is_event_supported(type, dev->evbit, EV_MAX)) { spin_lock_irqsave(&dev->event_lock, flags); - add_input_randomness(type, code, value); input_handle_event(dev, type, code, value); spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -831,10 +902,12 @@ int input_set_keycode(struct input_dev *dev, if (test_bit(EV_KEY, dev->evbit) && !is_event_supported(old_keycode, 
dev->keybit, KEY_MAX) && __test_and_clear_bit(old_keycode, dev->key)) { + struct input_value vals[] = { + { EV_KEY, old_keycode, 0 }, + input_value_sync + }; - input_pass_event(dev, EV_KEY, old_keycode, 0); - if (dev->sync) - input_pass_event(dev, EV_SYN, SYN_REPORT, 1); + input_pass_values(dev, vals, ARRAY_SIZE(vals)); } out: @@ -1416,6 +1489,7 @@ static void input_dev_release(struct device *device) input_ff_destroy(dev); input_mt_destroy_slots(dev); kfree(dev->absinfo); + kfree(dev->vals); kfree(dev); module_put(THIS_MODULE); @@ -1778,6 +1852,9 @@ static unsigned int input_estimate_events_per_packet(struct input_dev *dev) if (test_bit(i, dev->relbit)) events++; + /* Make room for KEY and MSC events */ + events += 7; + return events; } @@ -1816,6 +1893,7 @@ int input_register_device(struct input_dev *dev) { static atomic_t input_no = ATOMIC_INIT(0); struct input_handler *handler; + unsigned int packet_size; const char *path; int error; @@ -1828,9 +1906,14 @@ int input_register_device(struct input_dev *dev) /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); - if (!dev->hint_events_per_packet) - dev->hint_events_per_packet = - input_estimate_events_per_packet(dev); + packet_size = input_estimate_events_per_packet(dev); + if (dev->hint_events_per_packet < packet_size) + dev->hint_events_per_packet = packet_size; + + dev->max_vals = max(dev->hint_events_per_packet, packet_size) + 2; + dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL); + if (!dev->vals) + return -ENOMEM; /* * If delay and period are pre-set by the driver, then autorepeating diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 69319792e98..b1d82022917 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -885,3 +885,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Phil Blundell "); MODULE_DESCRIPTION("Keyboard driver for GPIOs"); MODULE_ALIAS("platform:gpio-keys"); + diff --git a/drivers/input/lid.c b/drivers/input/lid.c index 1d27609c187..26e3d6490e0 100644 --- a/drivers/input/lid.c +++ b/drivers/input/lid.c @@ -163,7 +163,9 @@ static int __init lid_init(void) return -ENOMEM; } - sysfs_create_group((struct kobject*)&lid_dev->dev.kobj, &lid_attr_group); + err_code = sysfs_create_group((struct kobject*)&lid_dev->dev.kobj, &lid_attr_group); + if (err_code != 0) + return err_code; err_code = lid_input_device_create(); if(err_code != 0) diff --git a/drivers/input/proximity/cap1106.c b/drivers/input/proximity/cap1106.c index c78299616e8..570a18fac7d 100644 --- a/drivers/input/proximity/cap1106.c +++ b/drivers/input/proximity/cap1106.c @@ -72,6 +72,7 @@ static int prev_c6_status = 0; static int c2_acc_cnt = 0; static int c6_acc_cnt = 0; static int acc_limit = 10; +static int force_enable = 1; /*---------------------------------------------------------------------------- ** FUNCTION DECLARATION @@ -472,6 +473,7 @@ static ssize_t store_sensor_onoff(struct device *dev, struct device_attribute *a return -EINVAL; mutex_lock(&prox_mtx); + force_enable = enable; cap1106_enable_sensor(client, enable); mutex_unlock(&prox_mtx); @@ -892,7 +894,8 @@ static int cap1106_resume(struct i2c_client *client) { PROX_DEBUG("+\n"); mutex_lock(&prox_mtx); - cap1106_enable_sensor(client, 1); + if (force_enable) + cap1106_enable_sensor(client, 1); mutex_unlock(&prox_mtx); PROX_DEBUG("-\n"); return 0; diff --git a/drivers/input/touchscreen/rmi4/rmi_f09.c b/drivers/input/touchscreen/rmi4/rmi_f09.c index 0ec980d7db0..1c93451ca17 
100644 --- a/drivers/input/touchscreen/rmi4/rmi_f09.c +++ b/drivers/input/touchscreen/rmi4/rmi_f09.c @@ -107,7 +107,7 @@ static struct device_attribute attrs[] = { __ATTR(HostTestEn, RMI_RW_ATTR, rmi_f09_HostTestEn_show, rmi_f09_HostTestEn_store), __ATTR(InternalLimits, RMI_RO_ATTR, - rmi_f09_Limit_Register_Count_show, rmi_store_error), + rmi_f09_InternalLimits_show, rmi_store_error), __ATTR(Result_Register_Count, RMI_RO_ATTR, rmi_f09_Result_Register_Count_show, rmi_store_error), }; @@ -169,11 +169,6 @@ static int rmi_f09_init(struct rmi_function_container *fc) static void rmi_f09_remove(struct rmi_function_container *fc) { - struct rmi_fn_09_data *data = fc->data; - if (data) { - kfree(data->query.Limit_Register_Count); - kfree(data->query.f09_bist_query1); - } kfree(fc->data); } diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index f4d859fca7f..5f020f6a7d7 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -149,7 +149,7 @@ #define SMMU_ADDR_TO_PFN(addr) ((addr) >> 12) #define SMMU_ADDR_TO_PDN(addr) ((addr) >> 22) -#define SMMU_PDN_TO_ADDR(addr) ((pdn) << 22) +#define SMMU_PDN_TO_ADDR(pdn) ((pdn) << 22) #define _READABLE (1 << SMMU_PTB_DATA_ASID_READABLE_SHIFT) #define _WRITABLE (1 << SMMU_PTB_DATA_ASID_WRITABLE_SHIFT) diff --git a/drivers/media/video/cx18/cx18-mailbox.h b/drivers/media/video/cx18/cx18-mailbox.h index 05fe6bdbe06..b63fdfaac49 100644 --- a/drivers/media/video/cx18/cx18-mailbox.h +++ b/drivers/media/video/cx18/cx18-mailbox.h @@ -69,7 +69,7 @@ struct cx18_mailbox { /* Each command can have up to 6 arguments */ u32 args[MAX_MB_ARGUMENTS]; /* The return code can be one of the codes in the file cx23418.h. If the - command is completed successfuly, the error will be ERR_SYS_SUCCESS. + command is completed successfully, the error will be ERR_SYS_SUCCESS. If it is pending, the code is ERR_SYS_PENDING. If it failed, the error code would indicate the task from which the error originated and will be one of the errors in cx23418.h. In that case, the following diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7c26f45f609..ffb00c2a8a1 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -59,3 +59,4 @@ obj-$(CONFIG_BCM4330_RFKILL) += bcm4330_rfkill.o obj-$(CONFIG_TEGRA_CRYPTO_DEV) += tegra-cryptodev.o obj-$(CONFIG_TEGRA_BB_SUPPORT) += tegra-baseband/ obj-$(CONFIG_MAX1749_VIBRATOR) += max1749.o +obj-$(CONFIG_FSYNC_CONTROL) += fsync_control.o \ No newline at end of file diff --git a/drivers/misc/nct1008.c b/drivers/misc/nct1008.c index 735df920055..98f2abfed15 100755 --- a/drivers/misc/nct1008.c +++ b/drivers/misc/nct1008.c @@ -129,7 +129,10 @@ static int nct1008_get_temp(struct device *dev, long *pTemp) /* Return max between Local and External Temp */ *pTemp = max(temp_local_milli, temp_ext_milli); - printk("%s: ret temp=%dC \n", __func__, MILLICELSIUS_TO_CELSIUS(*pTemp)); + + /* Only log when temp is getting closer to Dynamic EDP limit */ + if (MILLICELSIUS_TO_CELSIUS(*pTemp) > 56) + pr_info("%s: ret temp=%dC \n", __func__, MILLICELSIUS_TO_CELSIUS(*pTemp)); return 0; error: dev_err(&client->dev, "\n error in file=: %s %s() line=%d: " diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 691e74d9d42..66b621a4c84 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -697,17 +697,6 @@ static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) unsigned int from, nr, arg; int err = 0; - /* - * The Nexus 7 ships with several emmc chips. 
The ext4 discard - * mount option is required to prevent performance issues on - * one chip, but hurts performance on others. However, if this - * is a secure erase request, we want this to work on all chips, - * as this is used in factory wipe. So this test will enable the - * discard option for the one chip, and secure erase for all chips. - */ - if (!(req->cmd_flags & REQ_SECURE) && !(card->cid.manfid == 0x15)) - goto out; - if (!mmc_can_erase(card)) { err = -EOPNOTSUPP; goto out; diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 8c87096531e..16e7aadde0c 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -57,6 +57,17 @@ config MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER This is the case for the Freescale eSDHC and Nintendo Wii SDHCI. +config MMC_SDHCI_NATIVE_BLOCKSIZE + bool "Use Host Controller's actual blocksize" + depends on MMC_SDHCI && EXPERIMENTAL + default n + help + Normally, the SDHCI host controller will limit the xfer blocksize + to 512 bytes, even if the hardware host controller can do more. + This option uses the maximum blocksize supported by the hardware. + + I have seen no corruption on a Tegra2, but if unsure, say N. + config MMC_SDHCI_PCI tristate "SDHCI support on PCI bus" depends on MMC_SDHCI && PCI diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 8f450fafbf3..e95f9d852ae 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -111,9 +111,13 @@ struct tegra_sdhci_host { unsigned int vddio_max_uv; /* max clk supported by the platform */ unsigned int max_clk_limit; + /* max ddr clk supported by the platform */ + unsigned int ddr_clk_limit; struct tegra_io_dpd *dpd; bool card_present; bool is_rail_enabled; + struct clk *emc_clk; + unsigned int emc_max_clk; }; static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg) @@ -363,6 +367,7 @@ static void tegra_sdhci_set_clk_rate(struct sdhci_host *sdhci, struct sdhci_pltfm_host *pltfm_host = sdhci_priv(sdhci); struct tegra_sdhci_host *tegra_host = pltfm_host->priv; unsigned int clk_rate; + unsigned int emc_clk; if (sdhci->mmc->card && mmc_card_ddr_mode(sdhci->mmc->card)) { @@ -370,7 +375,16 @@ static void tegra_sdhci_set_clk_rate(struct sdhci_host *sdhci, * In ddr mode, tegra sdmmc controller clock frequency * should be double the card clock frequency. 
*/ - clk_rate = clock * 2; + if (tegra_host->ddr_clk_limit) { + clk_rate = tegra_host->ddr_clk_limit * 2; + if (tegra_host->emc_clk) { + emc_clk = clk_get_rate(tegra_host->emc_clk); + if (emc_clk == tegra_host->emc_max_clk) + clk_rate = clock * 2; + } + } else { + clk_rate = clock * 2; + } } else { if (clock <= tegra_sdhost_min_freq) clk_rate = tegra_sdhost_min_freq; @@ -1109,10 +1123,23 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) rc = clk_enable(clk); if (rc != 0) goto err_clk_put; + + if (!strcmp(dev_name(mmc_dev(host->mmc)), "sdhci-tegra.3")) { + tegra_host->emc_clk = clk_get(mmc_dev(host->mmc), "emc"); + if (IS_ERR(tegra_host->emc_clk)) { + dev_err(mmc_dev(host->mmc), "clk err\n"); + rc = PTR_ERR(tegra_host->emc_clk); + goto err_clk_put; + } + tegra_host->emc_max_clk = + clk_round_rate(tegra_host->emc_clk, ULONG_MAX); + } + pltfm_host->clk = clk; pltfm_host->priv = tegra_host; tegra_host->clk_enabled = true; tegra_host->max_clk_limit = plat->max_clk_limit; + tegra_host->ddr_clk_limit = plat->ddr_clk_limit; tegra_host->instance = pdev->id; tegra_host->dpd = tegra_io_dpd_get(mmc_dev(host->mmc)); @@ -1152,6 +1179,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) return 0; err_add_host: + clk_put(tegra_host->emc_clk); clk_disable(pltfm_host->clk); err_clk_put: clk_put(pltfm_host->clk); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 3163d8373d7..a767234d0d0 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2812,11 +2812,13 @@ int sdhci_add_host(struct sdhci_host *host) } else { mmc->max_blk_size = (caps[0] & SDHCI_MAX_BLOCK_MASK) >> SDHCI_MAX_BLOCK_SHIFT; +#ifndef CONFIG_MMC_SDHCI_NATIVE_BLOCKSIZE if (mmc->max_blk_size >= 3) { printk(KERN_WARNING "%s: Invalid maximum block size, " "assuming 512 bytes\n", mmc_hostname(mmc)); mmc->max_blk_size = 0; } +#endif } mmc->max_blk_size = 512 << mmc->max_blk_size; @@ -2826,6 +2828,12 @@ int sdhci_add_host(struct sdhci_host *host) */ mmc->max_blk_count = (host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535; +#ifdef CONFIG_MMC_SDHCI_NATIVE_BLOCKSIZE + printk(KERN_INFO "%s: mss %u mrs %u mbs %u mbc %u\n", mmc_hostname(mmc), + mmc->max_seg_size, mmc->max_req_size, mmc->max_blk_size, + mmc->max_blk_count); +#endif + /* * Init tasklets. 
*/ diff --git a/drivers/net/igb/e1000_mbx.c b/drivers/net/igb/e1000_mbx.c index 74f2f11ac29..469d95eaa15 100644 --- a/drivers/net/igb/e1000_mbx.c +++ b/drivers/net/igb/e1000_mbx.c @@ -34,7 +34,7 @@ * @size: Length of buffer * @mbx_id: id of mailbox to read * - * returns SUCCESS if it successfuly read message from buffer + * returns SUCCESS if it successfully read message from buffer **/ s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) { diff --git a/drivers/net/igbvf/mbx.c b/drivers/net/igbvf/mbx.c index 3d6f4cc3998..048aae248d0 100644 --- a/drivers/net/igbvf/mbx.c +++ b/drivers/net/igbvf/mbx.c @@ -288,7 +288,7 @@ static s32 e1000_write_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size) * @msg: The message buffer * @size: Length of buffer * - * returns SUCCESS if it successfuly read message from buffer + * returns SUCCESS if it successfully read message from buffer **/ static s32 e1000_read_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size) { diff --git a/drivers/net/ixgbe/ixgbe_mbx.c b/drivers/net/ixgbe/ixgbe_mbx.c index 1ff0eefcfd0..3f725d48336 100644 --- a/drivers/net/ixgbe/ixgbe_mbx.c +++ b/drivers/net/ixgbe/ixgbe_mbx.c @@ -38,7 +38,7 @@ * @size: Length of buffer * @mbx_id: id of mailbox to read * - * returns SUCCESS if it successfuly read message from buffer + * returns SUCCESS if it successfully read message from buffer **/ s32 ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) { diff --git a/drivers/net/ixgbevf/mbx.c b/drivers/net/ixgbevf/mbx.c index 7a883312577..930fa83f256 100644 --- a/drivers/net/ixgbevf/mbx.c +++ b/drivers/net/ixgbevf/mbx.c @@ -276,7 +276,7 @@ static s32 ixgbevf_write_mbx_vf(struct ixgbe_hw *hw, u32 *msg, u16 size) * @msg: The message buffer * @size: Length of buffer * - * returns 0 if it successfuly read message from buffer + * returns 0 if it successfully read message from buffer **/ static s32 ixgbevf_read_mbx_vf(struct ixgbe_hw *hw, u32 *msg, u16 size) { diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index d84c4224dd1..e8be47d6d7d 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -553,7 +553,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) /* * There is no BCM5481 specification available, so down * here is everything we know about "register 0x18". This - * at least helps BCM5481 to successfuly receive packets + * at least helps BCM5481 to successfully receive packets * on MPC8360E-RDK board. Peter Barada * says: "This sets delay between the RXD and RXC signals * instead of using trace lengths to achieve timing". diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c index 1e2af96fc29..7b46e75deb5 100644 --- a/drivers/net/tile/tilepro.c +++ b/drivers/net/tile/tilepro.c @@ -177,7 +177,7 @@ struct tile_net_cpu { struct tile_net_stats_t stats; /* True iff NAPI is enabled. */ bool napi_enabled; - /* True if this tile has succcessfully registered with the IPP. */ + /* True if this tile has successfully registered with the IPP. */ bool registered; /* True if the link was down last time we tried to register. 
*/ bool link_down; diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 1d93133e9b7..a90faaba5a7 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -45,6 +45,12 @@ #define DM_MCAST_ADDR 0x16 /* 8 bytes */ #define DM_GPR_CTRL 0x1e #define DM_GPR_DATA 0x1f +#define DM_CHIP_ID 0x2c +#define DM_MODE_CTRL 0x91 /* only on dm9620 */ + +/* chip id values */ +#define ID_DM9601 0 +#define ID_DM9620 1 #define DM_MAX_MCAST 64 #define DM_MCAST_SIZE 8 @@ -432,7 +438,8 @@ static const struct net_device_ops dm9601_netdev_ops = { .ndo_set_mac_address = dm9601_set_mac_address, }; -static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) +static int dm9601_bind_common( + struct usbnet *dev, struct usb_interface *intf, int dev_type) { int ret; u8 mac[ETH_ALEN]; @@ -476,6 +483,18 @@ static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) __dm9601_set_mac_address(dev); } + /* put dm9620 devices in dm9601 mode */ + if (dev_type == ID_DM9620) { + u8 mode; + + if (dm_read_reg(dev, DM_MODE_CTRL, &mode) < 0) { + netdev_err(dev->net, "Error reading MODE_CTRL\n"); + ret = -ENODEV; + goto out; + } + dm_write_reg(dev, DM_MODE_CTRL, mode & 0x7f); + } + /* power up phy */ dm_write_reg(dev, DM_GPR_CTRL, 1); dm_write_reg(dev, DM_GPR_DATA, 0); @@ -492,6 +511,16 @@ static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) return ret; } +static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) +{ + return dm9601_bind_common(dev, intf, ID_DM9601); +} + +static int dm9620_bind(struct usbnet *dev, struct usb_interface *intf) +{ + return dm9601_bind_common(dev, intf, ID_DM9620); +} + static int dm9601_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { u8 status; @@ -621,6 +650,17 @@ static const struct driver_info dm9601_info = { .reset = dm9601_link_reset, }; +static const struct driver_info dm9620_info = { + .description = "Davicom DM9620 USB Ethernet", + .flags = FLAG_ETHER | FLAG_LINK_INTR, + .bind = dm9620_bind, + .rx_fixup = dm9601_rx_fixup, + .tx_fixup = dm9601_tx_fixup, + .status = dm9601_status, + .link_reset = dm9601_link_reset, + .reset = dm9601_link_reset, +}; + static const struct usb_device_id products[] = { { USB_DEVICE(0x07aa, 0x9601), /* Corega FEther USB-TXC */ @@ -658,13 +698,25 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0a46, 0x9000), /* DM9000E */ .driver_info = (unsigned long)&dm9601_info, }, + { + USB_DEVICE(0x0a46, 0x9620), /* DM9620 USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9620_info, + }, + { + USB_DEVICE(0x0a46, 0x9621), /* DM9621 USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9620_info, + }, + { + USB_DEVICE(0x0a46, 0x9622), /* DM9622 USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9620_info, + }, {}, // END }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver dm9601_driver = { - .name = "dm9601", + .name = "dm9601-962X", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, diff --git a/drivers/net/wireless/bcmdhd/Makefile b/drivers/net/wireless/bcmdhd/Makefile index 44aaa65bc27..40816c4ac57 100644 --- a/drivers/net/wireless/bcmdhd/Makefile +++ b/drivers/net/wireless/bcmdhd/Makefile @@ -8,7 +8,7 @@ DHDCFLAGS = -Wall -Wstrict-prototypes -Dlinux -DBCMDRIVER \ -DNEW_COMPAT_WIRELESS -DWIFI_ACT_FRAME -DARP_OFFLOAD_SUPPORT \ -DKEEP_ALIVE -DCSCAN -DGET_CUSTOM_MAC_ENABLE -DPKT_FILTER_SUPPORT \ -DEMBEDDED_PLATFORM -DENABLE_INSMOD_NO_FW_LOAD -DPNO_SUPPORT \ - -DSET_RANDOM_MAC_SOFTAP 
-DWL_CFG80211_STA_EVENT \ + -DSET_RANDOM_MAC_SOFTAP -DWL_CFG80211_STA_EVENT -DSUPPORT_PM2_ONLY \ -Idrivers/net/wireless/bcmdhd -Idrivers/net/wireless/bcmdhd/include DHDOFILES = aiutils.o bcmsdh_sdmmc_linux.o dhd_linux.o siutils.o bcmutils.o \ diff --git a/drivers/net/wireless/bcmdhd/dhd.h b/drivers/net/wireless/bcmdhd/dhd.h index 8426949a640..c50afa5a125 100755 --- a/drivers/net/wireless/bcmdhd/dhd.h +++ b/drivers/net/wireless/bcmdhd/dhd.h @@ -24,7 +24,7 @@ * software in any way with any other Broadcom software provided under a license * other than the GPL, without Broadcom's express prior written consent. * - * $Id: dhd.h 344123 2012-07-11 09:33:49Z $ + * $Id: dhd.h 357954 2012-09-20 18:22:31Z $ */ /**************** @@ -308,6 +308,8 @@ extern int dhd_os_wake_unlock(dhd_pub_t *pub); extern int dhd_os_wake_lock_timeout(dhd_pub_t *pub); extern int dhd_os_wake_lock_rx_timeout_enable(dhd_pub_t *pub, int val); extern int dhd_os_wake_lock_ctrl_timeout_enable(dhd_pub_t *pub, int val); +extern int dhd_os_wd_wake_lock(dhd_pub_t *pub); +extern int dhd_os_wd_wake_unlock(dhd_pub_t *pub); inline static void MUTEX_LOCK_SOFTAP_SET_INIT(dhd_pub_t * dhdp) { @@ -330,8 +332,10 @@ inline static void MUTEX_UNLOCK_SOFTAP_SET(dhd_pub_t * dhdp) #endif /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)) */ } -#define DHD_OS_WAKE_LOCK(pub) dhd_os_wake_lock(pub) -#define DHD_OS_WAKE_UNLOCK(pub) dhd_os_wake_unlock(pub) +#define DHD_OS_WAKE_LOCK(pub) dhd_os_wake_lock(pub) +#define DHD_OS_WAKE_UNLOCK(pub) dhd_os_wake_unlock(pub) +#define DHD_OS_WD_WAKE_LOCK(pub) dhd_os_wd_wake_lock(pub) +#define DHD_OS_WD_WAKE_UNLOCK(pub) dhd_os_wd_wake_unlock(pub) #define DHD_OS_WAKE_LOCK_TIMEOUT(pub) dhd_os_wake_lock_timeout(pub) #define DHD_OS_WAKE_LOCK_RX_TIMEOUT_ENABLE(pub, val) dhd_os_wake_lock_rx_timeout_enable(pub, val) #define DHD_OS_WAKE_LOCK_CTRL_TIMEOUT_ENABLE(pub, val) dhd_os_wake_lock_ctrl_timeout_enable(pub, val) @@ -614,9 +618,14 @@ extern uint dhd_pktgen_len; #define MAX_PKTGEN_LEN 1800 #endif +/* hooks for custom glom setting option via Makefile */ +#define DEFAULT_GLOM_VALUE -1 +#ifndef CUSTOM_GLOM_SETTING +#define CUSTOM_GLOM_SETTING DEFAULT_GLOM_VALUE +#endif /* hooks for custom Roaming Trigger setting via Makefile */ -#define DEFAULT_ROAM_TRIGGER_VALUE -75 /* dBm default roam trigger all band */ +#define DEFAULT_ROAM_TRIGGER_VALUE -65 /* dBm default roam trigger all band */ #define DEFAULT_ROAM_TRIGGER_SETTING -1 #ifndef CUSTOM_ROAM_TRIGGER_SETTING #define CUSTOM_ROAM_TRIGGER_SETTING DEFAULT_ROAM_TRIGGER_VALUE diff --git a/drivers/net/wireless/bcmdhd/dhd_common.c b/drivers/net/wireless/bcmdhd/dhd_common.c index d5af27f40b7..d46864c3a2a 100644 --- a/drivers/net/wireless/bcmdhd/dhd_common.c +++ b/drivers/net/wireless/bcmdhd/dhd_common.c @@ -21,7 +21,7 @@ * software in any way with any other Broadcom software provided under a license * other than the GPL, without Broadcom's express prior written consent. 
* - * $Id: dhd_common.c 331276 2012-05-04 08:05:57Z $ + * $Id: dhd_common.c 380760 2013-01-23 21:59:27Z $ */ #include #include @@ -1767,14 +1767,11 @@ bool dhd_is_associated(dhd_pub_t *dhd, void *bss_buf, int *retval) int dhd_get_dtim_skip(dhd_pub_t *dhd) { - int bcn_li_dtim; + int bcn_li_dtim = 1; + char buf[128]; int ret = -1; int dtim_assoc = 0; - - if ((dhd->dtim_skip == 0) || (dhd->dtim_skip == 1)) - bcn_li_dtim = 3; - else - bcn_li_dtim = dhd->dtim_skip; + int ap_beacon = 0; /* Check if associated */ if (dhd_is_associated(dhd, NULL, NULL) == FALSE) { @@ -1782,15 +1779,34 @@ dhd_get_dtim_skip(dhd_pub_t *dhd) goto exit; } - /* if assoc grab ap's dtim value */ - if ((ret = dhd_wl_ioctl_cmd(dhd, WLC_GET_DTIMPRD, - &dtim_assoc, sizeof(dtim_assoc), FALSE, 0)) < 0) { + /* read AP beacon if do nother if APs Beacon more that 100msec */ + bcm_mkiovar("bi_assoc", 0, 0, buf, sizeof(buf)); + if ((ret = dhd_wl_ioctl_cmd(dhd, WLC_GET_VAR, buf, sizeof(buf), FALSE, 0)) < 0) { + DHD_ERROR(("%s failed code %d\n", __FUNCTION__, ret)); + goto exit; + } + + ap_beacon = dtoh32(*(int *)buf); + + /* if APs Beacon more that 100msec do no dtim skip */ + if (ap_beacon > 100) { + DHD_ERROR(("%s no dtim skip for AP with %d beacon\n", __FUNCTION__, ap_beacon)); + goto exit; + } + + + /* Read DTIM value if associated */ + memset(buf, 0, sizeof(buf)); + bcm_mkiovar("dtim_assoc", 0, 0, buf, sizeof(buf)); + if ((ret = dhd_wl_ioctl_cmd(dhd, WLC_GET_VAR, buf, sizeof(buf), FALSE, 0)) < 0) { DHD_ERROR(("%s failed code %d\n", __FUNCTION__, ret)); goto exit; } - DHD_ERROR(("%s bcn_li_dtim=%d DTIM=%d Listen=%d\n", - __FUNCTION__, bcn_li_dtim, dtim_assoc, LISTEN_INTERVAL)); + dtim_assoc = dtoh32(*(int *)buf); + + DHD_ERROR(("%s beacom=%d msec bcn_li_dtim=%d DTIM=%d Listen=%d\n", + __FUNCTION__, ap_beacon, bcn_li_dtim, dtim_assoc, LISTEN_INTERVAL)); /* if not assocated just eixt */ if (dtim_assoc == 0) { @@ -1800,12 +1816,16 @@ dhd_get_dtim_skip(dhd_pub_t *dhd) /* check if sta listen interval fits into AP dtim */ if (dtim_assoc > LISTEN_INTERVAL) { /* AP DTIM to big for our Listen Interval : no dtim skiping */ - bcn_li_dtim = 1; DHD_ERROR(("%s DTIM=%d > Listen=%d : too big ...\n", __FUNCTION__, dtim_assoc, LISTEN_INTERVAL)); goto exit; } + if ((dhd->dtim_skip == 0) || (dhd->dtim_skip == 1)) + bcn_li_dtim = 3; + else + bcn_li_dtim = dhd->dtim_skip; + if ((bcn_li_dtim * dtim_assoc) > LISTEN_INTERVAL) { /* Round up dtim_skip to fit into STAs Listen Interval */ bcn_li_dtim = (int)(LISTEN_INTERVAL / dtim_assoc); diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index c2f4d33e470..b26e2419699 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -261,6 +261,7 @@ typedef struct dhd_info { struct wake_lock wl_wifi; /* Wifi wakelock */ struct wake_lock wl_rxwake; /* Wifi rx wakelock */ struct wake_lock wl_ctrlwake; /* Wifi ctrl wakelock */ + struct wake_lock wl_wdwake; /* Wifi wd wakelock */ #endif #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)) @@ -272,6 +273,7 @@ typedef struct dhd_info { #endif spinlock_t wakelock_spinlock; int wakelock_counter; + int wakelock_wd_counter; int wakelock_rx_timeout_enable; int wakelock_ctrl_timeout_enable; @@ -633,7 +635,9 @@ dhd_dynamic_dtim_skip_release(dhd_pub_t *dhdp) static int dhd_set_suspend(int value, dhd_pub_t *dhd) { +#if !defined(SUPPORT_PM2_ONLY) int power_mode = PM_MAX; +#endif /* wl_pkt_filter_enable_t enable_parm; */ char iovbuf[32]; int bcn_li_dtim = 3; @@ -649,8 +653,10 @@ static int 
dhd_set_suspend(int value, dhd_pub_t *dhd) /* Kernel suspended */ DHD_ERROR(("%s: force extra Suspend setting\n", __FUNCTION__)); +#if !defined(SUPPORT_PM2_ONLY) dhd_wl_ioctl_cmd(dhd, WLC_SET_PM, (char *)&power_mode, sizeof(power_mode), TRUE, 0); +#endif /* Enable packet filter, only allow unicast packet to send up */ dhd_set_packet_filter(1, dhd); @@ -677,9 +683,11 @@ static int dhd_set_suspend(int value, dhd_pub_t *dhd) /* Kernel resumed */ DHD_ERROR(("%s: Remove extra suspend setting\n", __FUNCTION__)); +#if !defined(SUPPORT_PM2_ONLY) power_mode = PM_FAST; dhd_wl_ioctl_cmd(dhd, WLC_SET_PM, (char *)&power_mode, sizeof(power_mode), TRUE, 0); +#endif /* disable pkt filter */ dhd_set_packet_filter(0, dhd); @@ -1637,14 +1645,14 @@ dhd_rx_frame(dhd_pub_t *dhdp, int ifidx, void *pktbuf, int numpkt, uint8 chan) wl_event_to_host_order(&event); if (!tout_ctrl) tout_ctrl = DHD_PACKET_TIMEOUT_MS; - if (event.event_type == WLC_E_BTA_HCI_EVENT) { - dhd_bta_doevt(dhdp, data, event.datalen); - } #ifdef PNO_SUPPORT if (event.event_type == WLC_E_PFN_NET_FOUND) { - tout_ctrl *= 2; + tout_ctrl = 7 * DHD_PACKET_TIMEOUT_MS; } #endif /* PNO_SUPPORT */ + if (event.event_type == WLC_E_BTA_HCI_EVENT) { + dhd_bta_doevt(dhdp, data, event.datalen); + } } else { tout_rx = DHD_PACKET_TIMEOUT_MS; } @@ -1809,7 +1817,6 @@ dhd_watchdog_thread(void *data) dhd_os_spin_unlock(&dhd->pub, flags); } dhd_os_sdunlock(&dhd->pub); - DHD_OS_WAKE_UNLOCK(&dhd->pub); } else { break; } @@ -1823,9 +1830,7 @@ static void dhd_watchdog(ulong data) dhd_info_t *dhd = (dhd_info_t *)data; unsigned long flags; - DHD_OS_WAKE_LOCK(&dhd->pub); if (dhd->pub.dongle_reset) { - DHD_OS_WAKE_UNLOCK(&dhd->pub); return; } @@ -1849,7 +1854,6 @@ static void dhd_watchdog(ulong data) mod_timer(&dhd->timer, jiffies + msecs_to_jiffies(dhd_watchdog_ms)); dhd_os_spin_unlock(&dhd->pub, flags); dhd_os_sdunlock(&dhd->pub); - DHD_OS_WAKE_UNLOCK(&dhd->pub); } #ifdef DHDTHREAD @@ -2793,12 +2797,14 @@ dhd_attach(osl_t *osh, struct dhd_bus *bus, uint bus_hdrlen) /* Initialize Wakelock stuff */ spin_lock_init(&dhd->wakelock_spinlock); dhd->wakelock_counter = 0; + dhd->wakelock_wd_counter = 0; dhd->wakelock_rx_timeout_enable = 0; dhd->wakelock_ctrl_timeout_enable = 0; #ifdef CONFIG_HAS_WAKELOCK wake_lock_init(&dhd->wl_wifi, WAKE_LOCK_SUSPEND, "wlan_wake"); wake_lock_init(&dhd->wl_rxwake, WAKE_LOCK_SUSPEND, "wlan_rx_wake"); wake_lock_init(&dhd->wl_ctrlwake, WAKE_LOCK_SUSPEND, "wlan_ctrl_wake"); + wake_lock_init(&dhd->wl_wdwake, WAKE_LOCK_SUSPEND, "wlan_wd_wake"); #endif #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)) mutex_init(&dhd->dhd_net_if_mutex); @@ -2991,12 +2997,12 @@ dhd_bus_start(dhd_pub_t *dhdp) dhd->wd_timer_valid = FALSE; dhd_os_spin_unlock(&dhd->pub, flags); del_timer_sync(&dhd->timer); - DHD_ERROR(("%s Host failed to register for OOB\n", __FUNCTION__)); #ifdef DHDTHREAD if (dhd->threads_only) dhd_os_sdunlock(dhdp); #endif /* DHDTHREAD */ + DHD_OS_WD_WAKE_UNLOCK(&dhd->pub); return -ENODEV; } @@ -3015,6 +3021,7 @@ dhd_bus_start(dhd_pub_t *dhdp) if (dhd->threads_only) dhd_os_sdunlock(dhdp); #endif /* DHDTHREAD */ + DHD_OS_WD_WAKE_UNLOCK(&dhd->pub); return -ENODEV; } @@ -3380,7 +3387,6 @@ dhd_preinit_ioctls(dhd_pub_t *dhd) setbit(eventmask, WLC_E_ACTION_FRAME_RX); setbit(eventmask, WLC_E_ACTION_FRAME_COMPLETE); setbit(eventmask, WLC_E_ACTION_FRAME_OFF_CHAN_COMPLETE); - setbit(eventmask, WLC_E_P2P_PROBREQ_MSG); setbit(eventmask, WLC_E_P2P_DISC_LISTEN_COMPLETE); } #endif /* WL_CFG80211 */ @@ -3913,10 +3919,15 @@ void dhd_detach(dhd_pub_t *dhdp) if 
(dhd->dhd_state & DHD_ATTACH_STATE_WAKELOCKS_INIT) { #ifdef CONFIG_HAS_WAKELOCK + dhd->wakelock_counter = 0; + dhd->wakelock_wd_counter = 0; + dhd->wakelock_rx_timeout_enable = 0; + dhd->wakelock_ctrl_timeout_enable = 0; wake_lock_destroy(&dhd->wl_wifi); wake_lock_destroy(&dhd->wl_rxwake); wake_lock_destroy(&dhd->wl_ctrlwake); -#endif + wake_lock_destroy(&dhd->wl_wdwake); +#endif /* CONFIG_HAS_WAKELOCK */ } } @@ -4109,11 +4120,16 @@ dhd_os_wd_timer(void *bus, uint wdtick) DHD_TRACE(("%s: Enter\n", __FUNCTION__)); + if (!dhd) + return; + flags = dhd_os_spin_lock(pub); /* don't start the wd until fw is loaded */ if (pub->busstate == DHD_BUS_DOWN) { dhd_os_spin_unlock(pub, flags); + if (!wdtick) + DHD_OS_WD_WAKE_UNLOCK(pub); return; } @@ -4126,10 +4142,12 @@ dhd_os_wd_timer(void *bus, uint wdtick) #else del_timer(&dhd->timer); #endif /* DHDTHREAD */ + DHD_OS_WD_WAKE_UNLOCK(pub); return; } if (wdtick) { + DHD_OS_WD_WAKE_LOCK(pub); dhd_watchdog_ms = (uint)wdtick; /* Re arm the timer, at last watchdog period */ mod_timer(&dhd->timer, jiffies + msecs_to_jiffies(dhd_watchdog_ms)); @@ -4987,7 +5005,8 @@ int dhd_os_check_wakelock(void *dhdp) return 0; dhd = (dhd_info_t *)(pub->info); - if (dhd && wake_lock_active(&dhd->wl_wifi)) + if (dhd && (wake_lock_active(&dhd->wl_wifi) || + wake_lock_active(&dhd->wl_wdwake))) return 1; #endif return 0; @@ -5003,6 +5022,44 @@ int net_os_wake_unlock(struct net_device *dev) return ret; } +int dhd_os_wd_wake_lock(dhd_pub_t *pub) +{ + dhd_info_t *dhd = (dhd_info_t *)(pub->info); + unsigned long flags; + int ret = 0; + + if (dhd) { + spin_lock_irqsave(&dhd->wakelock_spinlock, flags); +#ifdef CONFIG_HAS_WAKELOCK + if (!dhd->wakelock_wd_counter) + wake_lock(&dhd->wl_wdwake); +#endif + dhd->wakelock_wd_counter++; + ret = dhd->wakelock_wd_counter; + spin_unlock_irqrestore(&dhd->wakelock_spinlock, flags); + } + return ret; +} + +int dhd_os_wd_wake_unlock(dhd_pub_t *pub) +{ + dhd_info_t *dhd = (dhd_info_t *)(pub->info); + unsigned long flags; + int ret = 0; + + if (dhd) { + spin_lock_irqsave(&dhd->wakelock_spinlock, flags); + if (dhd->wakelock_wd_counter) { + dhd->wakelock_wd_counter = 0; +#ifdef CONFIG_HAS_WAKELOCK + wake_unlock(&dhd->wl_wdwake); +#endif + } + spin_unlock_irqrestore(&dhd->wakelock_spinlock, flags); + } + return ret; +} + int dhd_os_check_if_up(void *dhdp) { dhd_pub_t *pub = (dhd_pub_t *)dhdp; diff --git a/drivers/net/wireless/bcmdhd/include/epivers.h b/drivers/net/wireless/bcmdhd/include/epivers.h index 37c07e6ec37..fac87f500d1 100644 --- a/drivers/net/wireless/bcmdhd/include/epivers.h +++ b/drivers/net/wireless/bcmdhd/include/epivers.h @@ -33,17 +33,17 @@ #define EPI_RC_NUMBER 195 -#define EPI_INCREMENTAL_NUMBER 104 +#define EPI_INCREMENTAL_NUMBER 114 #define EPI_BUILD_NUMBER 0 -#define EPI_VERSION 5, 90, 195, 104 +#define EPI_VERSION 5, 90, 195, 114 -#define EPI_VERSION_NUM 0x055ac368 +#define EPI_VERSION_NUM 0x055ac372 #define EPI_VERSION_DEV 5.90.195 -#define EPI_VERSION_STR "5.90.195.104" +#define EPI_VERSION_STR "5.90.195.114" #endif diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index b4f47d1393c..b16d0bb5031 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -289,6 +289,7 @@ static void wl_ch_to_chanspec(int ch, */ static void wl_rst_ie(struct wl_priv *wl); static __used s32 wl_add_ie(struct wl_priv *wl, u8 t, u8 l, u8 *v); +static void wl_update_hidden_ap_ie(struct wl_bss_info *bi, u8 *ie_stream, u32 *ie_size); static s32 wl_mrg_ie(struct 
wl_priv *wl, u8 *ie_stream, u16 ie_size); static s32 wl_cp_ie(struct wl_priv *wl, u8 *dst, u16 dst_size); static u32 wl_get_ielen(struct wl_priv *wl); @@ -331,7 +332,6 @@ static __used bool wl_is_ibssstarter(struct wl_priv *wl); */ static s32 __wl_cfg80211_up(struct wl_priv *wl); static s32 __wl_cfg80211_down(struct wl_priv *wl); -static s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add); static bool wl_is_linkdown(struct wl_priv *wl, const wl_event_msg_t *e); static bool wl_is_linkup(struct wl_priv *wl, const wl_event_msg_t *e, struct net_device *ndev); static bool wl_is_nonetwork(struct wl_priv *wl, const wl_event_msg_t *e); @@ -1572,8 +1572,13 @@ __wl_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, WL_DBG(("Enter wiphy (%p)\n", wiphy)); if (wl_get_drv_status_all(wl, SCANNING)) { - WL_ERR(("Scanning already\n")); - return -EAGAIN; + if (wl->scan_request == NULL) { + wl_clr_drv_status_all(wl, SCANNING); + WL_DBG(("<<<<<<<<<<>>>>>>>>>>\n")); + } else { + WL_ERR(("Scanning already\n")); + return -EAGAIN; + } } if (wl_get_drv_status(wl, SCAN_ABORTING, ndev)) { WL_ERR(("Scanning being aborted\n")); @@ -1615,7 +1620,7 @@ __wl_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, WL_DBG(("P2P: GO_NEG_PHASE status cleared \n")); p2p_scan(wl) = true; } - } else { + } else if (wl_get_mode_by_netdev(wl, ndev) != WL_MODE_IBSS) { /* legacy scan trigger * So, we have to disable p2p discovery if p2p discovery is on */ @@ -2787,7 +2792,9 @@ wl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *dev, bssidx = wl_cfgp2p_find_idx(wl, dev); - if (mac_addr) { + if (mac_addr && + ((params->cipher != WLAN_CIPHER_SUITE_WEP40) && + (params->cipher != WLAN_CIPHER_SUITE_WEP104))) { wl_add_keyext(wiphy, dev, key_idx, mac_addr, params); goto exit; } @@ -3065,7 +3072,68 @@ wl_cfg80211_get_station(struct wiphy *wiphy, struct net_device *dev, wl_link_down(wl); } } + else if (wl_get_mode_by_netdev(wl, dev) == WL_MODE_IBSS) { + u8 *curmacp = wl_read_prof(wl, dev, WL_PROF_BSSID); + + memset(&scb_val, 0, sizeof(scb_val)); + bcopy(mac, &scb_val.ea, 6); + + err = wldev_ioctl(dev, WLC_GET_RSSI, &scb_val, + sizeof(scb_val_t), false); + if (err) { + WL_ERR(("Could not get rssi (%d)\n", err)); + return err; + } + rssi = dtoh32(scb_val.val); + + /* the RSSI value from the firmware is an average but user-space + expects it as signal, so we fill in both */ + sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->signal = rssi; + sinfo->filled |= STATION_INFO_SIGNAL_AVG; + sinfo->signal_avg = rssi; + + if (!memcmp(mac, curmacp, ETHER_ADDR_LEN)) { + // BSSID is not a real station. 
Can't get sta_info; Done + return 0; + } + + err = wldev_iovar_getbuf(dev, "sta_info", (struct ether_addr *)mac, + ETHER_ADDR_LEN, wl->ioctl_buf, WLC_IOCTL_MAXLEN, &wl->ioctl_buf_sync); + if (err < 0) { + WL_ERR(("GET STA INFO failed, %d\n", err)); + return err; + } + + sta = (sta_info_t *)wl->ioctl_buf; + sta->len = dtoh16(sta->len); + sta->cap = dtoh16(sta->cap); + sta->flags = dtoh32(sta->flags); + sta->idle = dtoh32(sta->idle); + sta->in = dtoh32(sta->in); + sta->listen_interval_inms = dtoh32(sta->listen_interval_inms); + sta->tx_pkts = dtoh32(sta->tx_pkts); + sta->tx_failures = dtoh32(sta->tx_failures); + sta->rx_ucast_pkts = dtoh32(sta->rx_ucast_pkts); + sta->rx_mcast_pkts = dtoh32(sta->rx_mcast_pkts); + sta->tx_rate = dtoh32(sta->tx_rate); + sta->rx_rate = dtoh32(sta->rx_rate); + sta->rx_decrypt_succeeds = dtoh32(sta->rx_decrypt_succeeds); + sta->rx_decrypt_failures = dtoh32(sta->rx_decrypt_failures); + + sinfo->filled |= STATION_INFO_INACTIVE_TIME | STATION_INFO_TX_PACKETS | + STATION_INFO_TX_FAILED | STATION_INFO_RX_PACKETS | + STATION_INFO_TX_BITRATE | STATION_INFO_RX_BITRATE | + STATION_INFO_RX_DROP_MISC; + sinfo->inactive_time = sta->idle * 1000; + sinfo->tx_packets = sta->tx_pkts; + sinfo->tx_failed = sta->tx_failures; + sinfo->rx_packets = sta->rx_ucast_pkts + sta->rx_mcast_pkts; + sinfo->txrate.legacy = sta->tx_rate / 100; + sinfo->rxrate.legacy = sta->rx_rate / 100; + sinfo->rx_dropped_misc = sta->rx_decrypt_failures; + } return err; } @@ -3093,8 +3161,9 @@ wl_cfg80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, s32 pm; s32 err = 0; struct wl_priv *wl = wiphy_priv(wiphy); +#if !defined(SUPPORT_PM2_ONLY) dhd_pub_t *dhd = (dhd_pub_t *)(wl->pub); - +#endif CHECK_SYS_UP(wl); WL_DBG(("Enter : power save %s\n", (enabled ? "enable" : "disable"))); @@ -3102,7 +3171,11 @@ wl_cfg80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return err; } +#if !defined(SUPPORT_PM2_ONLY) pm = enabled ? ((dhd->in_suspend) ? PM_MAX : PM_FAST) : PM_OFF; +#else + pm = enabled ? 
PM_FAST : PM_OFF; +#endif pm = htod32(pm); err = wldev_ioctl(dev, WLC_SET_PM, &pm, sizeof(pm), true); if (unlikely(err)) { @@ -4470,8 +4543,8 @@ wl_cfg80211_add_set_beacon(struct wiphy *wiphy, struct net_device *dev, } #ifdef WL_SCHED_SCAN -#define PNO_TIME 30 -#define PNO_REPEAT 4 +#define PNO_TIME 30 +#define PNO_REPEAT 4 #define PNO_FREQ_EXPO_MAX 2 int wl_cfg80211_sched_scan_start(struct wiphy *wiphy, struct net_device *dev, @@ -4487,9 +4560,11 @@ int wl_cfg80211_sched_scan_start(struct wiphy *wiphy, int i; int ret = 0; - WL_DBG(("Enter n_match_sets:%d n_ssids:%d \n", + WL_DBG(("Enter \n")); + WL_PNO((">>> SCHED SCAN START\n")); + WL_PNO(("Enter n_match_sets:%d n_ssids:%d \n", request->n_match_sets, request->n_ssids)); - WL_DBG(("ssids:%d pno_time:%d pno_repeat:%d pno_freq:%d \n", + WL_PNO(("ssids:%d pno_time:%d pno_repeat:%d pno_freq:%d \n", request->n_ssids, pno_time, pno_repeat, pno_freq_expo_max)); #if defined(WL_ENABLE_P2P_IF) @@ -4512,7 +4587,7 @@ int wl_cfg80211_sched_scan_start(struct wiphy *wiphy, ssid = &request->match_sets[i].ssid; memcpy(ssids_local[i].SSID, ssid->ssid, ssid->ssid_len); ssids_local[i].SSID_len = ssid->ssid_len; - WL_DBG((">>> PNO filter set for ssid (%s) \n", ssid->ssid)); + WL_PNO((">>> PNO filter set for ssid (%s) \n", ssid->ssid)); ssid_count++; } } @@ -4520,7 +4595,7 @@ int wl_cfg80211_sched_scan_start(struct wiphy *wiphy, if (request->n_ssids > 0) { for (i = 0; i < request->n_ssids; i++) { /* Active scan req for ssids */ - WL_DBG((">>> Active scan req for ssid (%s) \n", request->ssids[i].ssid)); + WL_PNO((">>> Active scan req for ssid (%s) \n", request->ssids[i].ssid)); /* match_set ssids is a supert set of n_ssid list, so we need * not add these set seperately @@ -4553,6 +4628,7 @@ int wl_cfg80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev) struct wl_priv *wl = wiphy_priv(wiphy); WL_DBG(("Enter \n")); + WL_PNO((">>> SCHED SCAN STOP\n")); if (dhd_dev_pno_enable(dev, 0) < 0) WL_ERR(("PNO disable failed")); @@ -4561,6 +4637,7 @@ int wl_cfg80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev) WL_ERR(("PNO reset failed")); if (wl->scan_request && wl->sched_scan_running) { + WL_PNO((">>> Sched scan running. 
Aborting it..\n")); wl_notify_escan_complete(wl, dev, true, true); } @@ -4650,7 +4727,7 @@ static s32 wl_setup_wiphy(struct wireless_dev *wdev, struct device *sdiofunc_dev wdev->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; #endif /* WL_SCHED_SCAN */ wdev->wiphy->interface_modes = - BIT(NL80211_IFTYPE_STATION) + BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_MONITOR); wdev->wiphy->bands[IEEE80211_BAND_2GHZ] = &__wl_band_2ghz; @@ -4676,6 +4753,9 @@ static s32 wl_setup_wiphy(struct wireless_dev *wdev, struct device *sdiofunc_dev #endif /* AP_SME flag can be advertised to remove patch from wpa_supplicant */ wdev->wiphy->flags |= WIPHY_FLAG_HAVE_AP_SME; +#if defined(CONFIG_PM) + wdev->wiphy->wowlan.flags = WIPHY_WOWLAN_ANY; +#endif WL_DBG(("Registering custom regulatory)\n")); wdev->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(wdev->wiphy, &brcm_regdom); @@ -4779,6 +4859,7 @@ static s32 wl_inform_single_bss(struct wl_priv *wl, struct wl_bss_info *bi) beacon_proberesp->capab_info = cpu_to_le16(bi->capability); wl_rst_ie(wl); + wl_update_hidden_ap_ie(bi, ((u8 *) bi) + bi->ie_offset, &bi->ie_length); wl_mrg_ie(wl, ((u8 *) bi) + bi->ie_offset, bi->ie_length); wl_cp_ie(wl, beacon_proberesp->variable, WL_BSS_INFO_MAX - offsetof(struct wl_cfg80211_bss_info, frame_buf)); @@ -5376,6 +5457,7 @@ static s32 wl_update_bss_info(struct wl_priv *wl, struct net_device *ndev) } bi = (struct wl_bss_info *)(wl->extra_buf + 4); if (memcmp(bi->BSSID.octet, curbssid, ETHER_ADDR_LEN)) { + WL_ERR(("Bssid doesn't match\n")); err = -EIO; goto update_bss_info_out; } @@ -5415,6 +5497,9 @@ static s32 wl_update_bss_info(struct wl_priv *wl, struct net_device *ndev) wl_update_prof(wl, ndev, NULL, &dtim_period, WL_PROF_DTIMPERIOD); update_bss_info_out: + if (unlikely(err)) { + WL_ERR(("Failed with error %d\n", err)); + } mutex_unlock(&wl->usr_sync); return err; } @@ -5548,19 +5633,19 @@ static s32 wl_notify_pfn_status(struct wl_priv *wl, struct net_device *ndev, const wl_event_msg_t *e, void *data) { - WL_ERR((" PNO Event\n")); + WL_ERR((">>> PNO Event\n")); - mutex_lock(&wl->usr_sync); #ifndef WL_SCHED_SCAN + mutex_lock(&wl->usr_sync); /* TODO: Use cfg80211_sched_scan_results(wiphy); */ cfg80211_disconnected(ndev, 0, NULL, 0, GFP_KERNEL); + mutex_unlock(&wl->usr_sync); #else /* If cfg80211 scheduled scan is supported, report the pno results via sched * scan results */ wl_notify_sched_scan_results(wl, ndev, e, data); #endif /* WL_SCHED_SCAN */ - mutex_unlock(&wl->usr_sync); return 0; } #endif /* PNO_SUPPORT */ @@ -5617,14 +5702,15 @@ wl_notify_scan_status(struct wl_priv *wl, struct net_device *ndev, del_timer_sync(&wl->scan_timeout); spin_lock_irqsave(&wl->cfgdrv_lock, flags); if (wl->scan_request) { - WL_DBG(("cfg80211_scan_done\n")); cfg80211_scan_done(wl->scan_request, false); wl->scan_request = NULL; } spin_unlock_irqrestore(&wl->cfgdrv_lock, flags); + WL_DBG(("cfg80211_scan_done\n")); mutex_unlock(&wl->usr_sync); return err; } + static s32 wl_frame_get_mgmt(u16 fc, const struct ether_addr *da, const struct ether_addr *sa, const struct ether_addr *bssid, @@ -5787,7 +5873,7 @@ wl_notify_rx_mgmt_frame(struct wl_priv *wl, struct net_device *ndev, /* If target scan is not reliable, set the below define to "1" to do a * full escan */ -#define FULL_ESCAN_ON_PFN_NET_FOUND 0 +#define FULL_ESCAN_ON_PFN_NET_FOUND 1 static s32 wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, const wl_event_msg_t *e, void *data) @@ -5805,10 
+5891,10 @@ wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, WL_DBG(("Enter\n")); if (e->event_type == WLC_E_PFN_NET_LOST) { - WL_DBG(("PFN NET LOST event. Do Nothing \n")); + WL_PNO(("PFN NET LOST event. Do Nothing \n")); return 0; } - WL_DBG(("PFN NET FOUND event. count:%d \n", pfn_result->count)); + WL_PNO((">>> PFN NET FOUND event. count:%d \n", pfn_result->count)); if (pfn_result->count > 0) { int i; @@ -5834,7 +5920,7 @@ wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, err = -EINVAL; goto out_err; } - WL_DBG(("SSID:%s Channel:%d \n", + WL_PNO((">>> SSID:%s Channel:%d \n", netinfo->pfnsubnet.SSID, netinfo->pfnsubnet.channel)); /* PFN result doesn't have all the info which are required by the supplicant * (For e.g IEs) Do a target Escan so that sched scan results are reported @@ -5867,6 +5953,7 @@ wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, } if (wl_get_p2p_status(wl, DISCOVERY_ON)) { + WL_PNO((">>> P2P discovery was ON. Disabling it\n")); err = wl_cfgp2p_discover_enable_search(wl, false); if (unlikely(err)) { wl_clr_drv_status(wl, SCANNING, ndev); @@ -5876,8 +5963,10 @@ wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, wl_set_drv_status(wl, SCANNING, ndev); #if FULL_ESCAN_ON_PFN_NET_FOUND + WL_PNO((">>> Doing Full ESCAN on PNO event\n")); err = wl_do_escan(wl, wiphy, ndev, NULL); #else + WL_PNO((">>> Doing targeted ESCAN on PNO event\n")); err = wl_do_escan(wl, wiphy, ndev, &request); #endif if (err) { @@ -6365,9 +6454,9 @@ static s32 wl_notify_escan_complete(struct wl_priv *wl, dev = wl->scan_request->dev; } else { - WL_ERR(("wl->scan_request is NULL may be internal scan." - "doing scan_abort for ndev %p primary %p p2p_net %p", - ndev, wl_to_prmry_ndev(wl), wl->p2p_net)); + WL_DBG(("wl->scan_request is NULL may be internal scan." 
+ "doing scan_abort for ndev %p primary %p", + ndev, wl_to_prmry_ndev(wl))); dev = ndev; } if (fw_abort && !in_atomic()) { @@ -6387,19 +6476,15 @@ static s32 wl_notify_escan_complete(struct wl_priv *wl, if (timer_pending(&wl->scan_timeout)) del_timer_sync(&wl->scan_timeout); spin_lock_irqsave(&wl->cfgdrv_lock, flags); - #ifdef WL_SCHED_SCAN if (wl->sched_scan_req && !wl->scan_request) { - WL_DBG((" REPORTING SCHED SCAN RESULTS \n")); - if (aborted) - cfg80211_sched_scan_stopped(wl->sched_scan_req->wiphy); - else + WL_PNO((">>> REPORTING SCHED SCAN RESULTS \n")); + if (!aborted) cfg80211_sched_scan_results(wl->sched_scan_req->wiphy); wl->sched_scan_running = FALSE; wl->sched_scan_req = NULL; } #endif /* WL_SCHED_SCAN */ - if (likely(wl->scan_request)) { cfg80211_scan_done(wl->scan_request, aborted); wl->scan_request = NULL; @@ -6424,9 +6509,9 @@ static s32 wl_escan_handler(struct wl_priv *wl, wl_escan_result_t *escan_result; wl_bss_info_t *bss = NULL; wl_scan_results_t *list; + wifi_p2p_ie_t * p2p_ie; u32 bi_length; u32 i; - wifi_p2p_ie_t * p2p_ie; u8 *p2p_dev_addr = NULL; WL_DBG((" enter event type : %d, status : %d \n", @@ -6442,15 +6527,18 @@ static s32 wl_escan_handler(struct wl_priv *wl, } if (!ndev || !wl->escan_on || - !wl_get_drv_status(wl, SCANNING, ndev)) { - WL_ERR(("escan is not ready ndev %p wl->escan_on %d drv_status 0x%x\n", - ndev, wl->escan_on, wl_get_drv_status(wl, SCANNING, ndev))); + (!wl_get_drv_status(wl, SCANNING, ndev) && + !wl->sched_scan_running)) { + WL_ERR(("escan is not ready ndev %p wl->escan_on %d" + " drv_status 0x%x e_type %d e_states %d\n", + ndev, wl->escan_on, wl_get_drv_status(wl, SCANNING, ndev), + ntoh32(e->event_type), ntoh32(e->status))); goto exit; } + escan_result = (wl_escan_result_t *)data; if (status == WLC_E_STATUS_PARTIAL) { WL_INFO(("WLC_E_STATUS_PARTIAL \n")); - escan_result = (wl_escan_result_t *) data; if (!escan_result) { WL_ERR(("Invalid escan result (NULL pointer)\n")); goto exit; @@ -6732,11 +6820,10 @@ s32 wl_cfg80211_attach_post(struct net_device *ndev) if (wl && !wl_get_drv_status(wl, READY, ndev)) { if (wl->wdev && wl_cfgp2p_supported(wl, ndev)) { -#if !defined(WL_ENABLE_P2P_IF) wl->wdev->wiphy->interface_modes |= (BIT(NL80211_IFTYPE_P2P_CLIENT)| BIT(NL80211_IFTYPE_P2P_GO)); -#endif + if ((err = wl_cfgp2p_init_priv(wl)) != 0) goto fail; @@ -7089,7 +7176,7 @@ static s32 wl_config_ifmode(struct wl_priv *wl, struct net_device *ndev, s32 ift return 0; } -static s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add) +s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add) { s8 iovbuf[WL_EVENTING_MASK_LEN + 12]; @@ -7593,6 +7680,29 @@ static __used s32 wl_add_ie(struct wl_priv *wl, u8 t, u8 l, u8 *v) return err; } +static void wl_update_hidden_ap_ie(struct wl_bss_info *bi, u8 *ie_stream, u32 *ie_size) +{ + u8 *ssidie; + + ssidie = (u8 *)cfg80211_find_ie(WLAN_EID_SSID, ie_stream, *ie_size); + if (!ssidie) + return; + if (ssidie[1] != bi->SSID_len) { + if (ssidie[1]) { + WL_ERR(("%s: Wrong SSID len: %d != %d\n", __func__, ssidie[1], bi->SSID_len)); + return; + } + memmove(ssidie + bi->SSID_len + 2, ssidie + 2, *ie_size - (ssidie + 2 - ie_stream)); + memcpy(ssidie + 2, bi->SSID, bi->SSID_len); + *ie_size = *ie_size + bi->SSID_len; + ssidie[1] = bi->SSID_len; + return; + } + if (*(ssidie + 2) == '\0') + memcpy(ssidie + 2, bi->SSID, bi->SSID_len); + return; +} + static s32 wl_mrg_ie(struct wl_priv *wl, u8 *ie_stream, u16 ie_size) { struct wl_ie *ie = wl_to_ie(wl); diff --git 
a/drivers/net/wireless/bcmdhd/wl_cfg80211.h b/drivers/net/wireless/bcmdhd/wl_cfg80211.h index dfb0d0de2f7..aeb63674b83 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.h +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.h @@ -120,7 +120,7 @@ do { \ #else /* !(WL_DBG_LEVEL > 0) */ #define WL_DBG(args) #endif /* (WL_DBG_LEVEL > 0) */ - +#define WL_PNO(args) #define WL_SCAN_RETRY_MAX 3 #define WL_NUM_PMKIDS_MAX MAXPMKID @@ -549,6 +549,29 @@ wl_get_status_all(struct wl_priv *wl, s32 status) return cnt? true: false; } + +static inline void +wl_set_status_all(struct wl_priv *wl, s32 status, u32 op) +{ + struct net_info *_net_info, *next; + + list_for_each_entry_safe(_net_info, next, &wl->net_list, list) { + switch (op) { + case 1: + return; /* set all status is not allowed */ + case 2: + clear_bit(status, &_net_info->sme_state); + break; + case 4: + return; /* change all status is not allowed */ + default: + return; /* unknown operation */ + } + } + +} + + static inline void wl_set_status_by_netdev(struct wl_priv *wl, s32 status, struct net_device *ndev, u32 op) @@ -639,6 +662,8 @@ wl_get_profile_by_netdev(struct wl_priv *wl, struct net_device *ndev) (wl_set_status_by_netdev(wl, WL_STATUS_ ## stat, ndev, 1)) #define wl_clr_drv_status(wl, stat, ndev) \ (wl_set_status_by_netdev(wl, WL_STATUS_ ## stat, ndev, 2)) +#define wl_clr_drv_status_all(wl, stat) \ + (wl_set_status_all(wl, WL_STATUS_ ## stat, 2)) #define wl_chg_drv_status(wl, stat, ndev) \ (wl_set_status_by_netdev(wl, WL_STATUS_ ## stat, ndev, 4)) @@ -689,4 +714,5 @@ void wl_cfg80211_enable_trace(int level); extern s32 wl_update_wiphybands(struct wl_priv *wl); extern s32 wl_cfg80211_if_is_group_owner(void); extern int wl_cfg80211_update_power_mode(struct net_device *dev); +extern s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add); #endif /* _wl_cfg80211_h_ */ diff --git a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c index 7bcd14486dd..8fcc13c4d30 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c +++ b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c @@ -641,7 +641,7 @@ wl_cfgp2p_enable_discovery(struct wl_priv *wl, struct net_device *dev, } set_ie: ret = wl_cfgp2p_set_management_ie(wl, dev, - wl_cfgp2p_find_idx(wl, dev), + wl_to_p2p_bss_bssidx(wl, P2PAPI_BSSCFG_DEVICE), VNDR_IE_PRBREQ_FLAG, ie, ie_len); if (unlikely(ret < 0)) { @@ -1230,6 +1230,10 @@ wl_cfgp2p_listen_complete(struct wl_priv *wl, struct net_device *ndev, } cfg80211_remain_on_channel_expired(ndev, wl->last_roc_id, &wl->remain_on_chan, wl->remain_on_chan_type, GFP_KERNEL); + if (wl_add_remove_eventmsg(wl_to_prmry_ndev(wl), + WLC_E_P2P_PROBREQ_MSG, false) != BCME_OK) { + CFGP2P_ERR((" failed to unset WLC_E_P2P_PROPREQ_MSG\n")); + } } else wl_clr_p2p_status(wl, LISTEN_EXPIRED); @@ -1321,6 +1325,9 @@ wl_cfgp2p_discover_listen(struct wl_priv *wl, s32 channel, u32 duration_ms) } else wl_clr_p2p_status(wl, LISTEN_EXPIRED); + if (wl_add_remove_eventmsg(wl_to_prmry_ndev(wl), WLC_E_P2P_PROBREQ_MSG, true) != BCME_OK) { + CFGP2P_ERR((" failed to set WLC_E_P2P_PROPREQ_MSG\n")); + } wl_cfgp2p_set_p2p_mode(wl, WL_P2P_DISC_ST_LISTEN, channel, (u16) duration_ms, wl_to_p2p_bss_bssidx(wl, P2PAPI_BSSCFG_DEVICE)); _timer = &wl->p2p->listen_timer; @@ -1751,6 +1758,10 @@ wl_cfgp2p_set_p2p_ps(struct wl_priv *wl, struct net_device *ndev, char* buf, int if (legacy_ps != -1) { s32 pm = legacy_ps ? 
PM_MAX : PM_OFF; +#if defined(SUPPORT_PM2_ONLY) + if (pm == PM_MAX) + pm = PM_FAST; +#endif /* SUPPORT_PM2_ONLY */ ret = wldev_ioctl(wl_to_p2p_bss_ndev(wl, P2PAPI_BSSCFG_CONNECTION), WLC_SET_PM, &pm, sizeof(pm), true); if (unlikely(ret)) { @@ -1819,6 +1830,38 @@ wl_cfgp2p_retreive_p2pattrib(void *buf, u8 element_id) } #define P2P_GROUP_CAPAB_GO_BIT 0x01 + +u8* +wl_cfgp2p_find_attrib_in_all_p2p_Ies(u8 *parse, u32 len, u32 attrib) +{ + bcm_tlv_t *ie; + u8* pAttrib; + + CFGP2P_INFO(("Starting parsing parse %p attrib %d remaining len %d ", parse, attrib, len)); + while ((ie = bcm_parse_tlvs(parse, (int)len, DOT11_MNG_VS_ID))) { + if (wl_cfgp2p_is_p2p_ie((uint8*)ie, &parse, &len) == TRUE) { + /* Have the P2p ie. Now check for attribute */ + if ((pAttrib = wl_cfgp2p_retreive_p2pattrib(parse, attrib)) != NULL) { + CFGP2P_INFO(("P2P attribute %d was found at parse %p", + attrib, parse)); + return pAttrib; + } + else { + parse += (ie->len + TLV_HDR_LEN); + len -= (ie->len + TLV_HDR_LEN); + CFGP2P_INFO(("P2P Attribute %d not found Moving parse" + " to %p len to %d", attrib, parse, len)); + } + } + else { + /* It was not p2p IE. parse will get updated automatically to next TLV */ + CFGP2P_INFO(("IT was NOT P2P IE parse %p len %d", parse, len)); + } + } + CFGP2P_ERR(("P2P attribute %d was NOT found", attrib)); + return NULL; +} + u8 * wl_cfgp2p_retreive_p2p_dev_addr(wl_bss_info_t *bi, u32 bi_length) { @@ -1827,12 +1870,8 @@ wl_cfgp2p_retreive_p2p_dev_addr(wl_bss_info_t *bi, u32 bi_length) bool p2p_go = 0; u8 *ptr = NULL; - if (!(p2p_ie = wl_cfgp2p_find_p2pie(((u8 *) bi) + bi->ie_offset, bi->ie_length))) { - WL_ERR(("P2P IE not found")); - return NULL; - } - - if (!(capability = wl_cfgp2p_retreive_p2pattrib(p2p_ie, P2P_SEID_P2P_INFO))) { + if ((capability = wl_cfgp2p_find_attrib_in_all_p2p_Ies(((u8 *) bi) + bi->ie_offset, + bi->ie_length, P2P_SEID_P2P_INFO)) == NULL) { WL_ERR(("P2P Capability attribute not found")); return NULL; } diff --git a/drivers/net/wireless/bcmdhd/wl_cfgp2p.h b/drivers/net/wireless/bcmdhd/wl_cfgp2p.h index 03a645aea31..be5ddba73a4 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfgp2p.h +++ b/drivers/net/wireless/bcmdhd/wl_cfgp2p.h @@ -254,6 +254,9 @@ wl_cfgp2p_set_p2p_ps(struct wl_priv *wl, struct net_device *ndev, char* buf, int extern u8 * wl_cfgp2p_retreive_p2pattrib(void *buf, u8 element_id); +extern u8* +wl_cfgp2p_find_attrib_in_all_p2p_Ies(u8 *parse, u32 len, u32 attrib); + extern u8 * wl_cfgp2p_retreive_p2p_dev_addr(wl_bss_info_t *bi, u32 bi_length); diff --git a/drivers/power/bq27541_battery.c b/drivers/power/bq27541_battery.c index 736785e5a58..4a905d37c6b 100755 --- a/drivers/power/bq27541_battery.c +++ b/drivers/power/bq27541_battery.c @@ -61,10 +61,11 @@ /* Battery flags bit definitions */ #define BATT_STS_DSG 0x0001 #define BATT_STS_FC 0x0200 +#define BATT_STS_CHG_INH 0x0800 /* Debug Message */ #define BAT_NOTICE(format, arg...) \ - printk(KERN_NOTICE "%s " format , __FUNCTION__ , ## arg) + pr_debug(KERN_NOTICE "%s " format , __FUNCTION__ , ## arg) #define BAT_ERR(format, arg...) 
\ printk(KERN_ERR format , ## arg) @@ -74,6 +75,8 @@ unsigned battery_cable_status = 0; unsigned battery_driver_ready = 0; static int ac_on ; static int usb_on ; +unsigned int bq27541_i2c_error; +static unsigned int ota_flag = 0; static unsigned int battery_current; static unsigned int battery_remaining_capacity; static atomic_t device_count; @@ -85,6 +88,8 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp,union static int bq27541_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); extern unsigned get_usb_cable_status(void); +extern int smb347_charger_enable(bool enable); +extern int smb347_config_thermal_charging(int temp); module_param(battery_current, uint, 0644); module_param(battery_remaining_capacity, uint, 0644); @@ -142,8 +147,10 @@ static enum power_supply_property bq27541_properties[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_TECHNOLOGY, POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_CURRENT_NOW, }; void check_cabe_type(void) @@ -234,6 +241,7 @@ static struct bq27541_device_info { struct miscdevice battery_misc; struct wake_lock low_battery_wake_lock; struct wake_lock cable_wake_lock; + char device_name[5]; int smbus_status; int battery_present; int low_battery_present; @@ -339,6 +347,26 @@ static const struct attribute_group battery_smbus_group = { .attrs = battery_smbus_attributes, }; +static int bq27541_battery_current(void) +{ + int ret; + int curr = 0; + + ret = bq27541_read_i2c(bq27541_data[REG_CURRENT].addr, &curr, 0); + if (ret) { + BAT_ERR("error reading current ret = %x\n", ret); + return 0; + } + + curr = (s16)curr; + + if (curr >= bq27541_data[REG_CURRENT].min_value && + curr <= bq27541_data[REG_CURRENT].max_value) { + return curr; + } else + return 0; +} + static void battery_status_poll(struct work_struct *work) { struct bq27541_device_info *batt_dev = container_of(work, struct bq27541_device_info, status_poll_work.work); @@ -348,6 +376,10 @@ static void battery_status_poll(struct work_struct *work) power_supply_changed(&bq27541_supply[Charger_Type_Battery]); + if (!bq27541_device->temp_err) + if (ac_on || usb_on) + smb347_config_thermal_charging(bq27541_device->old_temperature/10); + /* Schedule next polling */ queue_delayed_work(battery_work_queue, &batt_dev->status_poll_work, bat_check_interval*HZ); } @@ -486,7 +518,23 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, if ((bq27541_device->smbus_status < 0) && (psp != POWER_SUPPLY_PROP_TEMP)) { dev_err(&bq27541_device->client->dev, "%s: i2c read for %d failed\n", __func__, reg_offset); + + if (bq27541_i2c_error < 3) { + bq27541_i2c_error++; + if (battery_driver_ready) { + cancel_delayed_work(&bq27541_device->status_poll_work); + queue_delayed_work(battery_work_queue,&bq27541_device->status_poll_work, 1*HZ); + } + if(bq27541_i2c_error == 3) { + BAT_NOTICE("charger disable !!\n"); + smb347_charger_enable(0); + } + BAT_NOTICE("bq27541_i2c_error=%d\n", bq27541_i2c_error); + } return -EINVAL; + } else if (bq27541_device->smbus_status >= 0) { + if (bq27541_i2c_error) + bq27541_i2c_error--; } if (psp == POWER_SUPPLY_PROP_VOLTAGE_NOW) { @@ -494,6 +542,7 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, rt_value <= bq27541_data[REG_VOLTAGE].max_value) { if (rt_value > BATTERY_PROTECTED_VOLT) { val->intval = bq27541_device->bat_vol = rt_value*1000; + bq27541_i2c_error = 0; } else { val->intval = 
bq27541_device->bat_vol; } @@ -502,6 +551,18 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, } BAT_NOTICE("voltage_now= %u uV\n", val->intval); } + if (psp == POWER_SUPPLY_PROP_CURRENT_NOW) { + val->intval = rt_value; + /* Returns a signed 16-bit value in mA */ + if (val->intval & 0x8000) { + /* Negative */ + val->intval = ~val->intval & 0x7fff; + val->intval++; + val->intval *= -1; + } + val->intval *= 1000; + BAT_NOTICE("current_now= %d uA\n", val->intval); + } if (psp == POWER_SUPPLY_PROP_STATUS) { ret = bq27541_device->bat_status = rt_value; static char *status_text[] = {"Unknown", "Charging", "Discharging", "Not charging", "Full"}; @@ -509,8 +570,23 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, if (ac_on || usb_on) { /* Charging detected */ if (bq27541_device->old_capacity == 100) val->intval = POWER_SUPPLY_STATUS_FULL; - else + else { val->intval = POWER_SUPPLY_STATUS_CHARGING; + if (ret & BATT_STS_CHG_INH) { + if (ota_flag == 0) { + BAT_NOTICE("charger disable !!\n"); + smb347_charger_enable(0); + ota_flag = 1; + } + val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; + } else { + if (ota_flag) { + BAT_NOTICE("charger enable !!\n"); + smb347_charger_enable(1); + ota_flag = 0; + } + } + } } else if (ret & BATT_STS_FC) { /* Full-charged condition reached */ if (!ac_on) val->intval = POWER_SUPPLY_STATUS_DISCHARGING; @@ -522,7 +598,7 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; } BAT_NOTICE("status: %s ret= 0x%04x\n", status_text[val->intval], ret); - + bq27541_i2c_error = 0; } else if (psp == POWER_SUPPLY_PROP_TEMP) { ret = bq27541_device->bat_temp = rt_value; @@ -562,6 +638,11 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, bq27541_device->old_temperature = val->intval = ret; BAT_NOTICE("temperature= %u (0.1�XC)\n", val->intval); } + if (psp == POWER_SUPPLY_PROP_CURRENT_NOW) { + val->intval = bq27541_device->bat_current + = bq27541_battery_current(); + BAT_NOTICE("current = %d mA\n", val->intval); + } return 0; } @@ -713,6 +794,26 @@ static int bq27541_get_property(struct power_supply *psy, return -EINVAL; } +static int is_legal_pack(void) +{ + char data[7]; + int ret, retry = 3; + + while(--retry > 0) + { + ret = i2c_smbus_read_i2c_block_data(bq27541_device->client, 0x63, 7, data); + if (ret >= 0) { + if(!strncmp(data, "ME370", 5)) { + strncpy(bq27541_device->device_name, data, 5); + BAT_NOTICE("device name: %s\n", bq27541_device->device_name); + return 1; + } + } + } + BAT_NOTICE("device name: not found\n"); + return 0; +} + #include "stress_test.c" static int bq27541_probe(struct i2c_client *client, const struct i2c_device_id *id) @@ -737,6 +838,11 @@ static int bq27541_probe(struct i2c_client *client, bq27541_device->shutdown_disable = 1; bq27541_device->cap_zero_count = 0; + if(!is_legal_pack()) { + BAT_NOTICE("charger disable !!\n"); + smb347_charger_enable(0); + } + for (i = 0; i < ARRAY_SIZE(bq27541_supply); i++) { ret = power_supply_register(&client->dev, &bq27541_supply[i]); if (ret) { diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index ff3d2338fad..1c5d1366df2 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -108,6 +108,8 @@ #define DELAY_FOR_CURR_LIMIT_RECONF (60) #define ADAPTER_PROTECT_DELAY (4*HZ) #define GPIO_AC_OK TEGRA_GPIO_PV1 +#define ENABLE_PIN_CTRL_MASK 0x60 +#define BAT_Hot_Limit 45 /* Functions declaration */ static int 
smb347_configure_charger(struct i2c_client *client, int value); @@ -128,6 +130,8 @@ struct wake_lock charger_wakelock; static unsigned int project_id; static unsigned int pcba_ver; static int gpio_dock_in = 0; +static int charge_en_flag = 1; +static unsigned usb_det_cable_type = non_cable; /* Sysfs interface */ static DEVICE_ATTR(reg_status, S_IWUSR | S_IRUGO, smb347_reg_show, NULL); @@ -384,18 +388,19 @@ static int smb347_configure_charger(struct i2c_client *client, int value) return ret; } -static int smb347_charger_enable(bool enable) +static int smb347_pin_control(bool state) { struct i2c_client *client = charger->client; u8 ret = 0; - if (enable) { + mutex_lock(&charger->pinctrl_lock); + + if (state) { /*Pin Controls -active low */ ret = smb347_update_reg(client, smb347_PIN_CTRL, PIN_ACT_LOW); if (ret < 0) { dev_err(&client->dev, "%s(): Failed to" "enable charger\n", __func__); - return ret; } } else { /*Pin Controls -active high */ @@ -403,12 +408,38 @@ static int smb347_charger_enable(bool enable) if (ret < 0) { dev_err(&client->dev, "%s(): Failed to" "disable charger\n", __func__); - return ret; } } + + mutex_unlock(&charger->pinctrl_lock); return ret; } +int smb347_charger_enable(bool state) +{ + struct i2c_client *client = charger->client; + u8 ret = 0; + + ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() error in configuring charger..\n", + __func__); + goto error; + } + charge_en_flag = state; + smb347_pin_control(state); + + ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() error in configuring charger..\n", + __func__); + goto error; + } + +error: + return ret; +} +EXPORT_SYMBOL_GPL(smb347_charger_enable); static int smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) @@ -429,7 +460,8 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) } /* disable charger */ - smb347_charger_enable(0); + if (charge_en_flag) + smb347_pin_control(0); /* AICL disable */ retval = smb347_read(client, smb347_VRS_FUNC); @@ -499,7 +531,8 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) } /* enable charger */ - smb347_charger_enable(1); + if (charge_en_flag) + smb347_pin_control(1); /* Disable volatile writes to registers */ ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); @@ -944,9 +977,12 @@ static int cable_type_detect(void) } else { charger->cur_cable_type = unknow_cable; printk(KERN_INFO "Unkown Plug In Cable type !\n"); - if (gpio_get_value(dock_in)) { - charger->cur_cable_type = usb_cable; - success = battery_callback(usb_cable); + + if(usb_det_cable_type) { + printk(KERN_INFO "Use usb det %s cable to report\n", + (usb_det_cable_type == ac_cable) ? 
"ac" : "usb"); + charger->cur_cable_type = usb_det_cable_type; + success = battery_callback(usb_det_cable_type); } } } else { @@ -973,6 +1009,16 @@ static int cable_type_detect(void) return success; } +void usb_det_cable_callback(unsigned cable_type) +{ + usb_det_cable_type = cable_type; + SMB_NOTICE("usb_det_cable_type=%d\n", usb_det_cable_type); + + if(unknow_cable == charger->cur_cable_type) { + cable_type_detect(); + } +} + static void inok_isr_work_function(struct work_struct *dat) { struct i2c_client *client = charger->client; @@ -1016,42 +1062,37 @@ static void dockin_isr_work_function(struct work_struct *dat) static ssize_t smb347_reg_show(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = charger->client; - uint8_t config_reg[14], cmd_reg[1], status_reg[10]; - int i, ret = 0; - - ret += i2c_smbus_read_i2c_block_data(client, smb347_CHARGE, 15, config_reg) - + i2c_smbus_read_i2c_block_data(client, smb347_CMD_REG, 2, cmd_reg) - + i2c_smbus_read_i2c_block_data(client, smb347_INTR_STS_A, 11, status_reg); - - if (ret < 0) - SMB_ERR("failed to read charger reg !\n"); - - SMB_INFO("smb347 Registers\n"); - SMB_INFO("------------------\n"); - for(i=0;i<=14;i++) - SMB_INFO("Reg[%02xh]=0x%02x\n", i, config_reg[i]); - for(i=0;i<=1;i++) - SMB_INFO("Reg[%02xh]=0x%02x\n", 48+i, cmd_reg[i]); - for(i=0;i<=10;i++) - SMB_INFO("Reg[%02xh]=0x%02x\n", 53+i, status_reg[i]); - - return sprintf(buf, "Reg[06h]=0x%02x\n" - "Reg[08h]=0x%02x\n" - "Reg[30h]=0x%02x\n" - "Reg[31h]=0x%02x\n" - "Reg[39h]=0x%02x\n" - "Reg[3dh]=0x%02x\n" - "Reg[3eh]=0x%02x\n" - "Reg[3fh]=0x%02x\n", - config_reg[6], - config_reg[8], - cmd_reg[0], - cmd_reg[1], - status_reg[4], - status_reg[8], - status_reg[9], - status_reg[10]); - + uint8_t config_reg[15], cmd_reg[1], status_reg[10]; + char tmp_buf[64]; + int i, cfg_ret, cmd_ret, sts_ret = 0; + + cfg_ret = i2c_smbus_read_i2c_block_data(client, smb347_CHARGE, 15, config_reg); + cmd_ret = i2c_smbus_read_i2c_block_data(client, smb347_CMD_REG, 2, cmd_reg); + sts_ret = i2c_smbus_read_i2c_block_data(client, smb347_INTR_STS_A, 11, status_reg); + + sprintf(tmp_buf, "SMB34x Configuration Registers Detail\n" + "==================\n"); + strcpy(buf, tmp_buf); + + if (cfg_ret > 0) { + for(i=0;i<=14;i++) { + sprintf(tmp_buf, "Reg%02xh:\t0x%02x\n", i, config_reg[i]); + strcat(buf, tmp_buf); + } + } + if (cmd_ret > 0) { + for(i=0;i<=1;i++) { + sprintf(tmp_buf, "Reg%02xh:\t0x%02x\n", 48+i, cmd_reg[i]); + strcat(buf, tmp_buf); + } + } + if (sts_ret > 0) { + for(i=0;i<=10;i++) { + sprintf(tmp_buf, "Reg%02xh:\t0x%02x\n", 53+i, status_reg[i]); + strcat(buf, tmp_buf); + } + } + return strlen(buf); } static void smb347_default_setback(void) @@ -1079,6 +1120,91 @@ static void smb347_default_setback(void) } } +static int smb347_temp_limit_setting(void) +{ + struct i2c_client *client = charger->client; + int ret = 0, retval, val; + + /* Enable volatile writes to registers */ + ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() error in configuring charger..\n", + __func__); + goto error; + } + val = smb347_read(client, smb347_HRD_SFT_TEMP); + if (val < 0) { + dev_err(&client->dev, "%s(): Failed in reading 0x%02x", + __func__, smb347_HRD_SFT_TEMP); + goto error; + } + val &= 0xcf; + /* Set Hard Limit Hot Temperature 59 Degree */ + ret = smb347_write(client, smb347_HRD_SFT_TEMP, val | 0x20); + if (ret < 0) { + dev_err(&client->dev, "%s(): Failed in writing 0x%02x to register" + "0x%02x\n", __func__, val, 
smb347_HRD_SFT_TEMP); + goto error; + } + /* Disable volatile writes to registers */ + ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() error in configuring charger..\n", + __func__); + goto error; + } + return 0; +error: + return -1; +} + +int smb347_config_thermal_charging(int temp) +{ + struct i2c_client *client = charger->client; + int ret = 0, retval, setting = 0; + + mdelay(150); + SMB_NOTICE("temp=%d\n", temp); + + ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() charger enable write error..\n", __func__); + goto error; + } + + /*charger enable/disable*/ + retval = smb347_read(client, smb347_PIN_CTRL); + if (retval < 0) { + dev_err(&client->dev, "%s(): Failed in reading 0x%02x", + __func__, smb347_PIN_CTRL); + goto error; + } + + setting = retval & ENABLE_PIN_CTRL_MASK; + if (temp > BAT_Hot_Limit) { + if (setting != 0x40) { + SMB_NOTICE("Charger disable\n"); + smb347_charger_enable(false); + } else + SMB_NOTICE("Bypass charger disable\n"); + } else { + if (setting != 0x60) { + SMB_NOTICE("Charger enable\n"); + smb347_charger_enable(true); + } else + SMB_NOTICE("Bypass charger enable\n"); + } + + ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() charger enable write error..\n", __func__); + goto error; + } +error: + return ret; +} +EXPORT_SYMBOL(smb347_config_thermal_charging); + static int __devinit smb347_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -1098,6 +1224,7 @@ static int __devinit smb347_probe(struct i2c_client *client, i2c_set_clientdata(client, charger); /* Restore default setting: APSD Enable & 5/1/HC mode Pin control */ + smb347_temp_limit_setting(); smb347_default_setback(); ret = sysfs_create_group(&client->dev.kobj, &smb347_group); @@ -1107,6 +1234,7 @@ static int __devinit smb347_probe(struct i2c_client *client, mutex_init(&charger->cable_lock); mutex_init(&charger->dockin_lock); + mutex_init(&charger->pinctrl_lock); wake_lock_init(&charger->wake_lock_dockin, WAKE_LOCK_SUSPEND, "wake_lock_dockin"); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 1b7d64118e4..9cf23798216 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1785,6 +1785,8 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) { struct regulator_dev *rdev = regulator->rdev; int ret = 0; + int old_min_uV; + int old_max_uV; mutex_lock(&rdev->mutex); @@ -1806,6 +1808,8 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) ret = regulator_check_voltage(rdev, &min_uV, &max_uV); if (ret < 0) goto out; + old_min_uV = regulator->min_uV; + old_max_uV = regulator->max_uV; regulator->min_uV = min_uV; regulator->max_uV = max_uV; @@ -1814,6 +1818,10 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) goto out; ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); + if (ret < 0) { + regulator->min_uV = old_min_uV; + regulator->max_uV = old_max_uV; + } out: mutex_unlock(&rdev->mutex); diff --git a/drivers/spi/spi-tegra.c b/drivers/spi/spi-tegra.c index 3f913389dd7..cfc610102c1 100644 --- a/drivers/spi/spi-tegra.c +++ b/drivers/spi/spi-tegra.c @@ -1265,7 +1265,7 @@ static int __init spi_tegra_probe(struct platform_device *pdev) sprintf(tspi->port_name, "tegra_spi_%d", pdev->id); ret = request_threaded_irq(tspi->irq, spi_tegra_isr, - spi_tegra_isr_thread, IRQF_DISABLED, + spi_tegra_isr_thread, 
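
smb347_config_thermal_charging() above reduces to a simple threshold rule: read smb347_PIN_CTRL, mask it with ENABLE_PIN_CTRL_MASK (0x60), disable charging when the battery temperature exceeds BAT_Hot_Limit (45 degrees C), and skip the register write when the pin state already matches. Below is a small standalone model of that decision only, assuming the 0x60 = enabled / 0x40 = disabled encoding used in the function above; the example register value and temperature are made up.

#include <stdbool.h>
#include <stdio.h>

#define ENABLE_PIN_CTRL_MASK    0x60
#define PIN_CTRL_ENABLED        0x60
#define PIN_CTRL_DISABLED       0x40
#define BAT_HOT_LIMIT           45      /* degrees C */

/* Decide whether the charger pin state must change for a given battery
 * temperature; returns true when a register write would be issued. */
static bool thermal_update_needed(int temp, unsigned int pin_ctrl_reg,
                                  bool *enable)
{
        unsigned int setting = pin_ctrl_reg & ENABLE_PIN_CTRL_MASK;

        if (temp > BAT_HOT_LIMIT) {
                *enable = false;
                return setting != PIN_CTRL_DISABLED;    /* else bypass disable */
        }
        *enable = true;
        return setting != PIN_CTRL_ENABLED;             /* else bypass enable */
}

int main(void)
{
        bool enable;

        /* 50 C with the charger currently enabled (reg = 0x65): expect disable. */
        if (thermal_update_needed(50, 0x65, &enable))
                printf("charger %s\n", enable ? "enable" : "disable");
        else
                printf("bypass\n");
        return 0;
}
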
IRQF_ONESHOT, tspi->port_name, tspi); if (ret < 0) { dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n", diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index e13b4c48340..618ad8a443f 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "binder.h" @@ -1467,6 +1468,10 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_DEAD_REPLY; goto err_dead_binder; } + if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { + return_error = BR_FAILED_REPLY; + goto err_invalid_target_handle; + } if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) { struct binder_transaction *tmp; tmp = thread->transaction_stack; @@ -1612,6 +1617,10 @@ static void binder_transaction(struct binder_proc *proc, fp->cookie, node->cookie); goto err_binder_get_ref_for_node_failed; } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + return_error = BR_FAILED_REPLY; + goto err_binder_get_ref_for_node_failed; + } ref = binder_get_ref_for_node(target_proc, node); if (ref == NULL) { return_error = BR_FAILED_REPLY; @@ -1641,6 +1650,10 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_binder_get_ref_failed; } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + return_error = BR_FAILED_REPLY; + goto err_binder_get_ref_failed; + } if (ref->node->proc == target_proc) { if (fp->type == BINDER_TYPE_HANDLE) fp->type = BINDER_TYPE_BINDER; @@ -1694,6 +1707,11 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_fget_failed; } + if (security_binder_transfer_file(proc->tsk, target_proc->tsk, file) < 0) { + fput(file); + return_error = BR_FAILED_REPLY; + goto err_get_unused_fd_failed; + } target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); if (target_fd < 0) { fput(file); @@ -2699,6 +2717,9 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ret = -EBUSY; goto err; } + ret = security_binder_set_context_mgr(proc->tsk); + if (ret < 0) + goto err; if (binder_context_mgr_uid != -1) { if (binder_context_mgr_uid != current->cred->euid) { printk(KERN_ERR "binder: BINDER_SET_" diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 86d51959b29..aa3f7b4b655 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -34,7 +34,9 @@ #include #include #include +#include #include +#include static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { @@ -44,7 +46,7 @@ static int lowmem_adj[6] = { 12, }; static int lowmem_adj_size = 4; -static size_t lowmem_minfree[6] = { +static int lowmem_minfree[6] = { 3 * 512, /* 6MB */ 2 * 1024, /* 8MB */ 4 * 1024, /* 16MB */ @@ -55,6 +57,8 @@ static int lowmem_minfree_size = 4; static struct task_struct *lowmem_deathpending; static unsigned long lowmem_deathpending_timeout; +extern int compact_nodes(bool sync); + #define lowmem_print(level, x...) 
\ do { \ if (lowmem_debug_level >= (level)) \ @@ -81,7 +85,7 @@ task_notify_func(struct notifier_block *self, unsigned long val, void *data) static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { - struct task_struct *p; + struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; @@ -131,25 +135,24 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) } selected_oom_adj = min_adj; - read_lock(&tasklist_lock); - for_each_process(p) { - struct mm_struct *mm; - struct signal_struct *sig; + rcu_read_lock(); + for_each_process(tsk) { + struct task_struct *p; int oom_adj; - task_lock(p); - mm = p->mm; - sig = p->signal; - if (!mm || !sig) { - task_unlock(p); + if (tsk->flags & PF_KTHREAD) continue; - } - oom_adj = sig->oom_adj; + + p = find_lock_task_mm(tsk); + if (!p) + continue; + + oom_adj = p->signal->oom_adj; if (oom_adj < min_adj) { task_unlock(p); continue; } - tasksize = get_mm_rss(mm); + tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; @@ -172,12 +175,14 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) selected_oom_adj, selected_tasksize); lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; - force_sig(SIGKILL, selected); + send_sig(SIGKILL, selected, 0); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); - read_unlock(&tasklist_lock); + rcu_read_unlock(); + if (selected) + compact_nodes(false); return rem; } diff --git a/drivers/staging/iio/imu/mpu/Kconfig b/drivers/staging/iio/imu/mpu/Kconfig index a19f9809754..21cc237cc2a 100644 --- a/drivers/staging/iio/imu/mpu/Kconfig +++ b/drivers/staging/iio/imu/mpu/Kconfig @@ -1,12 +1,24 @@ # -# inv-mpu drivers for Invensense MPU devices and combos +# inv-mpu-iio driver for Invensense MPU devices and combos # config INV_MPU_IIO - tristate "Invensense MPU devices" - depends on I2C && SYSFS && IIO && IIO_KFIFO_BUF && IIO_TRIGGER && !INV_MPU - default n - help - This driver supports the Invensense MPU devices. - This driver can be built as a module. The module will be called - inv-mpu-iio. + tristate "Invensense MPU devices" + depends on I2C && SYSFS && IIO && IIO_KFIFO_BUF && IIO_TRIGGER && !INV_MPU + default n + help + This driver supports the Invensense MPU devices. + This includes MPU6050/MPU3050/MPU9150/ITG3500/MPU6500/MPU9250. + This driver can be built as a module. The module will be called + inv-mpu-iio. + +config INV_IIO_MPU3050_ACCEL_SLAVE_BMA250 + bool "Invensense MPU3050 slave accelerometer device for bma250" + depends on INV_MPU_IIO + default n + help + This is slave device enable MPU3050 accelerometer slave device. + Right now, it is only bma250. For other acceleromter device, + it can be added to this menu if the proper interface is filled. + There are some interface function to be defined. 
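
For reference, the lowmem_shrink() rewrite above changes the locking (RCU plus find_lock_task_mm() instead of tasklist_lock) but keeps the same victim-selection policy: among non-kernel threads with oom_adj at or above min_adj, prefer the highest oom_adj and, on a tie, the largest RSS. The following is only a userspace model of that policy over made-up task data, not kernel code.

#include <stdio.h>

struct fake_task {
        const char *comm;
        int oom_adj;
        int tasksize;   /* RSS in pages */
        int kthread;    /* stands in for PF_KTHREAD */
};

/* Pick the victim the same way lowmem_shrink() does: highest oom_adj at or
 * above min_adj wins; among equal adj values, the larger RSS wins. */
static const struct fake_task *select_victim(const struct fake_task *tasks,
                                             int n, int min_adj)
{
        const struct fake_task *selected = NULL;
        int sel_adj = min_adj, sel_size = 0;
        int i;

        for (i = 0; i < n; i++) {
                const struct fake_task *p = &tasks[i];

                if (p->kthread || p->tasksize <= 0 || p->oom_adj < min_adj)
                        continue;
                if (selected) {
                        if (p->oom_adj < sel_adj)
                                continue;
                        if (p->oom_adj == sel_adj && p->tasksize <= sel_size)
                                continue;
                }
                selected = p;
                sel_adj = p->oom_adj;
                sel_size = p->tasksize;
        }
        return selected;
}

int main(void)
{
        const struct fake_task tasks[] = {
                { "kworker",     0,   0, 1 },
                { "launcher",    6, 900, 0 },
                { "cached-app", 15, 400, 0 },
                { "cached-app2", 15, 700, 0 },
        };
        const struct fake_task *victim =
                select_victim(tasks, (int)(sizeof(tasks) / sizeof(tasks[0])),
                              12 /* hypothetical min_adj */);

        if (victim)
                printf("would kill '%s' (adj %d, %d pages)\n",
                       victim->comm, victim->oom_adj, victim->tasksize);
        return 0;
}
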
+ diff --git a/drivers/staging/iio/imu/mpu/Makefile b/drivers/staging/iio/imu/mpu/Makefile index 0efcc6d7065..3bbb26997af 100644 --- a/drivers/staging/iio/imu/mpu/Makefile +++ b/drivers/staging/iio/imu/mpu/Makefile @@ -11,7 +11,17 @@ inv-mpu-iio-objs += inv_mpu_misc.o inv-mpu-iio-objs += inv_mpu3050_iio.o inv-mpu-iio-objs += dmpDefaultMPU6050.o +CFLAGS_inv_mpu_core.o += -Idrivers/staging/iio +CFLAGS_inv_mpu_ring.o += -Idrivers/staging/iio +CFLAGS_inv_mpu_trigger.o += -Idrivers/staging/iio +CFLAGS_inv_mpu_misc.o += -Idrivers/staging/iio +CFLAGS_inv_mpu3050_iio.o += -Idrivers/staging/iio +CFLAGS_dmpDefaultMPU6050.o += -Idrivers/staging/iio + # the Bosch BMA250 driver is added to the inv-mpu device driver because it # must be connected to an MPU3050 device on the secondary slave bus. +ifeq ($(CONFIG_INV_IIO_MPU3050_ACCEL_SLAVE_BMA250), y) inv-mpu-iio-objs += inv_slave_bma250.o +CFLAGS_inv_slave_bma250.o += -Idrivers/staging/iio +endif diff --git a/drivers/staging/iio/imu/mpu/README b/drivers/staging/iio/imu/mpu/README index a0cfb3f2b7b..a0a954852f5 100644 --- a/drivers/staging/iio/imu/mpu/README +++ b/drivers/staging/iio/imu/mpu/README @@ -1,56 +1,133 @@ Kernel driver inv-mpu-iio Author: Invensense +Table of Contents: +================== +- Description +- Integrating the Driver in the Linux Kernel +- Board and Platform Data + > Interrupt Pin + > Platform Data +- Board File Modifications for Secondary I2C Configuration + > MPU-6050 + AKM8963 on the secondary I2C interface + > MPU-6500 + AKM8963 on the secondary I2C interface + > MPU-9150 + > MPU-9250 + > MPU-3050 + BMA250 on the secondary I2C interface +- Board File Modifications for Invensense Devices + > MPU-3050 + > ITG-3500 + > MPU-6050 + > MPU-6500 + > MPU-6XXX + > MPU-9150 + > MPU-9250 +- IIO Subsystem + > Communicating with the Driver in Userspace + > ITG-3500 + > MPU-6050 and MPU-6500 + > MPU-9150 + > MPU-9250 + > MPU-3050 + BMA250 on the secondary I2C interface +- Suspend and Resume +- DMP Event +- Motion Event +- Streaming Data to an Userspace Application +- Recommended Sysfs Entry Setup Sequence + > With DMP Firmware + > Without DMP Firmware +- Test Applications + > Running Test Applications with MPU-9150/MPU-6050/MPU-6500/MPU-9250 + > Running Test Applications with MPU-3050/ITG-3500 + + Description ------------ +=========== This document describes how to install the Invensense device driver into a -Linux kernel. At the moment, this driver supports the ITG3500/MPU6050/MPU9150/MPU3050. The slave -address of these four chips are 0x68. However, the actual slave address depends on the board -configuration. The driver does not assume anything about it. - -Files included in this package: -Kconfig -Makefile -inv_mpu_core.c -inv_mpu_misc.c -inv_mpu_trigger.c -inv_mpu3050_iio.c -inv_mpu_iio.h -inv_mpu_ring.c -inv_slave_bma250.c -dmpDefaultMPU6050.c -dmpkey.h -dmpmap.h -mpu.h -Including the driver in the Linux kernel ----------------------------------------- -mpu.h should be added to "kernel/include/linux". -Other files listed should be added to the drivers/staging/iio/imu/mpu directory (or another -directory of your choosing). When building the kernel, the driver will not -appear in menuconfig without modifications similar to those below: - -modify "drivers/staging/iio/imu/Kconfig" like -source "drivers/staging/iio/imu/mpu/Kconfig" - -modify "drivers/staging/iio/imu/Makefile" -obj-y += mpu/ +Linux kernel. 
The Invensense driver currently supports the following sensors: +- ITG-3500 +- MPU-6050 +- MPU-9150 +- MPU-6500 +- MPU-9250 +- MPU-3050 +- MPU-6XXX(either MPU6050 or MPU6500, driver to do auto detection) + +The slave address of each device is either 0x68 or 0x69, depending on the AD0 +pin value of the device. Please refer to the appropriate product specification +document for further information regarding the AD0 pin. The driver supports both +addresses. + +The following files are included in this package: +- Kconfig +- Makefile +- inv_mpu_core.c +- inv_mpu_misc.c +- inv_mpu_trigger.c +- inv_mpu3050_iio.c +- inv_mpu_iio.h +- inv_mpu_ring.c +- inv_slave_bma250.c +- dmpDefaultMPU6050.c +- dmpkey.h +- dmpmap.h +- mpu.h + + +Integrating the Driver in the Linux Kernel +========================================== +Please add the files as follows: +- Add mpu.h to "kernel/include/linux". +- Add all other files to drivers/staging/iio/imu/inv_mpu +(another directory is acceptable, but this is the recommended destination) + +In order to see the driver in menuconfig when building the kernel, please +make modifications as shown below: + + modify "drivers/staging/iio/imu/Kconfig" with: + >> source "drivers/staging/iio/imu/inv_mpu/Kconfig" + + modify "drivers/staging/iio/imu/Makefile" with: + >> obj-y += inv_mpu/ + Board and Platform Data ------------------------ -The board file needs to be modified to register the device on an I2C bus. An -i2c_board_info instance must be defined as seen below. The hardcoded value of -140 corresponds to the GPIO input pin wired to the device's interrupt pin. -This pin will most likely be different for your platform. -platform data is for orientation matrix, and secondary bus situations. -For MPU9150, it regarded as a MPU9150 and AKM8975 in the secondary. -So the secondary i2c address must be filled. ------------------------------------------------------------------ -The board file is arch/arm/mach-omap2/board-omap4panda.c or -modify the board file in your system as below: --------------------------------------------------------- -For AKM8963 in the secondary i2c bus of MPU6050, +======================= +In order to recognize the Invensense device on the I2C bus, the board file must +be modified. +The i2c_board_info instance must be defined as shown below. + +Interrupt Pin +------------- +The hardcoded value of 140 corresponds to the GPIO input pin connected to the +Invensense device's interrupt pin. +This pin will most likely be different for your platform, and the value should +be changed accordingly. + +Platform Data +------------- +The platform data (orientation matrix and secondary bus configurations) must be +modified as show below, according to your particular platform configuration. + +Please note that the MPU-9150 it is treated as a MPU-6050 with AKM8975 on the +device's secondary I2C interface. Thus the secondary I2C address must be +provided. + +Please note that the MPU-9250 it is treated as a MPU-6500 with AKM8963 on the +device's secondary I2C interface. Thus the secondary I2C address must be +provided. + +Board File Modifications for Secondary I2C Configuration +======================================================== +For the Panda Board, the board file can be found at +arch/arm/mach-omap2/board-omap4panda.c. 
+Please modify the pertinent baord file in your system according to the examples +shown below: + +MPU-6050 + AKM8963 on the secondary I2C interface +------------------------------------------------- static struct mpu_platform_data gyro_platform_data = { - .int_config = 0x00, + .int_config = 0x10, .level_shifter = 0, .orientation = { -1, 0, 0, 0, 1, 0, @@ -59,10 +136,26 @@ static struct mpu_platform_data gyro_platform_data = { .sec_slave_id = COMPASS_ID_AK8963, .secondary_i2c_addr = 0x0E }; ------------------------------------------------------------ -For MPU9150, the secondary i2c bus address must be filled as below. + +MPU-6500 + AKM8963 on the secondary I2C interface +------------------------------------------------- static struct mpu_platform_data gyro_platform_data = { - .int_config = 0x00, + .int_config = 0x10, + .level_shifter = 0, + .orientation = { -1, 0, 0, + 0, 1, 0, + 0, 0, -1 }, + .sec_slave_type = SECONDARY_SLAVE_TYPE_COMPASS, + .sec_slave_id = COMPASS_ID_AK8963, + .secondary_i2c_addr = 0x0E +}; + +MPU-9150 +-------- +For MPU-9150, please provide the following secondary I2C bus information. + +static struct mpu_platform_data gyro_platform_data = { + .int_config = 0x10, .level_shifter = 0, .orientation = { -1, 0, 0, 0, 1, 0, @@ -71,10 +164,28 @@ static struct mpu_platform_data gyro_platform_data = { .sec_slave_id = COMPASS_ID_AK8975, .secondary_i2c_addr = 0x0E }; ------------------------------------------------------------ -for BMA250 in the secondary, please use the platform data as: + +MPU-9250 +-------- +For MPU-9250, please provide the following secondary I2C bus information. + +static struct mpu_platform_data gyro_platform_data = { + .int_config = 0x10, + .level_shifter = 0, + .orientation = { -1, 0, 0, + 0, 1, 0, + 0, 0, -1 }, + .sec_slave_type = SECONDARY_SLAVE_TYPE_COMPASS, + .sec_slave_id = COMPASS_ID_AK8963, + .secondary_i2c_addr = 0x0C +}; + +MPU-3050 + BMA250 on the secondary I2C interface +------------------------------------------------ +For BMA250 on the secondary I2C bus, please provide the following information. + static struct mpu_platform_data gyro_platform_data = { - .int_config = 0x00, + .int_config = 0x10, .level_shifter = 0, .orientation = { -1, 0, 0, 0, 1, 0, @@ -83,10 +194,23 @@ static struct mpu_platform_data gyro_platform_data = { .sec_slave_id = ACCEL_ID_BMA250, .secondary_i2c_addr = 0x18, }; ---------------------------------------------------------------- -the i2c init data is: ----------------------------------------------------------------- -For MPU3050, + + +Board File Modifications for Invensense Devices +=============================================== +For Invensense devices, please provide the i2c init data as shown in the +examples below. 
+ +In the _i2c_init function, the device is registered in the following manner: + + // arch/arm/mach-omap2/board-omap4panda.c + // in static int __init omap4_panda_i2c_init(void) + omap_register_i2c_bus(4, 400, + single_chip_board_info, + ARRAY_SIZE(single_chip_board_info)); + +MPU-3050 +-------- static struct i2c_board_info __initdata single_chip_board_info[] = { { I2C_BOARD_INFO("mpu3050", 0x68), @@ -94,8 +218,9 @@ static struct i2c_board_info __initdata single_chip_board_info[] = { .platform_data = &gyro_platform_data, }, }; ----------------------------------------------------------------- -for ITG3500: + +ITG-3050 +-------- static struct i2c_board_info __initdata single_chip_board_info[] = { { I2C_BOARD_INFO("itg3500", 0x68), @@ -103,7 +228,9 @@ static struct i2c_board_info __initdata single_chip_board_info[] = { .platform_data = &gyro_platform_data, }, }; -for MPU6050 + +MPU6050 +------- static struct i2c_board_info __initdata single_chip_board_info[] = { { I2C_BOARD_INFO("mpu6050", 0x68), @@ -111,7 +238,29 @@ static struct i2c_board_info __initdata single_chip_board_info[] = { .platform_data = &gyro_platform_data, }, }; -for MPU9150 + +MPU6500 +------- +static struct i2c_board_info __initdata single_chip_board_info[] = { + { + I2C_BOARD_INFO("mpu6500", 0x68), + .irq = (IH_GPIO_BASE + MPUIRQ_GPIO), + .platform_data = &gyro_platform_data, + }, +}; + +MPU6XXX +------- +static struct i2c_board_info __initdata single_chip_board_info[] = { + { + I2C_BOARD_INFO("mpu6xxx", 0x68), + .irq = (IH_GPIO_BASE + MPUIRQ_GPIO), + .platform_data = &gyro_platform_data, + }, +}; + +MPU9150 +------- arch/arm/mach-omap2/board-omap4panda.c static struct i2c_board_info __initdata single_chip_board_info[] = { { @@ -121,204 +270,319 @@ static struct i2c_board_info __initdata single_chip_board_info[] = { }, }; -In the _i2c_init function, the device is registered in the following manner: - +MPU9250 +------- arch/arm/mach-omap2/board-omap4panda.c - in static int __init omap4_panda_i2c_init(void) -omap_register_i2c_bus(4, 400, single_chip_board_info, ARRAY_SIZE(single_chip_board_info)); +static struct i2c_board_info __initdata single_chip_board_info[] = { + { + I2C_BOARD_INFO("mpu9250", 0x68), + .irq = (IH_GPIO_BASE + MPUIRQ_GPIO), + .platform_data = &gyro_platform_data, + }, +}; IIO subsystem ----------------------------------------------- -successful installation will create two directories under /sys/bus/iio/devices -iio:device0 -trigger0 -Under /dev/ diretory, a file "iio:device0" will also be created(or iio:deviceX, if -you have more than one iio devices). -Communicating with the driver in userspace +============= +A successful installation will create the following two new directories under +/sys/bus/iio/devices: + - iio:device0 + - trigger0 + +Also, a new file, "iio:device0", will be created in the /dev/ diretory. +(if you have more than one IIO device, the file will be named "iio:deviceX", +where X is a number) + + +Communicating with the Driver in Userspace ------------------------------------------ -Upon installation, the driver generates several files in sysfs. If your -platform is configured as detailed above, navigate to the following path to -find these files: -/sys/bus/iio/devices/iio:device0 - -The list below provides a brief description for each file. --------------------------------------- -For ITG3500: +The driver generates several files in sysfs upon installation. +These files are used to communicate with the driver. 
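
Before walking through the individual attributes, here is a minimal userspace sketch of how these sysfs entries are typically read and written. It assumes the device registered as iio:device0 and uses the power_state, sampling_frequency and temperature entries described below; adjust the paths for your platform.

#include <stdio.h>

#define IIO_SYSFS "/sys/bus/iio/devices/iio:device0"    /* assumes device0 */

static int read_attr(const char *name, char *out, int len)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), IIO_SYSFS "/%s", name);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (!fgets(out, len, f))
                out[0] = '\0';
        fclose(f);
        return 0;
}

static int write_attr(const char *name, const char *val)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), IIO_SYSFS "/%s", name);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fprintf(f, "%s", val);
        fclose(f);
        return 0;
}

int main(void)
{
        char buf[64] = "";

        write_attr("power_state", "1");          /* power the chip on       */
        write_attr("sampling_frequency", "50");  /* e.g. 50 Hz output rate  */
        if (!read_attr("temperature", buf, sizeof(buf)))
                printf("temperature: %s", buf);
        return 0;
}
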
The files can be found +at /sys/bus/iio/devices/iio:device0 (or ../iio:deviceX as shown above). + +A brief description of the pertinent files for each Invensense device is shown +below: + +ITG-3500 +-------- temperature (Read-only) -Read temperature data directly from the temperature register. +--Read temperature data directly from the temperature register. sampling_frequency (Read/write) -Configure the ADC sampling rate and FIFO output rate. +--Configure the ADC sampling rate and FIFO output rate. sampling_frequency_available(read-only) -show commonly used frequency +--show commonly used frequency clock_source (Read-only) -Check which clock-source is used by the chip. +--Check which clock-source is used by the chip. power_state (Read/write) -turn on/off the power supply +--turn on/off the power supply self_test (read-only) -read this entry trigger self test. The return value is D. +--read this entry trigger self test. The return value is D. D is the success/fail. For different chip, the result is different for success/fail. 1 means success 0 means fail. The LSB of D is for gyro; the bit next to LSB of D is for accel. The bit 2 of D is for compass result. key (read-only) -show the key value of this driver. Used by MPL. +--show the key value of this driver. Used by MPL. gyro_matrix (read-only) -show the orient matrix obtained from board file. +--show the orientation matrix obtained from the board file. -------------------------------------------------------------- -For MPU6050: -MPU6050 has all the sysfs files that ITG3500 has. It has additional files list below: +MPU-6050 and MPU-6500 +--------------------- +MPU-6050 and MPU-6500 have all sysfs files belonging to ITG-3500 (shown above). +In addition, it has the files below: gyro_enable (read/write) -enable/disable gyro functionality. affect raw_gyro. turn off this will shut down gyro and save power. +--enable/disable gyro functionality. Affects raw_gyro. Turning this off this + will shut down gyro and save power. accl_enable (read/write) -enable/disable accelerometer functionality. affect raw_accl. turn off this will shut down accel and save power. +--enable/disable accelerometer functionality. Affects raw_accl. +Turning this off this will shut down accel and save power. firmware_loaded (read/write) -Flag indicate the whether firmware is loaded or not in the DMP engine. 0 means no firmware loaded. -1 means firmware is already loaded . This flag can only be written as 0. 1 is updated -internally. +--Flag indicating the whether firmware is loaded or not in the DMP engine. +0 means no firmware loaded. 1 means firmware is already loaded . This +flag can only be written as 0. It internally updates to 1. dmp_on(read/write) -This entry controls whether to run DMP or not. To enable DMP , firmware_loaded must be 1. write 1 to enable -DMP and write 0 to disable dmp. - -dmp_in_on(read/write) -This entry controls whether dmp interrupt is on/off. firmware_loaded must be 1. sometimes, it is desirable -that interrupt is off while DMP is running. +--This entry controls whether to run DMP or not. +Write 1 to enable DMP and write 0 to disable dmp. +Please note that firmware_loaded must be 1 in order to enable DMP. + +dmp_int_on(read/write) +--This entry controls whether dmp interrupt is on/off. +Please note that firmware_loaded must be 1. +Also, we'd like to remind you that it is sometimes advantageous to +turn interrupts off while the DMP is running. + +dmp_output_rate +--control dmp output rate when dmp is on. 
+ +dmp_event_int_on(read/write) +--This entry controls whether dmp event interrupt is on/off. +Please note that turning this on will turn off the data interrupt. +Interrupts will be generated only when events occur. +This is useful for saving power when the system is waiting for a special event +to wake up. dmp_firmware (write only binary file) -This is the entry that firmware code is loaded into. If the action is succeful, firmware_loaded will -be updated as 1. In order to load new firmware, firmware_loaded flag should be set 0. - -lpa_mode(read-write) -Low power accelerometer mode -lpa_freq(read-write) -low power acceleromter frequency. +--DMP firmware code is loaded into this entry. +If loading is successful, the firmware_loaded flag will be updated to 1. +In order to load new firmware, the firmware_loaded flag must be first set to 0. accel_matrix -orient matrix for accel +--orientation matrix for accelerometer. -flick_lower, -flick_upper, -flick_counter, -flick_message_on, -flick_int_on, -flick_axis, -Flick related entry +quaternion_on +--Turn on/off quaterniion data output. DMP is required for this feature. pedometer_time pedometer_steps, -Pedometer related entry +--Pedometer related entries -event_flick event_tap -event_orientation event_display_orientation -event related entry +event_accel_motion +event_smd +--Event related entries. +Please poll these entries to read their values. Direct reads will yield +meaningless results. +Further details are provided in the DMP Events section of this README. tap_on -control tap function of DMP +--Controls tap function of DMP tap_time tap_min_count tap_threshold -tap related entries. control various parameters of tap function. - -orientation_on -turn on/off orientation function of DMP. +--Tap related entries. Controls various parameters of tap function. display_orientation_on -turn on/off display orientation function of DMP. +--Turn on/off display orientation function of DMP. + +smd_enable +enable SMD(Significant Motion Detection) detection. + +smd_threshold +This set the threshold of the motion when SMD start to be triggered. The +value is in acclerometer counts. + +smd_delay_threshold +This sets the threshold of time after which SMD can be triggered. +The value is in seconds. + +smd_delay_threshold2 +This sets the threshold of time during which SMD can be triggered (after the +smd_delay_threshold timer has expired). +The value is in seconds. quaternion_on -turn on/off quaterniion data output. must use DMP. -------------------------------------------------------------------- -for MPU9150 and secondary compass -MPU9150 has every entry MPU6050 has. It has additional entries: +--Turn on/off quaterniion data output. DMP is required for this feature. + +Low power accel motion interrupt related settings. +if motion_lpa_on is set, this will disable all engines except accel. Accel will +enter low power mode and the whole chip will be turned on/off at specific frequency. +----------------------------------------------------------------------------- +motion_lpa_duration +--set motion duration. in ms. This means filtered out all the motino interrupts + during this period. + +motion_lpa_threshold +--set motion threshold. in mg. The maximum is 1020mg and resolution is 32mg. + +motion_lpa_on +--turn on/off motion function. + +motion_lpa_freq +--motion lpa frequency. which determines power on/off frequency. +------------------------------------------------------------------------------ +MPU-9150 +-------- +MPU-9150 has all of MPU-6050's entries. 
It also has two additional entries, +described below. compass_enable (read/write) -enable this will enable compass function. +--Enables compass function. compass_matrix (read-only) -compass orient matrix ---------------------- -for MPU3050 and secondary accelerometer(only BMA250 is supported right now) -It has every entry ITG3500 has and has two addiontal entries. +--Compass orientation matrix + +MPU-3050 with BMA250 on secondary I2C interface +----------------------------------------------- +MPU-3050 with BMA250 on the secondary I2C interface has ever ITG-3500 entry. +It also has two additional entries, shown below: + accl_matrix + accl_enable ----------------------------------------------------------------------------------- -low power accelerometer mode -Lower power accelerometer mode is a special mode. It works only for accelerometer. -It has two entries, lpa_mode and lpa_freq. Only MPU6050 and MPU9150 has this mode. -To run low power accel mode, set lpa_mode to 1, set lpa_freq to 0~3, which corresponds -to 1.25Hz, 5Hz, 20Hz, 40Hz. "gyro_enable" and "compass_enable" must be zero. "dmp_on" -must be zero. ------------------------------------------------------------------------------------ -dmp event. -dmp event is event out by the DMP unit inside MPU. Only MPU6050 and MPU9150 supports this. -There are four sysfs entreis, event_flick, event_tap and event_orientation and -event_display_orientation. These four events must -be polled before read. The proper method to poll sysfs is: + +Suspend and Resume +=================================================== +The suspend and resume functions are call backs registered to the system +and executed when the system goes in suspend and resumes. +It is enabled when CONFIG_PM is defined. +The current behavior is simple: +- suspend will turn off the chip +- resume will turn on the chip + +However, it is possible for the driver to do more complex things; +for example, leaving pedometers running when system is off. This can save whole +system power while letting pedometer working. Other behaviors are possible +too. + +DMP Event +========= +A DMP Event is an event that is output by the DMP unit within the Invensense +device (MPU). +Only the MPU-6050, MPU-6500, MPU-9250, MPU-9150, MPU-9250 feature the DMP. + +There are four sysfs entries for DMP events: +- event_tap +- event_display_orientation +- event_accel_motion +- event_smd + +These events must be polled before reading. + +The proper method to poll sysfs is as follows: 1. open file. 2. dummy read. 3. poll. 4. once the poll passed, use fopen and fread to read the sysfs entry. 5. interpret the data. ------------------------------------------------------------------------------- -If streaming to a userspace application, the recommended way to access gyro/accel/compass -data is via /dev/iio:device0. Follow these steps to get constant readings from -the driver: + +Streaming Data to an Userspace Application +========================================== +When streaming data to an userspace application, we recommend that you access +gyro/accel/compass data via /dev/iio:device0. + +Please follow the steps below to read data at a constant rate from the driver: 1. Write a 1 to power_state to turn on the chip. This is the default setting after installing the driver. 2. Write the desired output rate to fifo_rate. -3. write 1 to enable to turn on the event. +3. Write 1 to enable to turn on the event. 4. Read /dev/iio:device0 to get a string of gyro/accel/compass data. 5. 
Parse this string to obtain each gyro/accel/compass element. -6. If dmp firmware code is loaded, using "dmp_on" to enable/disable dmp . -7. If compass is enabled, output will have compass data. -=========================================================================== - Recommended sysfs entry setup senquence -1. without DMP firmware -1.1 set "power_state" to 1, -1.2 change scale and fifo rate value to your need. -1.3 change gyro_enable and accle_enable and compass_enable to your needs. For example, -if you want gyro only, set accl_enable to 0 or set accl_enable to zero and compass_enable to zero. -If you want accel only, set gyro_enable to 0 or set gyro_enable to zero and compass_enable to zero. -If you want compass only, disable gyro and accel. -1.4 set "enable" to 1. you will get output you want. - -2. With DMP firmware -2.1 set "power_state" to 1, -2.2 write "0" to firmware_loaded if it is not zero already. -2.3 load firmware into "dmp_firmware" as a whole. Don't split the DMP firmware image. -2.4 make sure firmware_loaded is 1 after loading. -2.5 make other configurations similar to the situation as without DMP firmware. -2.6 set dmp_on to 1. -2.7 set "enable" to 1. -======================================================= -The enable function is using enable entry under "/sys/bus/iio/devices/iio:device0/buffer" -========================================================== -test applications: -Test application is under ARTHROPOD/trunk/software/simple_apps/mpu_iio ------------------------------------------- -To run with MPU9150/MPU6050: -using the following command: -for orientation/tap/flick/display orientation event: -mpu_iio -c 10 -l 3 -p -for normal data print -mpu_iio -c 10 -l 3 ----------------------------------------- -To run with MPU3050/ITG3500: -mpu_iio -c 10 -l 3 -r ------------------------------------------ -Please use mpu_iio.c and iio_utils.h as the sample code for your development. +6. If dmp firmware code is loaded, use "dmp_on" to enable/disable dmp. +7. If compass is enabled, the output will contain compass data. + + +Recommended Sysfs Entry Setup Senquence +======================================= + +Without DMP Firmware +-------------------- +1. Set "power_state" to 1, +2. Set the scale and fifo rate values according to your needs. +3. Set gyro_enable, accel_enable, and compass_enable according to your needs. + For example: + - If you only want gyro data, set accel_enable to 0 (and compass_enable to + 0, if applicable). + - If you only want accel data, set gyro_enable to 0 (and compass_enable to + 0, if applicable). + - If you only want compass data, set gyro_enable to 0 and accel_enable to 0. +4. Set "enable" to 1. +5. You will now get the output that you want. + +With DMP Firmware +----------------- +1. Set "power_state" to 1. +2. Write "0" to firmware_loaded if it is not zero already. +3. Load firmware into "dmp_firmware" as a whole. Don't split the DMP firmware + image. +4. Make sure firmware_loaded is 1 after loading the DMP image. +5. Make appropriate configurations as shown above in the "without DMP firmware" + case. +6. Set dmp_on to 1. +7. Set "enable" to 1. + +Please note that the enable function uses the enable entry under +"/sys/bus/iio/devices/iio:device0/buffer" + +Test Applications +================= +A test application is located under software/simple_apps/mpu_iio. 
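
As a complement to the test applications, the five-step polling method from the DMP Event section can be exercised with a few lines of C. The sketch below is a generic sysfs poll example, not part of the driver package; it assumes iio:device0 and the event_tap entry, and uses lseek()/read() for the re-read step (fopen()/fread(), as suggested above, works equally well).

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

/* Assumes iio:device0; any of the event_* entries works the same way. */
#define EVENT_ATTR "/sys/bus/iio/devices/iio:device0/event_tap"

int main(void)
{
        char buf[32];
        struct pollfd pfd;
        int fd = open(EVENT_ATTR, O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        pfd.fd = fd;
        pfd.events = POLLPRI | POLLERR;

        /* Steps 1-2: open and dummy read, so poll() only returns on a
         * fresh event. */
        read(fd, buf, sizeof(buf));

        /* Step 3: block until the driver signals the attribute. */
        if (poll(&pfd, 1, -1) > 0) {
                ssize_t n;

                /* Step 4: rewind and read the new value. */
                lseek(fd, 0, SEEK_SET);
                n = read(fd, buf, sizeof(buf) - 1);
                if (n > 0) {
                        buf[n] = '\0';
                        /* Step 5: interpret the data. */
                        printf("event value: %s", buf);
                }
        }
        close(fd);
        return 0;
}
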
+This application is stand-alone in that it cannot be run concurrently with other +entities trying to access the device node(s) or sysfs entries; in particular, +the + +Running Test Applications with MPU-9150/MPU-6050/MPU-6500/MPU-9250 +--------------------------------------------------------- +To run test applications with MPU-9150, MPU-9250, MPU-6050, or MPU-6500 devices, +please use the following commands: + +1. For tap/display orientation events: + mpu_iio -c 10 -l 3 -p + +2. In addition, to test the motion interrupt (and no_motion on MPU6050) use: + mpu_iio -c 10 -l 3 -p -m + +3. For printing data normally: + mpu_iio -c 10 -l 3 -r + +Running Test Applications with MPU-3050/ITG-3500 +------------------------------------------------ +To run test applications with MPU-3050 or ITG-3500 devices, +please use the following command: + +1. For printing data normally: + mpu_iio -c 10 -l 3 -r + +Please use mpu_iio.c and iio_utils.h as example code for your development +purposes. + +Stress test application +================================= +A stress test application is located under software/simple_apps/stress_iio. +This application simulates HAL's usage calls to the driver. It creates three +threads. One for data read; one for event read; one for sysfs control. +It can run without any parameters or run with some control parameters. Please +see README in the same directories for details. + diff --git a/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c b/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c index ff327c79c52..0242e10d201 100644 --- a/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c +++ b/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c @@ -9,48 +9,65 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include "inv_mpu_iio.h" #include "dmpKey.h" #include "dmpmap.h" -#define CFG_27 (2740) -#define CFG_20 (2078) -#define CFG_23 (2743) -#define CFG_FIFO_ON_EVENT (2689) -#define CFG_ORIENT_IRQ_1 (2533) -#define CGNOTICE_INTR (2636) -#define X_GRT_Y_TMP (1318) +#define CFG_LP_QUAT (2500) +#define END_ORIENT_TEMP (2063) +#define CFG_27 (2530) +#define CFG_23 (2533) +#define CFG_PED_ENABLE (2620) +#define CFG_FIFO_ON_EVENT (2475) +#define CFG_PED_INT (2873) +#define END_PREDICTION_UPDATE (1958) +#define X_GRT_Y_TMP (1555) #define CFG_DR_INT (1029) #define CFG_AUTH (1035) -#define FCFG_1 (1062) -#define SKIP_X_GRT_Y_TMP (1319) -#define SKIP_END_COMPARE (1395) -#define FCFG_3 (1110) +#define UPDATE_PROP_ROT (2032) +#define END_COMPARE_Y_X_TMP2 (1652) +#define SKIP_X_GRT_Y_TMP (1556) +#define SKIP_END_COMPARE (1632) +#define FCFG_3 (1087) #define FCFG_2 (1066) -#define END_COMPARE_Y_X_TMP2 (1415) -#define CFG_DISPLAY_ORIENT_INT (1706) -#define FCFG_7 (1076) -#define FCFG_6 (1128) -#define NO_ORIENT_INTERRUPT (1725) -#define CFG_8 (2718) -#define CFG_15 (2726) -#define CFG_16 (2744) -#define END_COMPARE_Y_X_TMP (1367) -#define CFG_6 (2751) -#define END_ORIENT_1 (1709) -#define END_COMPARE_Y_X (1444) -#define CFG_LP_QUAT (2712) -#define END_ORIENT (1738) -#define CFG_FLICK_IN (2589) -#define CFG_7 (1221) -#define CFG_MOTION_BIAS (1224) -#define X_GRT_Y (1368) -#define TEMPLABEL (2178) -#define NOT_TIME_MINUS_1 (1528) -#define END_COMPARE_Y_X_TMP3 (1394) -#define X_GRT_Y_TMP2 (1339) +#define FCFG_1 (1062) +#define END_COMPARE_Y_X_TMP3 (1631) +#define FCFG_7 (1073) +#define FCFG_6 (1105) +#define FLAT_STATE_END (1910) +#define SWING_END_4 (1813) +#define SWING_END_2 (1762) +#define SWING_END_3 (1784) +#define SWING_END_1 (1747) +#define CFG_8 (2506) +#define CFG_15 (2515) +#define CFG_16 (2534) +#define CFG_EXT_GYRO_BIAS (1184) +#define END_COMPARE_Y_X_TMP (1604) +#define DO_NOT_UPDATE_PROP_ROT (2036) +#define CFG_7 (1403) +#define FLAT_STATE_END_TEMP (1880) +#define END_COMPARE_Y_X (1681) +#define SMD_TP2 (1366) +#define SKIP_SWING_END_1 (1748) +#define SKIP_SWING_END_3 (1785) +#define SKIP_SWING_END_2 (1763) +#define SMD_TP1 (1343) +#define TILTG75_START (1869) +#define CFG_6 (2541) +#define TILTL75_END (1866) +#define END_ORIENT (2081) +#define TILTL75_START (1840) +#define CFG_MOTION_BIAS (1405) +#define X_GRT_Y (1605) +#define TEMPLABEL (2105) +#define CFG_DISPLAY_ORIENT_INT (2050) + +#define CFG_GYRO_RAW_DATA (2510) +#define X_GRT_Y_TMP2 (1576) #define D_0_22 (22+512) #define D_0_24 (24+512) @@ -122,6 +139,9 @@ #define CPASS_MTX_20 (37 * 16 + 8) #define CPASS_MTX_21 (37 * 16 + 12) #define CPASS_MTX_22 (43 * 16 + 12) +#define D_EXT_GYRO_BIAS_X (61 * 16) +#define D_EXT_GYRO_BIAS_Y (61 * 16 + 4) +#define D_EXT_GYRO_BIAS_Z (61 * 16 + 8) #define D_ACT0 (40 * 16) #define D_ACSX (40 * 16 + 4) #define D_ACSY (40 * 16 + 8) @@ -132,69 +152,80 @@ #define FLICK_LOWER (45 * 16 + 12) #define FLICK_UPPER (46 * 16 + 12) -#define D_AUTH_OUT (992) -#define D_AUTH_IN (996) -#define D_AUTH_A (1000) -#define D_AUTH_B (1004) +#define D_SMD_ENABLE (18 * 16) +#define D_SMD_MOT_THLD (20 * 16) +#define D_SMD_DELAY_THLD (21 * 16 + 4) +#define D_SMD_DELAY2_THLD (21 * 16 + 12) +#define D_SMD_EXE_STATE (22 * 16) +#define D_SMD_DELAY_CNTR (21 * 16) + +#define D_AUTH_OUT (992) +#define D_AUTH_IN (996) +#define D_AUTH_A (1000) +#define D_AUTH_B (1004) + +#define D_PEDSTD_BP_B (768 + 0x1C) +#define D_PEDSTD_HP_A (768 + 0x78) +#define D_PEDSTD_HP_B (768 + 0x7C) +#define 
D_PEDSTD_BP_A4 (768 + 0x40) +#define D_PEDSTD_BP_A3 (768 + 0x44) +#define D_PEDSTD_BP_A2 (768 + 0x48) +#define D_PEDSTD_BP_A1 (768 + 0x4C) +#define D_PEDSTD_INT_THRSH (768 + 0x68) +#define D_PEDSTD_CLIP (768 + 0x6C) +#define D_PEDSTD_SB (768 + 0x28) +#define D_PEDSTD_SB_TIME (768 + 0x2C) +#define D_PEDSTD_PEAKTHRSH (768 + 0x98) +#define D_PEDSTD_TIML (768 + 0x2A) +#define D_PEDSTD_TIMH (768 + 0x2E) +#define D_PEDSTD_PEAK (768 + 0X94) +#define D_PEDSTD_STEPCTR (768 + 0x60) +#define D_PEDSTD_TIMECTR (964) +#define D_PEDSTD_DECI (768 + 0xA0) -#define D_PEDSTD_BP_B (768 + 0x1C) -#define D_PEDSTD_HP_A (768 + 0x78) -#define D_PEDSTD_HP_B (768 + 0x7C) -#define D_PEDSTD_BP_A4 (768 + 0x40) -#define D_PEDSTD_BP_A3 (768 + 0x44) -#define D_PEDSTD_BP_A2 (768 + 0x48) -#define D_PEDSTD_BP_A1 (768 + 0x4C) -#define D_PEDSTD_INT_THRSH (768 + 0x68) -#define D_PEDSTD_CLIP (768 + 0x6C) -#define D_PEDSTD_SB (768 + 0x28) -#define D_PEDSTD_SB_TIME (768 + 0x2C) -#define D_PEDSTD_PEAKTHRSH (768 + 0x98) -#define D_PEDSTD_TIML (768 + 0x2A) -#define D_PEDSTD_TIMH (768 + 0x2E) -#define D_PEDSTD_PEAK (768 + 0X94) -#define D_PEDSTD_STEPCTR (768 + 0x60) -#define D_PEDSTD_TIMECTR (964) -#define D_PEDSTD_DECI (768 + 0xA0) +#define D_HOST_NO_MOT (976) +#define D_ACCEL_BIAS (660) -#define D_HOST_NO_MOT (976) +#define D_ORIENT_GAP (76) + +#define D_TILT0_H (48) +#define D_TILT0_L (50) +#define D_TILT1_H (52) +#define D_TILT1_L (54) +#define D_TILT2_H (56) +#define D_TILT2_L (58) +#define D_TILT3_H (60) +#define D_TILT3_L (62) + +/* Batch mode */ +#define D_BM_BATCH_CNTR (27*16+4) +#define D_BM_BATCH_THLD (27*16+8) +#define D_BM_ENABLE (28*16+6) +#define D_BM_NUMWORD_TOFILL (28*16+4) static const struct tKeyLabel dmpTConfig[] = { {KEY_CFG_27, CFG_27}, - {KEY_CFG_20, CFG_20}, {KEY_CFG_23, CFG_23}, + {KEY_CFG_PED_ENABLE, CFG_PED_ENABLE}, {KEY_CFG_FIFO_ON_EVENT, CFG_FIFO_ON_EVENT}, - {KEY_CFG_ORIENT_IRQ_1, CFG_ORIENT_IRQ_1}, - {KEY_CGNOTICE_INTR, CGNOTICE_INTR}, - {KEY_X_GRT_Y_TMP, X_GRT_Y_TMP}, {KEY_CFG_DR_INT, CFG_DR_INT}, {KEY_CFG_AUTH, CFG_AUTH}, {KEY_FCFG_1, FCFG_1}, - {KEY_SKIP_X_GRT_Y_TMP, SKIP_X_GRT_Y_TMP}, - {KEY_SKIP_END_COMPARE, SKIP_END_COMPARE}, {KEY_FCFG_3, FCFG_3}, {KEY_FCFG_2, FCFG_2}, - {KEY_END_COMPARE_Y_X_TMP2, END_COMPARE_Y_X_TMP2}, {KEY_CFG_DISPLAY_ORIENT_INT, CFG_DISPLAY_ORIENT_INT}, {KEY_FCFG_7, FCFG_7}, {KEY_FCFG_6, FCFG_6}, - {KEY_NO_ORIENT_INTERRUPT, NO_ORIENT_INTERRUPT}, {KEY_CFG_8, CFG_8}, {KEY_CFG_15, CFG_15}, {KEY_CFG_16, CFG_16}, - {KEY_END_COMPARE_Y_X_TMP, END_COMPARE_Y_X_TMP}, + {KEY_CFG_EXT_GYRO_BIAS, CFG_EXT_GYRO_BIAS}, {KEY_CFG_6, CFG_6}, - {KEY_END_ORIENT_1, END_ORIENT_1}, - {KEY_END_COMPARE_Y_X, END_COMPARE_Y_X}, {KEY_CFG_LP_QUAT, CFG_LP_QUAT}, - {KEY_END_ORIENT, END_ORIENT}, - {KEY_CFG_FLICK_IN, CFG_FLICK_IN}, {KEY_CFG_7, CFG_7}, {KEY_CFG_MOTION_BIAS, CFG_MOTION_BIAS}, - {KEY_X_GRT_Y, X_GRT_Y}, - {KEY_TEMPLABEL, TEMPLABEL}, - {KEY_NOT_TIME_MINUS_1, NOT_TIME_MINUS_1}, - {KEY_END_COMPARE_Y_X_TMP3, END_COMPARE_Y_X_TMP3}, - {KEY_X_GRT_Y_TMP2, X_GRT_Y_TMP2}, + {KEY_CFG_DISPLAY_ORIENT_INT, CFG_DISPLAY_ORIENT_INT}, + {KEY_CFG_GYRO_RAW_DATA, CFG_GYRO_RAW_DATA}, {KEY_D_0_22, D_0_22}, {KEY_D_0_96, D_0_96}, {KEY_D_0_104, D_0_104}, @@ -210,13 +241,10 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_D_1_92, D_1_92}, {KEY_D_1_160, D_1_160}, {KEY_D_1_176, D_1_176}, + {KEY_D_1_178, D_1_178}, {KEY_D_1_218, D_1_218}, {KEY_D_1_232, D_1_232}, {KEY_D_1_250, D_1_250}, - {KEY_DMP_TAPW_MIN, DMP_TAPW_MIN}, - {KEY_DMP_TAP_THR_X, DMP_TAP_THX}, - {KEY_DMP_TAP_THR_Y, DMP_TAP_THY}, - {KEY_DMP_TAP_THR_Z, 
DMP_TAP_THZ}, {KEY_DMP_SH_TH_Y, DMP_SH_TH_Y}, {KEY_DMP_SH_TH_X, DMP_SH_TH_X}, {KEY_DMP_SH_TH_Z, DMP_SH_TH_Z}, @@ -263,7 +291,33 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_D_PEDSTD_STEPCTR, D_PEDSTD_STEPCTR}, {KEY_D_PEDSTD_TIMECTR, D_PEDSTD_TIMECTR}, {KEY_D_PEDSTD_DECI, D_PEDSTD_DECI}, - {KEY_D_HOST_NO_MOT, D_HOST_NO_MOT} + {KEY_D_HOST_NO_MOT, D_HOST_NO_MOT}, + {KEY_D_ACCEL_BIAS, D_ACCEL_BIAS}, + {KEY_D_ORIENT_GAP, D_ORIENT_GAP}, + {KEY_D_TILT0_H, D_TILT0_H}, + {KEY_D_TILT0_L, D_TILT0_L}, + {KEY_D_TILT1_H, D_TILT1_H}, + {KEY_D_TILT1_L, D_TILT1_L}, + {KEY_D_TILT2_H, D_TILT2_H}, + {KEY_D_TILT2_L, D_TILT2_L}, + {KEY_D_TILT3_H, D_TILT3_H}, + {KEY_D_TILT3_L, D_TILT3_L}, + {KEY_CFG_EXT_GYRO_BIAS_X, D_EXT_GYRO_BIAS_X}, + {KEY_CFG_EXT_GYRO_BIAS_Y, D_EXT_GYRO_BIAS_Y}, + {KEY_CFG_EXT_GYRO_BIAS_Z, D_EXT_GYRO_BIAS_Z}, + {KEY_CFG_PED_INT, CFG_PED_INT}, + {KEY_SMD_ENABLE, D_SMD_ENABLE}, + {KEY_SMD_ACCEL_THLD, D_SMD_MOT_THLD}, + {KEY_SMD_DELAY_THLD, D_SMD_DELAY_THLD}, + {KEY_SMD_DELAY2_THLD, D_SMD_DELAY2_THLD}, + {KEY_SMD_ENABLE_TESTPT1, SMD_TP1}, + {KEY_SMD_ENABLE_TESTPT2, SMD_TP2}, + {KEY_SMD_EXE_STATE, D_SMD_EXE_STATE}, + {KEY_SMD_DELAY_CNTR, D_SMD_DELAY_CNTR}, + {KEY_BM_ENABLE, D_BM_ENABLE}, + {KEY_BM_BATCH_CNTR, D_BM_BATCH_CNTR}, + {KEY_BM_BATCH_THLD, D_BM_BATCH_THLD}, + {KEY_BM_NUMWORD_TOFILL, D_BM_NUMWORD_TOFILL} }; #define NUM_LOCAL_KEYS (sizeof(dmpTConfig)/sizeof(dmpTConfig[0])) @@ -272,6 +326,7 @@ static struct tKeyLabel keys[NUM_KEYS]; unsigned short inv_dmp_get_address(unsigned short key) { static int isSorted; + if (!isSorted) { int kk; for (kk = 0; kk < NUM_KEYS; ++kk) { @@ -282,10 +337,11 @@ unsigned short inv_dmp_get_address(unsigned short key) keys[dmpTConfig[kk].key].addr = dmpTConfig[kk].addr; isSorted = 1; } - if (key >= NUM_KEYS) + if (key >= NUM_KEYS) { + pr_err("ERROR!! key not exist=%d!\n", key); return 0xffff; + } + if (0xffff == keys[key].addr) + pr_err("ERROR!!key not local=%d!\n", key); return keys[key].addr; } -/** - * @} - */ diff --git a/drivers/staging/iio/imu/mpu/dmpKey.h b/drivers/staging/iio/imu/mpu/dmpKey.h index e8e19515172..4c70ec294a9 100644 --- a/drivers/staging/iio/imu/mpu/dmpKey.h +++ b/drivers/staging/iio/imu/mpu/dmpKey.h @@ -11,6 +11,17 @@ * GNU General Public License for more details. * */ +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. 
+ * + * @{ + * @file dmpKey.h + * @brief dmp Key definition + * @details This file is part of invensense mpu driver code + * + */ + #ifndef DMPKEY_H__ #define DMPKEY_H__ @@ -76,9 +87,34 @@ #define KEY_FCFG_MAG_VAL (KEY_CFG_ORIENT_IRQ_3 + 1) #define KEY_FCFG_MAG_MOV (KEY_FCFG_MAG_VAL + 1) #define KEY_CFG_LP_QUAT (KEY_FCFG_MAG_MOV + 1) +#define KEY_CFG_GYRO_RAW_DATA (KEY_CFG_LP_QUAT + 1) +#define KEY_CFG_EXT_GYRO_BIAS (KEY_CFG_GYRO_RAW_DATA + 1) +#define KEY_CFG_EXT_GYRO_BIAS_X (KEY_CFG_EXT_GYRO_BIAS + 1) +#define KEY_CFG_EXT_GYRO_BIAS_Y (KEY_CFG_EXT_GYRO_BIAS_X + 1) +#define KEY_CFG_EXT_GYRO_BIAS_Z (KEY_CFG_EXT_GYRO_BIAS_Y + 1) +#define KEY_bad_compass (KEY_CFG_EXT_GYRO_BIAS_Z + 1) +#define KEY_COMPASS_CHG_SENSITIVITY (KEY_bad_compass + 1) +#define KEY_CCS_HEADING_THLD (KEY_COMPASS_CHG_SENSITIVITY + 1) +#define KEY_CCS_TIME_THLD (KEY_CCS_HEADING_THLD + 1) +#define KEY_CCS_DOTP_THLD (KEY_CCS_TIME_THLD + 1) +#define KEY_CCS_COMP_CNTR (KEY_CCS_DOTP_THLD + 1) +#define KEY_CFG_NM_DET (KEY_CCS_COMP_CNTR + 1) +#define KEY_SMD_ENABLE (KEY_CFG_NM_DET + 1) +#define KEY_SMD_ACCEL_THLD (KEY_SMD_ENABLE + 1) +#define KEY_SMD_DELAY_THLD (KEY_SMD_ACCEL_THLD + 1) +#define KEY_SMD_DELAY2_THLD (KEY_SMD_DELAY_THLD + 1) +#define KEY_SMD_ENABLE_TESTPT1 (KEY_SMD_DELAY2_THLD + 1) +#define KEY_SMD_ENABLE_TESTPT2 (KEY_SMD_ENABLE_TESTPT1 + 1) +#define KEY_SMD_EXE_STATE (KEY_SMD_ENABLE_TESTPT2 + 1) +#define KEY_SMD_DELAY_CNTR (KEY_SMD_EXE_STATE + 1) + +#define KEY_BREAK (81) +#if KEY_SMD_DELAY_CNTR != KEY_BREAK +#error +#endif /* MPU6050 keys */ -#define KEY_CFG_ACCEL_FILTER (KEY_CFG_LP_QUAT + 1) +#define KEY_CFG_ACCEL_FILTER (KEY_BREAK + 1) #define KEY_CFG_MOTION_BIAS (KEY_CFG_ACCEL_FILTER + 1) #define KEY_TEMPLABEL (KEY_CFG_MOTION_BIAS + 1) @@ -152,16 +188,24 @@ #define KEY_D_GYRO_BIAS_X (KEY_D_2_252 + 1) #define KEY_D_GYRO_BIAS_Y (KEY_D_GYRO_BIAS_X + 1) #define KEY_D_GYRO_BIAS_Z (KEY_D_GYRO_BIAS_Y + 1) -#define KEY_D_GYRO_ENABLE (KEY_D_GYRO_BIAS_Z + 1) +#define KEY_D_ACC_BIAS_X (KEY_D_GYRO_BIAS_Z + 1) +#define KEY_D_ACC_BIAS_Y (KEY_D_ACC_BIAS_X + 1) +#define KEY_D_ACC_BIAS_Z (KEY_D_ACC_BIAS_Y + 1) +#define KEY_D_GYRO_ENABLE (KEY_D_ACC_BIAS_Z + 1) #define KEY_D_ACCEL_ENABLE (KEY_D_GYRO_ENABLE + 1) #define KEY_D_QUAT_ENABLE (KEY_D_ACCEL_ENABLE + 1) -#define KEY_D_CR_TIME_G (KEY_D_QUAT_ENABLE + 1) +#define KEY_D_OUTPUT_ENABLE (KEY_D_QUAT_ENABLE + 1) +#define KEY_D_ACCEL_CNTR (KEY_D_OUTPUT_ENABLE + 1) +#define KEY_D_GYRO_CNTR (KEY_D_ACCEL_CNTR + 1) +#define KEY_D_QUAT0_CNTR (KEY_D_GYRO_CNTR + 1) +#define KEY_D_QUAT1_CNTR (KEY_D_QUAT0_CNTR + 1) +#define KEY_D_QUAT2_CNTR (KEY_D_QUAT1_CNTR + 1) +#define KEY_D_CR_TIME_G (KEY_D_QUAT2_CNTR + 1) #define KEY_D_CR_TIME_A (KEY_D_CR_TIME_G + 1) #define KEY_D_CR_TIME_Q (KEY_D_CR_TIME_A + 1) #define KEY_D_CS_TAX (KEY_D_CR_TIME_Q + 1) #define KEY_D_CS_TAY (KEY_D_CS_TAX + 1) #define KEY_D_CS_TAZ (KEY_D_CS_TAY + 1) - #define KEY_D_CS_TGX (KEY_D_CS_TAZ + 1) #define KEY_D_CS_TGY (KEY_D_CS_TGX + 1) #define KEY_D_CS_TGZ (KEY_D_CS_TGY + 1) @@ -171,7 +215,8 @@ #define KEY_D_CS_TQ3 (KEY_D_CS_TQ2 + 1) /* Compass keys */ -#define KEY_CPASS_BIAS_X (KEY_D_CS_TQ3 + 1) +#define KEY_CPASS_GAIN (KEY_D_CS_TQ3 + 1) +#define KEY_CPASS_BIAS_X (KEY_CPASS_GAIN + 1) #define KEY_CPASS_BIAS_Y (KEY_CPASS_BIAS_X + 1) #define KEY_CPASS_BIAS_Z (KEY_CPASS_BIAS_Y + 1) #define KEY_CPASS_MTX_00 (KEY_CPASS_BIAS_Z + 1) @@ -213,8 +258,27 @@ #define KEY_END_COMPARE_Y_X_TMP3 (KEY_NOT_TIME_MINUS_1 + 1) #define KEY_X_GRT_Y_TMP2 (KEY_END_COMPARE_Y_X_TMP3 + 1) +/*Shake Keys */ +#define KEY_D_0_64 (KEY_X_GRT_Y_TMP2 + 1) 
+#define KEY_D_2_4 (KEY_D_0_64 + 1) +#define KEY_D_2_8 (KEY_D_2_4 + 1) +#define KEY_D_2_48 (KEY_D_2_8 + 1) +#define KEY_D_2_92 (KEY_D_2_48 + 1) +#define KEY_D_2_94 (KEY_D_2_92 + 1) +#define KEY_D_2_160 (KEY_D_2_94 + 1) +#define KEY_D_3_180 (KEY_D_2_160 + 1) +#define KEY_D_3_184 (KEY_D_3_180 + 1) +#define KEY_D_3_188 (KEY_D_3_184 + 1) +#define KEY_D_3_208 (KEY_D_3_188 + 1) +#define KEY_D_3_240 (KEY_D_3_208 + 1) +#define KEY_RETRACTION_1 (KEY_D_3_240 + 1) +#define KEY_RETRACTION_2 (KEY_RETRACTION_1 + 1) +#define KEY_RETRACTION_3 (KEY_RETRACTION_2 + 1) +#define KEY_RETRACTION_4 (KEY_RETRACTION_3 + 1) +#define KEY_CFG_SHAKE_INT (KEY_RETRACTION_4 + 1) + /* Authenticate Keys */ -#define KEY_D_AUTH_OUT (KEY_X_GRT_Y_TMP2 + 1) +#define KEY_D_AUTH_OUT (KEY_CFG_SHAKE_INT + 1) #define KEY_D_AUTH_IN (KEY_D_AUTH_OUT + 1) #define KEY_D_AUTH_A (KEY_D_AUTH_IN + 1) #define KEY_D_AUTH_B (KEY_D_AUTH_A + 1) @@ -239,43 +303,28 @@ #define KEY_D_PEDSTD_STEPCTR (KEY_D_PEDSTD_TIMECTR + 1) #define KEY_D_PEDSTD_WALKTIME (KEY_D_PEDSTD_STEPCTR + 1) #define KEY_D_PEDSTD_DECI (KEY_D_PEDSTD_WALKTIME + 1) +#define KEY_CFG_PED_INT (KEY_D_PEDSTD_DECI + 1) +#define KEY_CFG_PED_ENABLE (KEY_CFG_PED_INT + 1) /*Host Based No Motion*/ -#define KEY_D_HOST_NO_MOT (KEY_D_PEDSTD_DECI + 1) - -/* EIS keys */ -#define KEY_P_EIS_FIFO_FOOTER (KEY_D_HOST_NO_MOT + 1) -#define KEY_P_EIS_FIFO_YSHIFT (KEY_P_EIS_FIFO_FOOTER + 1) -#define KEY_P_EIS_DATA_RATE (KEY_P_EIS_FIFO_YSHIFT + 1) -#define KEY_P_EIS_FIFO_XSHIFT (KEY_P_EIS_DATA_RATE + 1) -#define KEY_P_EIS_FIFO_SYNC (KEY_P_EIS_FIFO_XSHIFT + 1) -#define KEY_P_EIS_FIFO_ZSHIFT (KEY_P_EIS_FIFO_SYNC + 1) -#define KEY_P_EIS_FIFO_READY (KEY_P_EIS_FIFO_ZSHIFT + 1) -#define KEY_DMP_FOOTER (KEY_P_EIS_FIFO_READY + 1) -#define KEY_DMP_INTX_HC (KEY_DMP_FOOTER + 1) -#define KEY_DMP_INTX_PH (KEY_DMP_INTX_HC + 1) -#define KEY_DMP_INTX_SH (KEY_DMP_INTX_PH + 1) -#define KEY_DMP_AINV_SH (KEY_DMP_INTX_SH + 1) -#define KEY_DMP_A_INV_XH (KEY_DMP_AINV_SH + 1) -#define KEY_DMP_AINV_PH (KEY_DMP_A_INV_XH + 1) -#define KEY_DMP_CTHX_H (KEY_DMP_AINV_PH + 1) -#define KEY_DMP_CTHY_H (KEY_DMP_CTHX_H + 1) -#define KEY_DMP_CTHZ_H (KEY_DMP_CTHY_H + 1) -#define KEY_DMP_NCTHX_H (KEY_DMP_CTHZ_H + 1) -#define KEY_DMP_NCTHY_H (KEY_DMP_NCTHX_H + 1) -#define KEY_DMP_NCTHZ_H (KEY_DMP_NCTHY_H + 1) -#define KEY_DMP_CTSQ_XH (KEY_DMP_NCTHZ_H + 1) -#define KEY_DMP_CTSQ_YH (KEY_DMP_CTSQ_XH + 1) -#define KEY_DMP_CTSQ_ZH (KEY_DMP_CTSQ_YH + 1) -#define KEY_DMP_INTX_H (KEY_DMP_CTSQ_ZH + 1) -#define KEY_DMP_INTY_H (KEY_DMP_INTX_H + 1) -#define KEY_DMP_INTZ_H (KEY_DMP_INTY_H + 1) -#define KEY_DMP_HPX_H (KEY_DMP_INTZ_H + 1) -#define KEY_DMP_HPY_H (KEY_DMP_HPX_H + 1) -#define KEY_DMP_HPZ_H (KEY_DMP_HPY_H + 1) +#define KEY_D_HOST_NO_MOT (KEY_CFG_PED_ENABLE + 1) + +/*Host Based Accel Bias*/ +#define KEY_D_ACCEL_BIAS (KEY_D_HOST_NO_MOT + 1) + +/*Screen/Display Orientation Keys*/ +#define KEY_D_ORIENT_GAP (KEY_D_ACCEL_BIAS + 1) +#define KEY_D_TILT0_H (KEY_D_ORIENT_GAP + 1) +#define KEY_D_TILT0_L (KEY_D_TILT0_H + 1) +#define KEY_D_TILT1_H (KEY_D_TILT0_L + 1) +#define KEY_D_TILT1_L (KEY_D_TILT1_H + 1) +#define KEY_D_TILT2_H (KEY_D_TILT1_L + 1) +#define KEY_D_TILT2_L (KEY_D_TILT2_H + 1) +#define KEY_D_TILT3_H (KEY_D_TILT2_L + 1) +#define KEY_D_TILT3_L (KEY_D_TILT3_H + 1) /* Stream keys */ -#define KEY_STREAM_P_GYRO_Z (KEY_DMP_HPZ_H + 1) +#define KEY_STREAM_P_GYRO_Z (KEY_D_TILT3_L + 1) #define KEY_STREAM_P_GYRO_Y (KEY_STREAM_P_GYRO_Z + 1) #define KEY_STREAM_P_GYRO_X (KEY_STREAM_P_GYRO_Y + 1) #define KEY_STREAM_P_TEMP (KEY_STREAM_P_GYRO_X + 1) @@ -287,7 
+336,33 @@ #define KEY_STREAM_P_FOOTER (KEY_STREAM_P_ACCEL_X + 1) #define KEY_STREAM_P_ACCEL_Z (KEY_STREAM_P_FOOTER + 1) -#define NUM_KEYS (KEY_STREAM_P_ACCEL_Z + 1) +/* Batch mode */ +#define KEY_BM_ENABLE (KEY_STREAM_P_ACCEL_Z + 1) +#define KEY_BM_BATCH_THLD (KEY_BM_ENABLE + 1) +#define KEY_BM_BATCH_CNTR (KEY_BM_BATCH_THLD + 1) +#define KEY_BM_NUMWORD_TOFILL (KEY_BM_BATCH_CNTR + 1) + +/* Watermark */ +#define KEY_CFG_WATERMARK_H (KEY_BM_NUMWORD_TOFILL + 1) +#define KEY_CFG_WATERMARK_L (KEY_CFG_WATERMARK_H + 1) + +/* FIFO output control */ +#define KEY_CFG_OUT_ACCL (KEY_CFG_WATERMARK_L + 1) +#define KEY_CFG_OUT_GYRO (KEY_CFG_OUT_ACCL + 1) +#define KEY_CFG_OUT_3QUAT (KEY_CFG_OUT_GYRO + 1) +#define KEY_CFG_OUT_6QUAT (KEY_CFG_OUT_3QUAT + 1) +#define KEY_CFG_OUT_PQUAT (KEY_CFG_OUT_6QUAT + 1) +#define KEY_CFG_FIFO_INT (KEY_CFG_OUT_PQUAT + 1) +/* Ped Step detection */ +#define KEY_CFG_PEDSTEP_DET (KEY_CFG_FIFO_INT + 1) + +/* Screen Orientation data */ +#define KEY_SO_DATA (KEY_CFG_PEDSTEP_DET + 1) + +/* MPU for DMP Android K */ +#define KEY_P_HW_ID (KEY_SO_DATA + 1) + +#define NUM_KEYS (KEY_P_HW_ID + 1) struct tKeyLabel { unsigned short key; diff --git a/drivers/staging/iio/imu/mpu/dmpmap.h b/drivers/staging/iio/imu/mpu/dmpmap.h index 7dc354a33a2..28f59af0157 100644 --- a/drivers/staging/iio/imu/mpu/dmpmap.h +++ b/drivers/staging/iio/imu/mpu/dmpmap.h @@ -1,7 +1,26 @@ /* - $License: - Copyright (C) 2011 InvenSense Corporation, All Rights Reserved. - $ +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file dmpmap.h + * @brief dmp map definition + * @details This file is part of invensense mpu driver code + * */ #ifndef DMPMAP_H #define DMPMAP_H diff --git a/drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c b/drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c index bac55f1f019..70b19018d6d 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c @@ -17,9 +17,9 @@ * @brief Hardware drivers. 
* * @{ - * @file inv_mpu3050.c + * @file inv_mpu3050_iio.c * @brief A sysfs device driver for Invensense devices - * @details This file is part of inv_gyro driver code + * @details This file is part of invensense mpu driver code */ #include @@ -38,15 +38,21 @@ #include #include "inv_mpu_iio.h" -#define MPU3050_NACK_TIME (2*1000) -#define MPU3050_ONE_MPU_TIME (20) -#define MPU3050_BOGUS_ADDR (0x7F) +#define MPU3050_NACK_MIN_TIME (2 * 1000) +#define MPU3050_NACK_MAX_TIME (3 * 1000) -int set_3050_bypass(struct inv_gyro_state_s *st, int enable) +#define MPU3050_ONE_MPU_TIME 20 +#define MPU3050_BOGUS_ADDR 0x7F +int __attribute__((weak)) inv_register_mpu3050_slave(struct inv_mpu_iio_s *st) +{ + return 0; +} + +int set_3050_bypass(struct inv_mpu_iio_s *st, bool enable) { struct inv_reg_map_s *reg; int result; - unsigned char b; + u8 b; reg = &st->reg; result = inv_i2c_read(st, reg->user_ctrl, 1, &b); @@ -80,7 +86,7 @@ int set_3050_bypass(struct inv_gyro_state_s *st, int enable) * 2) wait enough time for a nack to occur, then go into * bypass mode: */ - usleep_range(MPU3050_NACK_TIME, MPU3050_NACK_TIME); + usleep_range(MPU3050_NACK_MIN_TIME, MPU3050_NACK_MAX_TIME); result = inv_i2c_single_write(st, reg->user_ctrl, b); if (result) return result; @@ -97,25 +103,65 @@ int set_3050_bypass(struct inv_gyro_state_s *st, int enable) result = inv_i2c_single_write(st, reg->user_ctrl, b); if (result) return result; - usleep_range(MPU3050_NACK_TIME, MPU3050_NACK_TIME); + usleep_range(MPU3050_NACK_MIN_TIME, MPU3050_NACK_MAX_TIME); } return 0; } void inv_setup_reg_mpu3050(struct inv_reg_map_s *reg) { - reg->fifo_en = 0x12; - reg->sample_rate_div = 0x15; - reg->lpf = 0x16; - reg->fifo_count_h = 0x3a; - reg->fifo_r_w = 0x3c; - reg->user_ctrl = 0x3d; - reg->pwr_mgmt_1 = 0x3e; - reg->raw_gyro = 0x1d; - reg->raw_accl = 0x23; - reg->temperature = 0x1b; - reg->int_enable = 0x17; - reg->int_status = 0x1a; + reg->fifo_en = REG_3050_FIFO_EN; + reg->sample_rate_div = REG_3050_SAMPLE_RATE_DIV; + reg->lpf = REG_3050_LPF; + reg->fifo_count_h = REG_3050_FIFO_COUNT_H; + reg->fifo_r_w = REG_3050_FIFO_R_W; + reg->user_ctrl = REG_3050_USER_CTRL; + reg->pwr_mgmt_1 = REG_3050_PWR_MGMT_1; + reg->raw_gyro = REG_3050_RAW_GYRO; + reg->raw_accl = REG_3050_AUX_XOUT_H; + reg->temperature = REG_3050_TEMPERATURE; + reg->int_enable = REG_3050_INT_ENABLE; + reg->int_status = REG_3050_INT_STATUS; +} + +int inv_switch_3050_gyro_engine(struct inv_mpu_iio_s *st, bool en) +{ + struct inv_reg_map_s *reg; + u8 data, p; + int result; + reg = &st->reg; + if (en) { + data = INV_CLK_PLL; + p = (BITS_3050_POWER1 | data); + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); + if (result) + return result; + p = (BITS_3050_POWER2 | data); + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); + if (result) + return result; + p = data; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); + msleep(SENSOR_UP_TIME); + } else { + p = BITS_3050_GYRO_STANDBY; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); + } + + return result; +} + +int inv_switch_3050_accl_engine(struct inv_mpu_iio_s *st, bool en) +{ + int result; + if (NULL == st->mpu_slave) + return -EPERM; + if (en) + result = st->mpu_slave->resume(st); + else + result = st->mpu_slave->suspend(st); + + return result; } /** @@ -131,8 +177,9 @@ int inv_init_config_mpu3050(struct iio_dev *indio_dev) { struct inv_reg_map_s *reg; int result; - unsigned char data; - struct inv_gyro_state_s *st = iio_priv(indio_dev); + u8 data; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + if 
(st->chip_config.is_asleep) return -EPERM; /*reading AUX VDDIO register */ @@ -147,9 +194,6 @@ int inv_init_config_mpu3050(struct iio_dev *indio_dev) return result; reg = &st->reg; - result = set_inv_enable(indio_dev, 0); - if (result) - return result; /*2000dps full scale range*/ result = inv_i2c_single_write(st, reg->lpf, (INV_FSR_2000DPS << GYRO_CONFIG_FSR_SHIFT) @@ -163,11 +207,12 @@ int inv_init_config_mpu3050(struct iio_dev *indio_dev) if (result) return result; st->chip_config.fifo_rate = INIT_FIFO_RATE; - st->irq_dur_us = INIT_DUR_TIME; + st->irq_dur_ns = INIT_DUR_TIME; st->chip_config.prog_start_addr = DMP_START_ADDR; st->chip_config.gyro_enable = 1; st->chip_config.gyro_fifo_enable = 1; - if (SECONDARY_SLAVE_TYPE_ACCEL == st->plat_data.sec_slave_type) { + if ((SECONDARY_SLAVE_TYPE_ACCEL == st->plat_data.sec_slave_type) && + st->mpu_slave) { result = st->mpu_slave->setup(st); if (result) return result; @@ -180,23 +225,24 @@ int inv_init_config_mpu3050(struct iio_dev *indio_dev) st->chip_config.accl_enable = 1; st->chip_config.accl_fifo_enable = 1; } + return 0; } + /** * set_power_mpu3050() - set power of mpu3050. * @st: Device driver instance. * @power_on: on/off */ -int set_power_mpu3050(struct inv_gyro_state_s *st, - unsigned char power_on) +int set_power_mpu3050(struct inv_mpu_iio_s *st, bool power_on) { struct inv_reg_map_s *reg; - unsigned char data, p; + u8 data, p; int result; reg = &st->reg; - if (power_on) + if (power_on) { data = 0; - else { + } else { if (st->mpu_slave) { result = st->mpu_slave->suspend(st); if (result) @@ -219,14 +265,11 @@ int set_power_mpu3050(struct inv_gyro_state_s *st, result = inv_i2c_single_write(st, reg->pwr_mgmt_1, data | p); if (result) return result; - - st->chip_config.clk_src = INV_CLK_PLL; } else { data |= (BITS_3050_GYRO_STANDBY | INV_CLK_INTERNAL); result = inv_i2c_single_write(st, reg->pwr_mgmt_1, data); if (result) return result; - st->chip_config.clk_src = INV_CLK_INTERNAL; } if (power_on) { msleep(POWER_UP_TIME); @@ -235,9 +278,9 @@ int set_power_mpu3050(struct inv_gyro_state_s *st, if (result) return result; } - st->chip_config.is_asleep = 0; - } else - st->chip_config.is_asleep = 1; + } + st->chip_config.is_asleep = !power_on; + return 0; } /** diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_core.c b/drivers/staging/iio/imu/mpu/inv_mpu_core.c index 94a5ccfc1f8..6368818f3da 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_core.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_core.c @@ -17,11 +17,12 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro.c + * @file inv_mpu_core.c * @brief A sysfs device driver for Invensense devices - * @details This driver currently works for the ITG3500, MPU6050, MPU9150 - * MPU3050 + * @details This driver currently works for the + * MPU3050/MPU6050/MPU9150/MPU6500/MPU9250 devices. 
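Right below this header the patch also defines pr_fmt() before the first include. A minimal sketch of that kernel logging convention, with a purely illustrative module name: once pr_fmt is in place, every pr_err()/pr_debug() in the file automatically carries the same prefix.

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* must precede the includes */

#include <linux/printk.h>

static void example_log_bad_key(int key)
{
	/* prints e.g. "inv_mpu_iio: bad key 13"; the actual prefix is whatever
	 * object name the kernel build assigns via KBUILD_MODNAME */
	pr_err("bad key %d\n", key);
}
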
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -37,57 +38,75 @@ #include #include #include + #include "inv_mpu_iio.h" #include "../../sysfs.h" -#define CHECK_DMP do \ - { \ - if ((st->chip_config.is_asleep) || \ - (0 == st->chip_config.firmware_loaded)) \ - return -EPERM; \ - result = kstrtoul(buf, 10, (long unsigned int *)&data); \ - if (result) \ - return result; \ - } while (0); -static void inv_setup_reg(struct inv_reg_map_s *reg) +#include "../../inv_test/inv_counters.h" + +s64 get_time_ns(void) { - reg->who_am_i = 0x75; - reg->sample_rate_div = 0x19; - reg->lpf = 0x1A; - reg->product_id = 0x0C; - reg->bank_sel = 0x6D; - reg->user_ctrl = 0x6A; - reg->fifo_en = 0x23; - reg->gyro_config = 0x1B; - reg->accl_config = 0x1C; - reg->fifo_count_h = 0x72; - reg->fifo_r_w = 0x74; - reg->raw_gyro = 0x43; - reg->raw_accl = 0x3B; - reg->temperature = 0x41; - reg->int_enable = 0x38; - reg->int_status = 0x3A; - reg->pwr_mgmt_1 = 0x6B; - reg->pwr_mgmt_2 = 0x6C; - reg->mem_start_addr = 0x6E; - reg->mem_r_w = 0x6F; - reg->prgm_strt_addrh = 0x70; -}; + struct timespec ts; + ktime_get_ts(&ts); + return timespec_to_ns(&ts); +} + +static const short AKM8975_ST_Lower[3] = {-100, -100, -1000}; +static const short AKM8975_ST_Upper[3] = {100, 100, -300}; + +static const short AKM8972_ST_Lower[3] = {-50, -50, -500}; +static const short AKM8972_ST_Upper[3] = {50, 50, -100}; + +static const short AKM8963_ST_Lower[3] = {-200, -200, -3200}; +static const short AKM8963_ST_Upper[3] = {200, 200, -800}; + +/* This is for compatibility for power state. Should remove once HAL + does not use power_state sysfs entry */ +static bool fake_asleep; + static const struct inv_hw_s hw_info[INV_NUM_PARTS] = { {119, "ITG3500"}, { 63, "MPU3050"}, {117, "MPU6050"}, - {118, "MPU9150"} + {118, "MPU9150"}, + {119, "MPU6500"}, + {118, "MPU9250"}, +}; + +static void inv_setup_reg(struct inv_reg_map_s *reg) +{ + reg->sample_rate_div = REG_SAMPLE_RATE_DIV; + reg->lpf = REG_CONFIG; + reg->bank_sel = REG_BANK_SEL; + reg->user_ctrl = REG_USER_CTRL; + reg->fifo_en = REG_FIFO_EN; + reg->gyro_config = REG_GYRO_CONFIG; + reg->accl_config = REG_ACCEL_CONFIG; + reg->fifo_count_h = REG_FIFO_COUNT_H; + reg->fifo_r_w = REG_FIFO_R_W; + reg->raw_gyro = REG_RAW_GYRO; + reg->raw_accl = REG_RAW_ACCEL; + reg->temperature = REG_TEMPERATURE; + reg->int_enable = REG_INT_ENABLE; + reg->int_status = REG_INT_STATUS; + reg->pwr_mgmt_1 = REG_PWR_MGMT_1; + reg->pwr_mgmt_2 = REG_PWR_MGMT_2; + reg->mem_start_addr = REG_MEM_START_ADDR; + reg->mem_r_w = REG_MEM_RW; + reg->prgm_strt_addrh = REG_PRGM_STRT_ADDRH; }; + /** * inv_i2c_read() - Read one or more bytes from the device registers. * @st: Device driver instance. * @reg: First device register to be read from. * @length: Number of bytes to read. * @data: Data read from device. - * NOTE: The slave register will not increment when reading from the FIFO. + * NOTE:This is not re-implementation of i2c_smbus_read because i2c + * address could be specified in this case. We could have two different + * i2c address due to secondary i2c interface. 
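A minimal sketch of the addressed read the NOTE above describes, assuming nothing beyond a valid struct i2c_adapter handle: because the target address is an explicit parameter instead of coming from a fixed client, one helper can reach either the MPU itself or a slave on its secondary interface.

#include <linux/i2c.h>
#include <linux/errno.h>

static int example_reg_read(struct i2c_adapter *adap, u16 addr,
			    u8 reg, u16 length, u8 *data)
{
	struct i2c_msg msgs[2];
	int res;

	msgs[0].addr  = addr;		/* first write the register index */
	msgs[0].flags = 0;
	msgs[0].buf   = &reg;
	msgs[0].len   = 1;

	msgs[1].addr  = addr;		/* then read back 'length' bytes */
	msgs[1].flags = I2C_M_RD;
	msgs[1].buf   = data;
	msgs[1].len   = length;

	res = i2c_transfer(adap, msgs, 2);
	if (res < 2)
		return res < 0 ? res : -EIO;
	return 0;
}
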
*/ -int inv_i2c_read_base(struct inv_gyro_state_s *st, unsigned short i2c_addr, - unsigned char reg, unsigned short length, unsigned char *data) +int inv_i2c_read_base(struct inv_mpu_iio_s *st, u16 i2c_addr, + u8 reg, u16 length, u8 *data) { struct i2c_msg msgs[2]; int res; @@ -106,12 +125,25 @@ int inv_i2c_read_base(struct inv_gyro_state_s *st, unsigned short i2c_addr, msgs[1].len = length; res = i2c_transfer(st->sl_handle, msgs, 2); + if (res < 2) { if (res >= 0) res = -EIO; - return res; } else - return 0; + res = 0; + + INV_I2C_INC_MPUWRITE(3); + INV_I2C_INC_MPUREAD(length); +#if CONFIG_DYNAMIC_DEBUG + { + char *read = 0; + pr_debug("%s RD%02X%02X%02X -> %s%s\n", st->hw->name, + i2c_addr, reg, length, + wr_pr_debug_begin(data, length, read), + wr_pr_debug_end(read)); + } +#endif + return res; } /** @@ -119,14 +151,16 @@ int inv_i2c_read_base(struct inv_gyro_state_s *st, unsigned short i2c_addr, * @st: Device driver instance. * @reg: Device register to be written to. * @data: Byte to write to device. + * NOTE:This is not re-implementation of i2c_smbus_write because i2c + * address could be specified in this case. We could have two different + * i2c address due to secondary i2c interface. */ -int inv_i2c_single_write_base(struct inv_gyro_state_s *st, - unsigned short i2c_addr, unsigned char reg, unsigned char data) +int inv_i2c_single_write_base(struct inv_mpu_iio_s *st, + u16 i2c_addr, u8 reg, u8 data) { - unsigned char tmp[2]; + u8 tmp[2]; struct i2c_msg msg; int res; - tmp[0] = reg; tmp[1] = data; @@ -135,7 +169,9 @@ int inv_i2c_single_write_base(struct inv_gyro_state_s *st, msg.buf = tmp; msg.len = 2; - /*printk(KERN_ERR "WS%02X%02X%02X\n", i2c_addr, reg, data);*/ + pr_debug("%s WR%02X%02X%02X\n", st->hw->name, i2c_addr, reg, data); + INV_I2C_INC_MPUWRITE(3); + res = i2c_transfer(st->sl_handle, &msg, 1); if (res < 1) { if (res == 0) @@ -144,64 +180,123 @@ int inv_i2c_single_write_base(struct inv_gyro_state_s *st, } else return 0; } -static int set_power_itg(struct inv_gyro_state_s *st, - unsigned char power_on) + +static int inv_switch_engine(struct inv_mpu_iio_s *st, bool en, u32 mask) { struct inv_reg_map_s *reg; - unsigned char data; + u8 data, mgmt_1; int result; - reg = &st->reg; - if (power_on) - data = 0; - else - data = BIT_SLEEP; - if (st->chip_config.lpa_mode) - data |= BIT_CYCLE; - if (st->chip_config.gyro_enable) { - result = inv_i2c_single_write(st, - reg->pwr_mgmt_1, data | INV_CLK_PLL); + /* switch clock needs to be careful. Only when gyro is on, can + clock source be switched to gyro. Otherwise, it must be set to + internal clock */ + if (BIT_PWR_GYRO_STBY == mask) { + result = inv_i2c_read(st, reg->pwr_mgmt_1, 1, &mgmt_1); if (result) return result; - st->chip_config.clk_src = INV_CLK_PLL; - } else { - result = inv_i2c_single_write(st, - reg->pwr_mgmt_1, data | INV_CLK_INTERNAL); + + mgmt_1 &= ~BIT_CLK_MASK; + } + + if ((BIT_PWR_GYRO_STBY == mask) && (!en)) { + /* turning off gyro requires switch to internal clock first. 
+ Then turn off gyro engine */ + mgmt_1 |= INV_CLK_INTERNAL; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, + mgmt_1); if (result) return result; - st->chip_config.clk_src = INV_CLK_INTERNAL; } - if (power_on) { - msleep(POWER_UP_TIME); - data = 0; - if (0 == st->chip_config.accl_enable) - data |= BIT_PWR_ACCL_STBY; - if (0 == st->chip_config.gyro_enable) - data |= BIT_PWR_GYRO_STBY; - data |= (st->chip_config.lpa_freq << LPA_FREQ_SHIFT); + result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &data); + if (result) + return result; + if (en) + data &= (~mask); + else + data |= mask; + result = inv_i2c_single_write(st, reg->pwr_mgmt_2, data); + if (result) + return result; - result = inv_i2c_single_write(st, reg->pwr_mgmt_2, data); + if ((BIT_PWR_GYRO_STBY == mask) && en) { + /* only gyro on needs sensor up time */ + msleep(SENSOR_UP_TIME); + /* after gyro is on & stable, switch internal clock to PLL */ + mgmt_1 |= INV_CLK_PLL; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, + mgmt_1); if (result) return result; - msleep(POWER_UP_TIME); - st->chip_config.is_asleep = 0; - } else - st->chip_config.is_asleep = 1; + } + if ((BIT_PWR_ACCL_STBY == mask) && en) + msleep(REG_UP_TIME); + return 0; } + /** - * inv_set_power_state() - Turn device on/off. - * @st: Device driver instance. - * @power_on: 1 to turn on, 0 to suspend. + * inv_lpa_freq() - store current low power frequency setting. */ -int inv_set_power_state(struct inv_gyro_state_s *st, - unsigned char power_on) +static int inv_lpa_freq(struct inv_mpu_iio_s *st, int lpa_freq) { - if (INV_MPU3050 == st->chip_type) - return set_power_mpu3050(st, power_on); + unsigned long result; + u8 d; + struct inv_reg_map_s *reg; + /* this mapping makes 6500 and 6050 setting close */ + /* 2, 4, 6, 7 corresponds to 0.98, 3.91, 15.63, 31.25 */ + const u8 mpu6500_lpa_mapping[] = {2, 4, 6, 7}; + + if (lpa_freq > MAX_LPA_FREQ_PARAM) + return -EINVAL; + + if (INV_MPU6500 == st->chip_type) { + d = mpu6500_lpa_mapping[lpa_freq]; + result = inv_i2c_single_write(st, REG_6500_LP_ACCEL_ODR, d); + if (result) + return result; + } else { + reg = &st->reg; + result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &d); + if (result) + return result; + d &= ~BIT_LPA_FREQ; + d |= (u8)(lpa_freq << LPA_FREQ_SHIFT); + result = inv_i2c_single_write(st, reg->pwr_mgmt_2, d); + if (result) + return result; + } + st->chip_config.lpa_freq = lpa_freq; + + return 0; +} + +static int set_power_itg(struct inv_mpu_iio_s *st, bool power_on) +{ + struct inv_reg_map_s *reg; + u8 data; + int result; + + if ((!power_on) == st->chip_config.is_asleep) + return 0; + reg = &st->reg; + if (power_on) + data = 0; else - return set_power_itg(st, power_on); + data = BIT_SLEEP; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, data); + if (result) + return result; + + if (power_on) { + if (INV_MPU6500 == st->chip_type) + msleep(POWER_UP_TIME); + else + msleep(REG_UP_TIME); + } + + st->chip_config.is_asleep = !power_on; + return 0; } @@ -212,25 +307,21 @@ int inv_set_power_state(struct inv_gyro_state_s *st, * FSR: +/- 2000DPS * DLPF: 42Hz * FIFO rate: 50Hz - * Clock source: Gyro PLL */ static int inv_init_config(struct iio_dev *indio_dev) { struct inv_reg_map_s *reg; int result; - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + - if (st->chip_config.is_asleep) - return -EPERM; reg = &st->reg; - result = set_inv_enable(indio_dev, 0); - if (result) - return result; result = inv_i2c_single_write(st, reg->gyro_config, INV_FSR_2000DPS << 
GYRO_CONFIG_FSR_SHIFT); if (result) return result; + st->chip_config.fsr = INV_FSR_2000DPS; result = inv_i2c_single_write(st, reg->lpf, INV_FILTER_42HZ); @@ -239,21 +330,17 @@ static int inv_init_config(struct iio_dev *indio_dev) st->chip_config.lpf = INV_FILTER_42HZ; result = inv_i2c_single_write(st, reg->sample_rate_div, - ONE_K_HZ/INIT_FIFO_RATE - 1); - if (result) - return result; - result = inv_i2c_single_write(st, REG_INT_PIN_CFG, - st->plat_data.int_config & (~BIT_BYPASS_EN)); + ONE_K_HZ / INIT_FIFO_RATE - 1); if (result) return result; st->chip_config.fifo_rate = INIT_FIFO_RATE; - st->irq_dur_us = INIT_DUR_TIME; + st->chip_config.new_fifo_rate = INIT_FIFO_RATE; + st->irq_dur_ns = INIT_DUR_TIME; st->chip_config.prog_start_addr = DMP_START_ADDR; - st->chip_config.gyro_enable = 1; - st->chip_config.gyro_fifo_enable = 1; + st->chip_config.dmp_output_rate = INIT_DMP_OUTPUT_RATE; + st->self_test.samples = INIT_ST_SAMPLES; + st->self_test.threshold = INIT_ST_THRESHOLD; if (INV_ITG3500 != st->chip_type) { - st->chip_config.accl_enable = 1; - st->chip_config.accl_fifo_enable = 1; st->chip_config.accl_fs = INV_FS_02G; result = inv_i2c_single_write(st, reg->accl_config, (INV_FS_02G << ACCL_CONFIG_FSR_SHIFT)); @@ -262,13 +349,30 @@ static int inv_init_config(struct iio_dev *indio_dev) st->tap.time = INIT_TAP_TIME; st->tap.thresh = INIT_TAP_THRESHOLD; st->tap.min_count = INIT_TAP_MIN_COUNT; + st->smd.threshold = MPU_INIT_SMD_THLD; + st->smd.delay = MPU_INIT_SMD_DELAY_THLD; + st->smd.delay2 = MPU_INIT_SMD_DELAY2_THLD; + + result = inv_i2c_single_write(st, REG_ACCEL_MOT_DUR, + INIT_MOT_DUR); + if (result) + return result; + st->mot_int.mot_dur = INIT_MOT_DUR; + + result = inv_i2c_single_write(st, REG_ACCEL_MOT_THR, + INIT_MOT_THR); + if (result) + return result; + st->mot_int.mot_thr = INIT_MOT_THR; } + return 0; } + /** * inv_compass_scale_show() - show compass scale. */ -static int inv_compass_scale_show(struct inv_gyro_state_s *st, int *scale) +static int inv_compass_scale_show(struct inv_mpu_iio_s *st, int *scale) { if (COMPASS_ID_AK8975 == st->plat_data.sec_slave_id) *scale = DATA_AKM8975_SCALE; @@ -281,7 +385,26 @@ static int inv_compass_scale_show(struct inv_gyro_state_s *st, int *scale) *scale = DATA_AKM8963_SCALE0; else return -EINVAL; - *scale *= (1L << 15); + + return IIO_VAL_INT; +} + +/** + * inv_sensor_show() - Read gyro/accel data directly from registers. + */ +static int inv_sensor_show(struct inv_mpu_iio_s *st, int reg, int axis, + int *val) +{ + int ind, result; + u8 d[2]; + + ind = (axis - IIO_MOD_X) * 2; + result = i2c_smbus_read_i2c_block_data(st->client, + reg + ind, 2, d); + if (result != 2) + return -EINVAL; + *val = (short)be16_to_cpup((__be16 *)(d)); + return IIO_VAL_INT; } @@ -289,59 +412,109 @@ static int inv_compass_scale_show(struct inv_gyro_state_s *st, int *scale) * mpu_read_raw() - read raw method. 
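A minimal sketch of the decode done by inv_sensor_show() a few lines up: the two bytes fetched per axis come from a high/low register pair, so they are interpreted as a big-endian signed 16-bit sample (the helper name is illustrative).

#include <linux/kernel.h>
#include <asm/byteorder.h>

static s16 example_decode_sample(const u8 d[2])
{
	/* d[0] holds the *_OUT_H register, d[1] the *_OUT_L register */
	return (s16)be16_to_cpup((const __be16 *)d);
}
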
*/ static int mpu_read_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int *val, - int *val2, - long mask) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct inv_mpu_iio_s *st = iio_priv(indio_dev); int result; - if (st->chip_config.is_asleep) - return -EINVAL; + switch (mask) { case 0: - if (chan->type == IIO_ANGL_VEL) { - *val = st->raw_gyro[chan->channel2 - IIO_MOD_X]; - return IIO_VAL_INT; - } - if (chan->type == IIO_ACCEL) { - *val = st->raw_accel[chan->channel2 - IIO_MOD_X]; - return IIO_VAL_INT; - } - if (chan->type == IIO_MAGN) { + /* if enabled, power is on already */ + if (!st->chip_config.enable) + return -EBUSY; + switch (chan->type) { + case IIO_ANGL_VEL: + if (!st->chip_config.gyro_enable) + return -EPERM; + return inv_sensor_show(st, st->reg.raw_gyro, + chan->channel2, val); + case IIO_ACCEL: + if (!st->chip_config.accl_enable) + return -EPERM; + return inv_sensor_show(st, st->reg.raw_accl, + chan->channel2, val); + case IIO_MAGN: + if (!st->chip_config.compass_enable) + return -EPERM; *val = st->raw_compass[chan->channel2 - IIO_MOD_X]; return IIO_VAL_INT; + case IIO_QUATERNION: + if (!(st->chip_config.dmp_on + && st->chip_config.quaternion_on)) + return -EPERM; + if (IIO_MOD_R == chan->channel2) + *val = st->raw_quaternion[0]; + else + *val = st->raw_quaternion[chan->channel2 - + IIO_MOD_X + 1]; + return IIO_VAL_INT; + default: + return -EINVAL; } - return -EINVAL; case IIO_CHAN_INFO_SCALE: - if (chan->type == IIO_ANGL_VEL) { - *val = (1 << st->chip_config.fsr)*GYRO_DPS_SCALE; + switch (chan->type) { + case IIO_ANGL_VEL: + { + const s16 gyro_scale[] = {250, 500, 1000, 2000}; + + *val = gyro_scale[st->chip_config.fsr]; + return IIO_VAL_INT; } - if (chan->type == IIO_ACCEL) { - *val = (2 << st->chip_config.accl_fs); + case IIO_ACCEL: + { + const s16 accel_scale[] = {2, 4, 8, 16}; + *val = accel_scale[st->chip_config.accl_fs] * + st->chip_info.multi; return IIO_VAL_INT; } - if (chan->type == IIO_MAGN) + case IIO_MAGN: return inv_compass_scale_show(st, val); - return -EINVAL; + default: + return -EINVAL; + } case IIO_CHAN_INFO_CALIBBIAS: if (st->chip_config.self_test_run_once == 0) { + /* This can only be run when enable is zero */ + if (st->chip_config.enable) + return -EBUSY; + mutex_lock(&indio_dev->mlock); + + result = inv_power_up_self_test(st); + if (result) + goto error_info_calibbias; result = inv_do_test(st, 0, st->gyro_bias, st->accel_bias); if (result) - return result; + goto error_info_calibbias; st->chip_config.self_test_run_once = 1; +error_info_calibbias: + /* Reset Accel and Gyro full scale range + back to default value */ + inv_recover_setting(st); + mutex_unlock(&indio_dev->mlock); } - if (chan->type == IIO_ANGL_VEL) { + switch (chan->type) { + case IIO_ANGL_VEL: *val = st->gyro_bias[chan->channel2 - IIO_MOD_X]; return IIO_VAL_INT; + case IIO_ACCEL: + *val = st->accel_bias[chan->channel2 - IIO_MOD_X] * + st->chip_info.multi; + return IIO_VAL_INT; + default: + return -EINVAL; } - if (chan->type == IIO_ACCEL) { - *val = st->accel_bias[chan->channel2 - IIO_MOD_X]; + case IIO_CHAN_INFO_OFFSET: + switch (chan->type) { + case IIO_ACCEL: + *val = st->input_accel_bias[chan->channel2 - IIO_MOD_X]; return IIO_VAL_INT; + default: + return -EINVAL; } - return -EINVAL; default: return -EINVAL; } @@ -350,7 +523,7 @@ static int mpu_read_raw(struct iio_dev *indio_dev, /** * inv_write_fsr() - Configure the gyro's scale range. 
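A minimal sketch of the range lookups mpu_read_raw() reports for IIO_CHAN_INFO_SCALE: the configured index picks the gyro range in degrees per second and the accel range in g (the driver further multiplies the accel figure by chip_info.multi). The helper names and the enum comments are illustrative.

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>

static int example_gyro_range_dps(int fsr)
{
	static const s16 dps[] = { 250, 500, 1000, 2000 };

	if (fsr < 0 || fsr >= ARRAY_SIZE(dps))
		return -EINVAL;
	return dps[fsr];	/* e.g. INV_FSR_2000DPS selects +/-2000 dps */
}

static int example_accel_range_g(int fs)
{
	static const s16 g[] = { 2, 4, 8, 16 };

	if (fs < 0 || fs >= ARRAY_SIZE(g))
		return -EINVAL;
	return g[fs];		/* e.g. INV_FS_02G selects +/-2 g */
}
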
*/ -static int inv_write_fsr(struct inv_gyro_state_s *st, int fsr) +static int inv_write_fsr(struct inv_mpu_iio_s *st, int fsr) { struct inv_reg_map_s *reg; int result; @@ -360,68 +533,65 @@ static int inv_write_fsr(struct inv_gyro_state_s *st, int fsr) if (fsr == st->chip_config.fsr) return 0; - if (INV_MPU3050 == st->chip_type) { + if (INV_MPU3050 == st->chip_type) result = inv_i2c_single_write(st, reg->lpf, (fsr << GYRO_CONFIG_FSR_SHIFT) | st->chip_config.lpf); - } else { + else result = inv_i2c_single_write(st, reg->gyro_config, fsr << GYRO_CONFIG_FSR_SHIFT); - } + if (result) return result; st->chip_config.fsr = fsr; + return 0; } /** * inv_write_accel_fs() - Configure the accelerometer's scale range. */ -static int inv_write_accel_fs(struct inv_gyro_state_s *st, int fs) +static int inv_write_accel_fs(struct inv_mpu_iio_s *st, int fs) { int result; struct inv_reg_map_s *reg; - reg = &st->reg; + reg = &st->reg; if (fs < 0 || fs > MAX_ACCL_FS_PARAM) return -EINVAL; if (fs == st->chip_config.accl_fs) return 0; - if (INV_MPU3050 == st->chip_type) { + if (INV_MPU3050 == st->chip_type) result = st->mpu_slave->set_fs(st, fs); - if (result) - return result; - } else { + else result = inv_i2c_single_write(st, reg->accl_config, (fs << ACCL_CONFIG_FSR_SHIFT)); - if (result) - return result; - } - /* reset fifo because the data could be mixed with old bad data */ + if (result) + return result; + st->chip_config.accl_fs = fs; + return 0; } + /** * inv_write_compass_scale() - Configure the compass's scale range. */ -static int inv_write_compass_scale(struct inv_gyro_state_s *st, int data) +static int inv_write_compass_scale(struct inv_mpu_iio_s *st, int data) { char d, en; int result; if (COMPASS_ID_AK8963 != st->plat_data.sec_slave_id) return 0; - if (data) - en = 1; - else - en = 0; + en = !!data; if (st->compass_scale == en) return 0; - d = (1 | (st->compass_scale << AKM8963_SCALE_SHIFT)); + d = (DATA_AKM_MODE_SM | (st->compass_scale << AKM8963_SCALE_SHIFT)); result = inv_i2c_single_write(st, REG_I2C_SLV1_DO, d); if (result) return result; st->compass_scale = en; - return 0; + return 0; } /** @@ -432,1083 +602,506 @@ static int mpu_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); int result; - if (st->chip_config.is_asleep) - return -EPERM; + + if (st->chip_config.enable) + return -EBUSY; + mutex_lock(&indio_dev->mlock); + result = st->set_power_state(st, true); + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; + } + switch (mask) { case IIO_CHAN_INFO_SCALE: - result = -EINVAL; - if (chan->type == IIO_ANGL_VEL) + switch (chan->type) { + case IIO_ANGL_VEL: result = inv_write_fsr(st, val); - if (chan->type == IIO_ACCEL) + break; + case IIO_ACCEL: result = inv_write_accel_fs(st, val); - if (chan->type == IIO_MAGN) + break; + case IIO_MAGN: result = inv_write_compass_scale(st, val); - return result; + break; + default: + result = -EINVAL; + break; + } + break; + case IIO_CHAN_INFO_OFFSET: + switch (chan->type) { + case IIO_ACCEL: + if (!st->chip_config.firmware_loaded) { + result = -EPERM; + goto error_write_raw; + } + result = inv_set_accel_bias_dmp(st); + if (result) + goto error_write_raw; + st->input_accel_bias[chan->channel2 - IIO_MOD_X] = val; + result = 0; + break; + default: + result = -EINVAL; + break; + } + break; default: - return -EINVAL; + result = -EINVAL; + break; } - return 0; -} -/** - * inv_set_lpf() - set low pass filer based on fifo rate. 
- */ -static int inv_set_lpf(struct inv_gyro_state_s *st, int rate) -{ - const short hz[] = {188, 98, 42, 20, 10, 5}; - const int d[] = {INV_FILTER_188HZ, INV_FILTER_98HZ, - INV_FILTER_42HZ, INV_FILTER_20HZ, - INV_FILTER_10HZ, INV_FILTER_5HZ}; - int i, h, data, result; - struct inv_reg_map_s *reg; - reg = &st->reg; - h = (rate >> 1); - i = 0; - while ((h < hz[i]) && (i < ARRAY_SIZE(d))) - i++; - if (i == ARRAY_SIZE(d)) - i -= 1; - data = d[i]; - if (INV_MPU3050 == st->chip_type) { - if (st->mpu_slave != NULL) { - result = st->mpu_slave->set_lpf(st, rate); - if (result) - return result; - } - result = inv_i2c_single_write(st, reg->lpf, data | - (st->chip_config.fsr << GYRO_CONFIG_FSR_SHIFT)); - if (result) - return result; - } else - result = inv_i2c_single_write(st, reg->lpf, data); - if (result) - return result; - st->chip_config.lpf = data; - return 0; +error_write_raw: + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); + + return result; } /** * inv_fifo_rate_store() - Set fifo rate. */ -static ssize_t inv_fifo_rate_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static int inv_fifo_rate_store(struct inv_mpu_iio_s *st, int fifo_rate) { - unsigned long fifo_rate; - unsigned char data; - int result; - struct inv_gyro_state_s *st; - struct inv_reg_map_s *reg; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - reg = &st->reg; - - if (st->chip_config.is_asleep) - return -EPERM; - if (kstrtoul(buf, 10, &fifo_rate)) - return -EINVAL; if ((fifo_rate < MIN_FIFO_RATE) || (fifo_rate > MAX_FIFO_RATE)) return -EINVAL; if (fifo_rate == st->chip_config.fifo_rate) - return count; + return 0; + if (st->chip_config.has_compass) { - data = COMPASS_RATE_SCALE*fifo_rate/ONE_K_HZ; - if (data > 0) - data -= 1; - st->compass_divider = data; + st->compass_divider = COMPASS_RATE_SCALE * fifo_rate / + ONE_K_HZ; + if (st->compass_divider > 0) + st->compass_divider -= 1; st->compass_counter = 0; - /* I2C_MST_DLY is set according to sample rate, - AKM cannot be read or set at sample rate higher than 100Hz*/ - result = inv_i2c_single_write(st, REG_I2C_SLV4_CTRL, data); - if (result) - return result; } - data = ONE_K_HZ / fifo_rate - 1; - result = inv_i2c_single_write(st, reg->sample_rate_div, data); - if (result) - return result; - st->chip_config.fifo_rate = fifo_rate; - result = inv_set_lpf(st, fifo_rate); - if (result) - return result; - st->irq_dur_us = (data + 1) * ONE_K_HZ; - st->last_isr_time = iio_get_time_ns(); - return count; -} -/** - * inv_fifo_rate_show() - Get the current sampling rate. - */ -static ssize_t inv_fifo_rate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.fifo_rate); -} + st->irq_dur_ns = (ONE_K_HZ / fifo_rate) * NSEC_PER_MSEC; + st->chip_config.new_fifo_rate = fifo_rate; -/** - * inv_power_state_store() - Turn device on/off. 
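A worked sketch of the rate arithmetic in the reworked inv_fifo_rate_store() above, assuming ONE_K_HZ is 1000: the interrupt period is simply the sample period in nanoseconds, while the compass divider computed alongside it throttles reads of the secondary AKM device, which (as the removed comment notes) cannot be sampled above 100 Hz.

#include <linux/types.h>
#include <linux/time.h>		/* NSEC_PER_MSEC */

/* hypothetical helper restating the irq_dur_ns computation */
static u64 example_sample_period_ns(unsigned int fifo_rate_hz)
{
	const unsigned int one_k_hz = 1000;	/* stands in for ONE_K_HZ */

	/* e.g. fifo_rate_hz == 200  ->  (1000 / 200) * 1000000 ns == 5 ms */
	return (one_k_hz / fifo_rate_hz) * (u64)NSEC_PER_MSEC;
}
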
- */ -static ssize_t inv_power_state_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - int result; - unsigned long power_state; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - if (kstrtoul(buf, 10, &power_state)) - return -EINVAL; - if (!power_state == st->chip_config.is_asleep) - return count; - result = inv_set_power_state(st, power_state); - return count; + return 0; } /** - * inv_power_state_show() - Check if the device is on or in sleep mode. + * inv_reg_dump_show() - Register dump for testing. */ -static ssize_t inv_power_state_show(struct device *dev, +static ssize_t inv_reg_dump_show(struct device *dev, struct device_attribute *attr, char *buf) { + int ii; + char data; + ssize_t bytes_printed = 0; struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return sprintf(buf, "0\n"); - else - return sprintf(buf, "1\n"); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + + mutex_lock(&indio_dev->mlock); + if (!st->chip_config.enable) + st->set_power_state(st, true); + for (ii = 0; ii < st->hw->num_reg; ii++) { + /* don't read fifo r/w register */ + if (ii == st->reg.fifo_r_w) + data = 0; + else + inv_i2c_read(st, ii, 1, &data); + bytes_printed += sprintf(buf + bytes_printed, "%#2x: %#2x\n", + ii, data); + } + if (!st->chip_config.enable) + st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); + + return bytes_printed; } -/** - * inv_firmware_loaded_store() - calling this function will change - * firmware load - */ -static ssize_t inv_firmware_loaded_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +int write_be32_key_to_mem(struct inv_mpu_iio_s *st, + u32 data, int key) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned long data, result; - result = kstrtoul(buf, 10, &data); - if (result) - return result; - if (data != 0) - return -EINVAL; - st->chip_config.firmware_loaded = 0; - st->chip_config.dmp_on = 0; - st->chip_config.quaternion_on = 0; - return count; + cpu_to_be32s(&data); + return mem_w_key(key, sizeof(data), (u8 *)&data); } + /** - * inv_firmware_loaded_show() - calling this function will show current - * firmware load status + * inv_quaternion_on() - calling this function will store + * current quaternion on */ -static ssize_t inv_firmware_loaded_show(struct device *dev, - struct device_attribute *attr, char *buf) +static int inv_quaternion_on(struct inv_mpu_iio_s *st, + struct iio_buffer *ring, bool en) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + st->chip_config.quaternion_on = en; + if (!en) { + clear_bit(INV_MPU_SCAN_QUAT_R, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_X, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_Y, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_Z, ring->scan_mask); + } - return sprintf(buf, "%d\n", st->chip_config.firmware_loaded); + return 0; } /** - * inv_lpa_mode_store() - store current low power settings + * inv_dmp_attr_store() - calling this function will store current + * dmp parameter settings */ -static ssize_t inv_lpa_mode_store(struct device *dev, +static ssize_t inv_dmp_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = 
iio_priv(indio_dev); - unsigned long result, lpa_mode; - unsigned char d; - struct inv_reg_map_s *reg; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, &lpa_mode); - if (result) - return result; - - reg = &st->reg; - result = inv_i2c_read(st, reg->pwr_mgmt_1, 1, &d); - if (result) - return result; - d &= ~BIT_CYCLE; - if (lpa_mode) - d |= BIT_CYCLE; - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, d); - if (result) - return result; - st->chip_config.lpa_mode = lpa_mode; - return count; -} -/** - * inv_lpa_mode_show() - show current low power settings - */ -static ssize_t inv_lpa_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.lpa_mode); -} - -/** - * inv_lpa_freq_store() - store current low power frequency setting. - */ -static ssize_t inv_lpa_freq_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned long result, lpa_freq; - unsigned char d; - struct inv_reg_map_s *reg; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, &lpa_freq); - if (result) - return result; - if (lpa_freq > MAX_LPA_FREQ_PARAM) - return -EINVAL; - reg = &st->reg; - result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &d); - if (result) - return result; - d &= ~BIT_LPA_FREQ; - d |= (unsigned char)(lpa_freq << LPA_FREQ_SHIFT); - result = inv_i2c_single_write(st, reg->pwr_mgmt_2, d); - if (result) - return result; - st->chip_config.lpa_freq = lpa_freq; - return count; -} -/** - * inv_lpa_freq_show() - show current low power frequency setting - */ -static ssize_t inv_lpa_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - switch (st->chip_config.lpa_freq) { - case 0: - return sprintf(buf, "1.25\n"); - case 1: - return sprintf(buf, "5\n"); - case 2: - return sprintf(buf, "20\n"); - case 3: - return sprintf(buf, "40\n"); - default: - return sprintf(buf, "0\n"); - } -} -/** - * inv_dmp_on_store() - calling this function will store current dmp on - */ -static ssize_t inv_dmp_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - st->chip_config.dmp_on = !!data; - return count; -} - -/** - * inv_dmp_on_show() - calling this function will show current dmp_on - */ -static ssize_t inv_dmp_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->chip_config.dmp_on); -} -/** - * inv_dmp_int_on_store() - calling this function will store current dmp int on - */ -static ssize_t inv_dmp_int_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - st->chip_config.dmp_int_on = !!data; - return count; -} - -/** - * inv_dmp_int_on_show() - calling this function will show current 
dmp_int_on - */ -static ssize_t inv_dmp_int_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->chip_config.dmp_int_on); -} - -/** - * inv_dmp_output_rate_store() - calling this function store dmp_output_rate - */ -static ssize_t inv_dmp_output_rate_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - unsigned int result, data; - st = iio_priv(indio_dev); - - CHECK_DMP - if (0 == data) - return -EINVAL; - result = inv_set_fifo_rate(st, data); - if (result) - return result; - st->chip_config.dmp_output_rate = data; - return count; -} - -/** - * inv_dmp_output_rate_show() - calling this shows dmp_output_rate - */ -static ssize_t inv_dmp_output_rate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->chip_config.dmp_output_rate); -} - -/** - * inv_orientation_on_store() - calling this function will store - * current orientation on - */ -static ssize_t inv_orientation_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data, en; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - en = !!data; - result = inv_enable_orientation_dmp(st, en); - if (result) - return result; - st->chip_config.orientation_on = en; - return count; -} -/** - * inv_orientation_on_show() - calling this function will show - * current orientation_on - */ -static ssize_t inv_orientation_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.orientation_on); -} - -/** - * inv_display_orient_on_store() - calling this function will store - * current display_orient on - */ -static ssize_t inv_display_orient_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data, en; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - en = !!data; - result = inv_set_display_orient_interrupt_dmp(st, en); - if (result) - return result; - st->chip_config.display_orient_on = en; - return count; -} -/** - * inv_display_orient_on_show() - calling this function will show - * current display_orient_on - */ -static ssize_t inv_display_orient_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.display_orient_on); -} - -/** - * inv_quaternion_on_store() - calling this function will store - * current quaternion on - */ -static ssize_t inv_quaternion_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data, en; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct iio_buffer *ring = indio_dev->buffer; - st = iio_priv(indio_dev); - - CHECK_DMP - en = !!data; - result = 
inv_send_quaternion(st, en); - if (result) - return result; - st->chip_config.quaternion_on = en; - if (0 == en) { - clear_bit(INV_MPU_SCAN_QUAT_R, ring->scan_mask); - clear_bit(INV_MPU_SCAN_QUAT_X, ring->scan_mask); - clear_bit(INV_MPU_SCAN_QUAT_Y, ring->scan_mask); - clear_bit(INV_MPU_SCAN_QUAT_Z, ring->scan_mask); - } - - return count; -} -/** - * inv_quaternion_on_show() - calling this function will show - * current orientation_on - */ -static ssize_t inv_quaternion_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.quaternion_on); -} - -/** - * inv_tap_on_store() - calling this function will store current tap on - */ -static ssize_t inv_tap_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - st->chip_config.tap_on = !!data; - result = inv_enable_tap_dmp(st, st->chip_config.tap_on); - return count; -} - -/** - * inv_tap_on_show() - calling this function will show current tap_on - */ -static ssize_t inv_tap_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->chip_config.tap_on); -} -/** - * inv_tap_time_store() - calling this function will store current tap time - */ -static ssize_t inv_tap_time_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - result = inv_set_tap_time_dmp(st, data); - if (result) - return result; - st->tap.time = data; - return count; -} -/** - * inv_tap_time_show() - calling this function will show current tap time - */ -static ssize_t inv_tap_time_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->tap.time); -} - -/** - * inv_tap_min_count_store() - calling this function will store tap count - */ -static ssize_t inv_tap_min_count_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - result = inv_set_min_taps_dmp(st, data); - if (result) - return result; - st->tap.min_count = data; - return count; -} -/** - * inv_tap_min_count_show() - calling this function show minimum count - */ -static ssize_t inv_tap_min_count_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->tap.min_count); -} - -/** - * inv_tap_threshold_store() - calling this function will store tap threshold - */ -static ssize_t inv_tap_threshold_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = 
iio_priv(indio_dev); - - CHECK_DMP - result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_X, data); - if (result) - return result; - result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_Y, data); - if (result) - return result; - result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_Z, data); - if (result) - return result; - - st->tap.thresh = data; - return count; -} -/** - * inv_tap_thresh_show() - calling this function show current tap threshold - */ -static ssize_t inv_tap_threshold_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->tap.thresh); -} -/** - * inv_clk_src_show() - Show the device's clock source. - */ -static ssize_t inv_clk_src_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - - switch (st->chip_config.clk_src) { - case INV_CLK_INTERNAL: - return sprintf(buf, "INTERNAL\n"); - case INV_CLK_PLL: - return sprintf(buf, "Gyro PLL\n"); - default: - return -EPERM; - } -} -/** - * inv_reg_dump_show() - Register dump for testing. - * TODO: Only for testing. - */ -static ssize_t inv_reg_dump_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int ii; - char data; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - ssize_t bytes_printed = 0; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - - for (ii = 0; ii < st->hw->num_reg; ii++) { - inv_i2c_read(st, ii, 1, &data); - bytes_printed += sprintf(buf+bytes_printed, "%#2x: %#2x\n", - ii, data); - } - return bytes_printed; -} - -/** - * inv_self_test_show() - self test result. 0 for fail; 1 for success. - * calling this function will trigger self test - * and return test result. 
- */ -static ssize_t inv_self_test_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int result; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (INV_MPU3050 == st->chip_type) - result = 0; - else - result = inv_hw_self_test(st); - return sprintf(buf, "%d\n", result); -} -/** - * inv_key_show() - calling this function will show the key - * - */ -static ssize_t inv_key_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - unsigned char *key; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - key = st->plat_data.key; - return sprintf(buf, - "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", - key[0], key[1], key[2], key[3], - key[4], key[5], key[6], key[7], - key[8], key[9], key[10], key[11], - key[12], key[13], key[14], key[15]); -} -/** - * inv_gyro_matrix_show() - show orientation matrix - */ -static ssize_t inv_gyro_matrix_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - signed char *m; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - m = st->plat_data.orientation; - return sprintf(buf, - "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", - m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); -} -/** - * inv_accl_matrix_show() - show orientation matrix - */ -static ssize_t inv_accl_matrix_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - signed char *m; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->plat_data.sec_slave_type == SECONDARY_SLAVE_TYPE_ACCEL) - m = st->plat_data.secondary_orientation; - else - m = st->plat_data.orientation; - return sprintf(buf, - "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", - m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); -} -/** - * inv_compass_matrix_show() - show orientation matrix - */ -static ssize_t inv_compass_matrix_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - signed char *m; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->plat_data.sec_slave_type == SECONDARY_SLAVE_TYPE_COMPASS) - m = st->plat_data.secondary_orientation; - else - return -1; - return sprintf(buf, - "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", - m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); -} - -/** - * inv_flick_lower_store() - calling this function will store current - * flick lower bound - */ -static ssize_t inv_flick_lower_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtol(buf, 10, (long unsigned int *)&data); - if (result) - return result; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - - result = mem_w_key(KEY_FLICK_LOWER, 4, p); - if (result) - return result; - st->flick.lower = data; - return count; -} - -/** - * inv_flick_lower_show() - calling this function will show current - * flick lower bound - */ -static ssize_t inv_flick_lower_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->flick.lower); -} -/** - * 
inv_flick_upper_store() - calling this function will store current - * flick upper bound - */ -static ssize_t inv_flick_upper_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_FLICK_UPPER, 4, p); - if (result) - return result; - st->flick.upper = data; - return count; -} - -/** - * inv_flick_upper_show() - calling this function will show current - * flick upper bound - */ -static ssize_t inv_flick_upper_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->flick.upper); -} -/** - * inv_flick_counter_store() - calling this function will store current - * flick counter value - */ -static ssize_t inv_flick_counter_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_FLICK_COUNTER, 4, p); - if (result) - return result; - st->flick.counter = data; - - return count; -} - -/** - * inv_flick_counter_show() - calling this function will show current - * flick counter value - */ -static ssize_t inv_flick_counter_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->flick.counter); -} - -/** - * inv_flick_int_on_store() - calling this function will store current - * flick interrupt on value - */ -static ssize_t inv_flick_int_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned long result, data; - unsigned char d[4]; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, &data); - if (result) - return result; - if (data) - /* Use interrupt to signal when gesture was observed */ - d[0] = DIND40+4; - else - d[0] = DINAA0+8; - result = mem_w_key(KEY_CGNOTICE_INTR, 1, d); - if (result) - return result; - st->chip_config.flick_int_on = data; - return count; -} + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); + int result, data; -/** - * inv_flick_int_on_show() - calling this function will show current - * flick interrupt on value - */ -static ssize_t inv_flick_int_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.flick_int_on); -} -/** - * inv_flick_axis_store() - calling this function will store current - * flick axis value - */ -static ssize_t 
inv_flick_axis_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned long result, data; - unsigned char d[4]; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, &data); - if (result) - return result; + mutex_lock(&indio_dev->mlock); + if (st->chip_config.enable) { + result = -EBUSY; + goto dmp_attr_store_fail; + } + if (this_attr->address <= ATTR_DMP_DISPLAY_ORIENTATION_ON) { + if (!st->chip_config.firmware_loaded) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + result = st->set_power_state(st, true); + if (result) + goto dmp_attr_store_fail; + } - if (data == 0) - d[0] = DINBC2; - else if (data == 2) - d[2] = DINBC6; - else - d[0] = DINBC4; - result = mem_w_key(KEY_CFG_FLICK_IN, 1, d); + result = kstrtoint(buf, 10, &data); if (result) - return result; - st->flick.axis = data; - - return count; -} + goto dmp_attr_store_fail; + switch (this_attr->address) { + case ATTR_DMP_SMD_ENABLE: + { + u8 on[] = {0, 1}; + u8 off[] = {0, 0}; + u8 *d; + if (data) + d = on; + else + d = off; + result = mem_w_key(KEY_SMD_ENABLE, ARRAY_SIZE(on), d); + if (result) + goto dmp_attr_store_fail; + st->chip_config.smd_enable = !!data; + } + break; + case ATTR_DMP_SMD_THLD: + if (data < 0 || data > SHRT_MAX) + goto dmp_attr_store_fail; + result = write_be32_key_to_mem(st, data << 16, + KEY_SMD_ACCEL_THLD); + if (result) + goto dmp_attr_store_fail; + st->smd.threshold = data; + break; + case ATTR_DMP_SMD_DELAY_THLD: + if (data < 0 || data > INT_MAX / MPU_DEFAULT_DMP_FREQ) + goto dmp_attr_store_fail; + result = write_be32_key_to_mem(st, data * MPU_DEFAULT_DMP_FREQ, + KEY_SMD_DELAY_THLD); + if (result) + goto dmp_attr_store_fail; + st->smd.delay = data; + break; + case ATTR_DMP_SMD_DELAY_THLD2: + if (data < 0 || data > INT_MAX / MPU_DEFAULT_DMP_FREQ) + goto dmp_attr_store_fail; + result = write_be32_key_to_mem(st, data * MPU_DEFAULT_DMP_FREQ, + KEY_SMD_DELAY2_THLD); + if (result) + goto dmp_attr_store_fail; + st->smd.delay2 = data; + break; + case ATTR_DMP_TAP_ON: + result = inv_enable_tap_dmp(st, !!data); + if (result) + goto dmp_attr_store_fail; + st->chip_config.tap_on = !!data; + break; + case ATTR_DMP_TAP_THRESHOLD: { + const char ax[] = {INV_TAP_AXIS_X, INV_TAP_AXIS_Y, + INV_TAP_AXIS_Z}; + int i; + if (data < 0 || data > USHRT_MAX) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + for (i = 0; i < ARRAY_SIZE(ax); i++) { + result = inv_set_tap_threshold_dmp(st, ax[i], data); + if (result) + goto dmp_attr_store_fail; + } + st->tap.thresh = data; + break; + } + case ATTR_DMP_TAP_MIN_COUNT: + if (data < 0 || data > USHRT_MAX) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + result = inv_set_min_taps_dmp(st, data); + if (result) + goto dmp_attr_store_fail; + st->tap.min_count = data; + break; + case ATTR_DMP_TAP_TIME: + if (data < 0 || data > USHRT_MAX) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + result = inv_set_tap_time_dmp(st, data); + if (result) + goto dmp_attr_store_fail; + st->tap.time = data; + break; + case ATTR_DMP_DISPLAY_ORIENTATION_ON: + result = inv_set_display_orient_interrupt_dmp(st, !!data); + if (result) + goto dmp_attr_store_fail; + st->chip_config.display_orient_on = !!data; + break; + /* from here, power of chip is not turned on */ + case ATTR_DMP_ON: + st->chip_config.dmp_on = !!data; + break; + case ATTR_DMP_INT_ON: + st->chip_config.dmp_int_on = !!data; + break; + case 
ATTR_DMP_EVENT_INT_ON: + st->chip_config.dmp_event_int_on = !!data; + break; + case ATTR_DMP_OUTPUT_RATE: + if (data <= 0 || data > MAX_DMP_OUTPUT_RATE) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + st->chip_config.dmp_output_rate = data; + if (st->chip_config.has_compass) { + st->compass_dmp_divider = COMPASS_RATE_SCALE * data / + ONE_K_HZ; + if (st->compass_dmp_divider > 0) + st->compass_dmp_divider -= 1; + st->compass_counter = 0; + } + break; + case ATTR_DMP_QUATERNION_ON: + result = inv_quaternion_on(st, indio_dev->buffer, !!data); + break; +#ifdef CONFIG_INV_TESTING + case ATTR_DEBUG_SMD_ENABLE_TESTP1: + { + u8 d[] = {0x42}; + result = st->set_power_state(st, true); + if (result) + goto dmp_attr_store_fail; + result = mem_w_key(KEY_SMD_ENABLE_TESTPT1, ARRAY_SIZE(d), d); + if (result) + goto dmp_attr_store_fail; + } + break; + case ATTR_DEBUG_SMD_ENABLE_TESTP2: + { + u8 d[] = {0x42}; + result = st->set_power_state(st, true); + if (result) + goto dmp_attr_store_fail; + result = mem_w_key(KEY_SMD_ENABLE_TESTPT2, ARRAY_SIZE(d), d); + if (result) + goto dmp_attr_store_fail; + } + break; +#endif + default: + result = -EINVAL; + goto dmp_attr_store_fail; + } -/** - * inv_flick_axis_show() - calling this function will show current - * flick axis value - */ -static ssize_t inv_flick_axis_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->flick.axis); -} -/** - * inv_flick_msg_on_store() - calling this function will store current - * flick message on value - */ -static ssize_t inv_flick_msg_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; - if (data) - data = DATA_MSG_ON; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_FLICK_MSG, 4, p); +dmp_attr_store_fail: + if ((this_attr->address <= ATTR_DMP_DISPLAY_ORIENTATION_ON) && + (!st->chip_config.enable)) + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); if (result) return result; - st->flick.msg_on = data; return count; } /** - * inv_flick_msg_on_show() - calling this function will show current - * flick message on value + * inv_attr_show() - calling this function will show current + * dmp parameters. 
*/ -static ssize_t inv_flick_msg_on_show(struct device *dev, +static ssize_t inv_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->flick.msg_on); -} + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); + int result; + s8 *m; + + switch (this_attr->address) { + case ATTR_DMP_SMD_ENABLE: + return sprintf(buf, "%d\n", st->chip_config.smd_enable); + case ATTR_DMP_SMD_THLD: + return sprintf(buf, "%d\n", st->smd.threshold); + case ATTR_DMP_SMD_DELAY_THLD: + return sprintf(buf, "%d\n", st->smd.delay); + case ATTR_DMP_SMD_DELAY_THLD2: + return sprintf(buf, "%d\n", st->smd.delay2); + case ATTR_DMP_TAP_ON: + return sprintf(buf, "%d\n", st->chip_config.tap_on); + case ATTR_DMP_TAP_THRESHOLD: + return sprintf(buf, "%d\n", st->tap.thresh); + case ATTR_DMP_TAP_MIN_COUNT: + return sprintf(buf, "%d\n", st->tap.min_count); + case ATTR_DMP_TAP_TIME: + return sprintf(buf, "%d\n", st->tap.time); + case ATTR_DMP_DISPLAY_ORIENTATION_ON: + return sprintf(buf, "%d\n", + st->chip_config.display_orient_on); + + case ATTR_DMP_ON: + return sprintf(buf, "%d\n", st->chip_config.dmp_on); + case ATTR_DMP_INT_ON: + return sprintf(buf, "%d\n", st->chip_config.dmp_int_on); + case ATTR_DMP_EVENT_INT_ON: + return sprintf(buf, "%d\n", st->chip_config.dmp_event_int_on); + case ATTR_DMP_OUTPUT_RATE: + return sprintf(buf, "%d\n", + st->chip_config.dmp_output_rate); + case ATTR_DMP_QUATERNION_ON: + return sprintf(buf, "%d\n", st->chip_config.quaternion_on); + + case ATTR_MOTION_LPA_ON: + return sprintf(buf, "%d\n", st->mot_int.mot_on); + case ATTR_MOTION_LPA_FREQ:{ + const char *f[] = {"1.25", "5", "20", "40"}; + return sprintf(buf, "%s\n", f[st->chip_config.lpa_freq]); + } + case ATTR_MOTION_LPA_DURATION: + return sprintf(buf, "%d\n", st->mot_int.mot_dur); + case ATTR_MOTION_LPA_THRESHOLD: + return sprintf(buf, "%d\n", st->mot_int.mot_thr); + + case ATTR_SELF_TEST_SAMPLES: + return sprintf(buf, "%d\n", st->self_test.samples); + case ATTR_SELF_TEST_THRESHOLD: + return sprintf(buf, "%d\n", st->self_test.threshold); + case ATTR_GYRO_ENABLE: + return sprintf(buf, "%d\n", st->chip_config.gyro_enable); + case ATTR_ACCL_ENABLE: + return sprintf(buf, "%d\n", st->chip_config.accl_enable); + case ATTR_COMPASS_ENABLE: + return sprintf(buf, "%d\n", st->chip_config.compass_enable); + case ATTR_POWER_STATE: + return sprintf(buf, "%d\n", !fake_asleep); + case ATTR_FIRMWARE_LOADED: + return sprintf(buf, "%d\n", st->chip_config.firmware_loaded); + case ATTR_SAMPLING_FREQ: + return sprintf(buf, "%d\n", st->chip_config.new_fifo_rate); + + case ATTR_SELF_TEST: + if (st->chip_config.enable) + return -EBUSY; + mutex_lock(&indio_dev->mlock); + if (INV_MPU3050 == st->chip_type) + result = 1; + else + result = inv_hw_self_test(st); + mutex_unlock(&indio_dev->mlock); + return sprintf(buf, "%d\n", result); -/** - * inv_pedometer_steps_store() - calling this function will store current - * pedometer steps into MPU memory - */ -static ssize_t inv_pedometer_steps_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; 
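For reference (not part of the patch itself): the ATTR_DMP_SMD_THLD and ATTR_DMP_SMD_DELAY_THLD* store cases above convert the user-supplied sysfs value before writing it to DMP memory via write_be32_key_to_mem() — the accel threshold is shifted into the upper 16 bits of the 32-bit word, and the delay values are multiplied by MPU_DEFAULT_DMP_FREQ. A minimal standalone sketch of those conversions is below; treating the delay as seconds and MPU_DEFAULT_DMP_FREQ as the DMP sample rate in Hz is an assumption, since neither is defined in this hunk.

#include <stdio.h>
#include <stdint.h>

#define MPU_DEFAULT_DMP_FREQ 200	/* assumed DMP rate in Hz; not defined in this hunk */

/* accel threshold: value placed in the upper 16 bits of the 32-bit word */
static uint32_t smd_thld_to_dmp(uint16_t thld)
{
	return (uint32_t)thld << 16;
}

/* delay thresholds: value scaled by the DMP rate into a sample count */
static uint32_t smd_delay_to_dmp(uint32_t delay)
{
	return delay * MPU_DEFAULT_DMP_FREQ;
}

int main(void)
{
	printf("thld 1500 -> 0x%08x, delay 3 -> %u samples\n",
	       smd_thld_to_dmp(1500), smd_delay_to_dmp(3));
	return 0;
}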
+ case ATTR_GYRO_MATRIX: + m = st->plat_data.orientation; + return sprintf(buf, "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); + case ATTR_ACCL_MATRIX: + if (st->plat_data.sec_slave_type == SECONDARY_SLAVE_TYPE_ACCEL) + m = st->plat_data.secondary_orientation; + else + m = st->plat_data.orientation; + return sprintf(buf, "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); + case ATTR_COMPASS_MATRIX: + if (st->plat_data.sec_slave_type == + SECONDARY_SLAVE_TYPE_COMPASS) + m = st->plat_data.secondary_orientation; + else + return -ENODEV; + return sprintf(buf, "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); + case ATTR_SECONDARY_NAME:{ + const char *n[] = {"0", "AK8975", "AK8972", "AK8963", "BMA250"}; + if (COMPASS_ID_AK8975 == st->plat_data.sec_slave_id) + return sprintf(buf, "%s\n", n[1]); + else if (COMPASS_ID_AK8972 == st->plat_data.sec_slave_id) + return sprintf(buf, "%s\n", n[2]); + else if (COMPASS_ID_AK8963 == st->plat_data.sec_slave_id) + return sprintf(buf, "%s\n", n[3]); + else if (ACCEL_ID_BMA250 == st->plat_data.sec_slave_id) + return sprintf(buf, "%s\n", n[4]); + else + return sprintf(buf, "%s\n", n[0]); + } - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_D_PEDSTD_STEPCTR, 4, p); - if (result) - return result; +#ifdef CONFIG_INV_TESTING + case ATTR_REG_WRITE: + return sprintf(buf, "1\n"); + case ATTR_DEBUG_SMD_EXE_STATE: + { + u8 d[2]; - return count; -} + result = st->set_power_state(st, true); + mpu_memory_read(st, st->i2c_addr, + inv_dmp_get_address(KEY_SMD_EXE_STATE), 2, d); + return sprintf(buf, "%d\n", (short)be16_to_cpup((__be16 *)(d))); + } + case ATTR_DEBUG_SMD_DELAY_CNTR: + { + u8 d[4]; -/** - * inv_pedometer_steps_show() - calling this function will store current - * pedometer steps into MPU memory - */ -static ssize_t inv_pedometer_steps_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int result, data; - unsigned char d[4]; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) + result = st->set_power_state(st, true); + mpu_memory_read(st, st->i2c_addr, + inv_dmp_get_address(KEY_SMD_DELAY_CNTR), 4, d); + return sprintf(buf, "%d\n", (int)be32_to_cpup((__be32 *)(d))); + } +#endif + default: return -EPERM; - result = mpu_memory_read(st->sl_handle, st->i2c_addr, - inv_dmp_get_address(KEY_D_PEDSTD_STEPCTR), 4, d); - if (result) - return result; - data = be32_to_cpup((int *)d); - return sprintf(buf, "%d\n", data); + } } -/** - * inv_pedometer_time_store() - calling this function will store current - * pedometer time into MPU memory - */ -static ssize_t inv_pedometer_time_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_D_PEDSTD_TIMECTR, 4, p); - if (result) - return result; - return count; -} /** - * inv_pedometer_time_show() - calling this function will store current - * pedometer steps into MPU memory + * inv_dmp_display_orient_show() - calling this function will + * show orientation This event 
must use poll. */ -static ssize_t inv_pedometer_time_show(struct device *dev, +static ssize_t inv_dmp_display_orient_show(struct device *dev, struct device_attribute *attr, char *buf) { - int result, data; - unsigned char d[4]; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return -EPERM; - result = mpu_memory_read(st->sl_handle, st->i2c_addr, - inv_dmp_get_address(KEY_D_PEDSTD_TIMECTR), 4, d); - if (result) - return result; - data = be32_to_cpup((int *)d); - return sprintf(buf, "%d\n", data*20); + struct inv_mpu_iio_s *st = iio_priv(dev_get_drvdata(dev)); + return sprintf(buf, "%d\n", st->display_orient_data); } /** - * inv_dmp_flick_show() - calling this function will show flick event. + * inv_accel_motion_show() - calling this function showes motion interrupt. * This event must use poll. */ -static ssize_t inv_dmp_flick_show(struct device *dev, +static ssize_t inv_accel_motion_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "1\n"); } -/** - * inv_dmp_orient_show() - calling this function will show orientation - * This event must use poll. - */ -static ssize_t inv_dmp_orient_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->orient_data); -} /** - * inv_dmp_display_orient_show() - calling this function will - * show orientation This event must use poll. + * inv_smd_show() - calling this function showes smd interrupt. + * This event must use poll. */ -static ssize_t inv_dmp_display_orient_show(struct device *dev, +static ssize_t inv_smd_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->display_orient_data); + return sprintf(buf, "1\n"); } /** @@ -1518,611 +1111,541 @@ static ssize_t inv_dmp_display_orient_show(struct device *dev, static ssize_t inv_dmp_tap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(dev_get_drvdata(dev)); return sprintf(buf, "%d\n", st->tap_data); } + /** * inv_temperature_show() - Read temperature data directly from registers. 
*/ static ssize_t inv_temperature_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct inv_reg_map_s *reg; - int result; + int result, cur_scale, cur_off; short temp; long scale_t; - unsigned char data[2]; - reg = &st->reg; + u8 data[2]; + const long scale[] = {3834792L, 3158064L, 3340827L}; + const long offset[] = {5383314L, 2394184L, 1376256L}; - if (st->chip_config.is_asleep) - return -EPERM; + reg = &st->reg; + mutex_lock(&indio_dev->mlock); + if (!st->chip_config.enable) + result = st->set_power_state(st, true); + else + result = 0; + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; + } result = inv_i2c_read(st, reg->temperature, 2, data); + if (!st->chip_config.enable) + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); if (result) { - printk(KERN_ERR "Could not read temperature register.\n"); + pr_err("Could not read temperature register.\n"); return result; } temp = (signed short)(be16_to_cpup((short *)&data[0])); + switch (st->chip_type) { + case INV_MPU3050: + cur_scale = scale[0]; + cur_off = offset[0]; + break; + case INV_MPU6050: + cur_scale = scale[1]; + cur_off = offset[1]; + break; + case INV_MPU6500: + cur_scale = scale[2]; + cur_off = offset[2]; + break; + default: + return -EINVAL; + }; + scale_t = cur_off + + inv_q30_mult((int)temp << MPU_TEMP_SHIFT, cur_scale); - if (INV_MPU3050 == st->chip_type) - scale_t = MPU3050_TEMP_OFFSET + - inv_q30_mult((long)temp << MPU_TEMP_SHIFT, - MPU3050_TEMP_SCALE); - else - scale_t = MPU6050_TEMP_OFFSET + - inv_q30_mult((long)temp << MPU_TEMP_SHIFT, - MPU6050_TEMP_SCALE); - return sprintf(buf, "%ld %lld\n", scale_t, iio_get_time_ns()); + INV_I2C_INC_TEMPREAD(1); + + return sprintf(buf, "%ld %lld\n", scale_t, get_time_ns()); } -static int inv_switch_gyro_engine(struct inv_gyro_state_s *st, int en) + +/** + * inv_firmware_loaded() - calling this function will change + * firmware load + */ +static int inv_firmware_loaded(struct inv_mpu_iio_s *st, int data) { - struct inv_reg_map_s *reg; - unsigned char data, p; - int result; - reg = &st->reg; - if (INV_MPU3050 == st->chip_type) { - if (en) { - data = INV_CLK_PLL; - p = (BITS_3050_POWER1 | data); - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); - if (result) - return result; - p = (BITS_3050_POWER2 | data); - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); - if (result) - return result; - p = data; - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); - if (result) - return result; - } else { - p = BITS_3050_GYRO_STANDBY; - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); - if (result) - return result; - } - } else { - result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &data); - if (result) - return result; - if (en) - data &= (~BIT_PWR_GYRO_STBY); - else - data |= BIT_PWR_GYRO_STBY; - result = inv_i2c_single_write(st, reg->pwr_mgmt_2, data); - if (result) - return result; - msleep(SENSOR_UP_TIME); - } - if (en) - st->chip_config.clk_src = INV_CLK_PLL; - else - st->chip_config.clk_src = INV_CLK_INTERNAL; + if (data) + return -EINVAL; + st->chip_config.firmware_loaded = 0; + st->chip_config.dmp_on = 0; + st->chip_config.quaternion_on = 0; return 0; } -static int inv_switch_accl_engine(struct inv_gyro_state_s *st, int en) + +static int inv_switch_gyro_engine(struct inv_mpu_iio_s *st, bool en) { - struct inv_reg_map_s *reg; - unsigned char data; - int 
result; - reg = &st->reg; - if (INV_MPU3050 == st->chip_type) { - if (NULL == st->mpu_slave) - return -EPERM; - if (en) - result = st->mpu_slave->resume(st); - else - result = st->mpu_slave->suspend(st); - if (result) - return result; - } else { - result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &data); - if (result) - return result; - if (en) - data &= (~BIT_PWR_ACCL_STBY); - else - data |= BIT_PWR_ACCL_STBY; - result = inv_i2c_single_write(st, reg->pwr_mgmt_2, data); - if (result) - return result; - msleep(SENSOR_UP_TIME); - } - return 0; + return inv_switch_engine(st, en, BIT_PWR_GYRO_STBY); +} + +static int inv_switch_accl_engine(struct inv_mpu_iio_s *st, bool en) +{ + return inv_switch_engine(st, en, BIT_PWR_ACCL_STBY); } /** - * inv_gyro_enable_store() - Enable/disable gyro. + * inv_gyro_enable() - Enable/disable gyro. */ -static ssize_t inv_gyro_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static int inv_gyro_enable(struct inv_mpu_iio_s *st, + struct iio_buffer *ring, bool en) { - unsigned long data, en; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; - int result; - - if (st->chip_config.is_asleep) - return -EPERM; - if (st->chip_config.enable) - return -EPERM; - - result = kstrtoul(buf, 10, &data); - if (result) - return -EINVAL; - en = !!data; if (en == st->chip_config.gyro_enable) - return count; - result = inv_switch_gyro_engine(st, en); - if (result) - return result; - - if (0 == en) { + return 0; + if (!en) { st->chip_config.gyro_fifo_enable = 0; clear_bit(INV_MPU_SCAN_GYRO_X, ring->scan_mask); clear_bit(INV_MPU_SCAN_GYRO_Y, ring->scan_mask); clear_bit(INV_MPU_SCAN_GYRO_Z, ring->scan_mask); } st->chip_config.gyro_enable = en; - return count; -} -/** - * inv_gyro_enable_show() - Check if the FIFO and ring buffer are enabled. - */ -static ssize_t inv_gyro_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.gyro_enable); + + return 0; } /** - * inv_accl_enable_store() - Enable/disable accl. + * inv_accl_enable() - Enable/disable accl. */ -static ssize_t inv_accl_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t inv_accl_enable(struct inv_mpu_iio_s *st, + struct iio_buffer *ring, bool en) { - unsigned long en, data; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; - int result; - - if (st->chip_config.is_asleep) - return -EPERM; - if (st->chip_config.enable) - return -EPERM; - result = kstrtoul(buf, 10, &data); - if (result) - return -EINVAL; - if (data) - en = 1; - else - en = 0; if (en == st->chip_config.accl_enable) - return count; - result = inv_switch_accl_engine(st, en); - if (result) - return result; - st->chip_config.accl_enable = en; - if (0 == en) { + return 0; + if (!en) { st->chip_config.accl_fifo_enable = 0; clear_bit(INV_MPU_SCAN_ACCL_X, ring->scan_mask); clear_bit(INV_MPU_SCAN_ACCL_Y, ring->scan_mask); clear_bit(INV_MPU_SCAN_ACCL_Z, ring->scan_mask); } - return count; + st->chip_config.accl_enable = en; + + return 0; } + /** - * inv_accl_enable_show() - Check if the FIFO and ring buffer are enabled. 
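For reference (not part of the patch itself): the reworked inv_temperature_show() above computes the reported value as cur_off + inv_q30_mult(temp << MPU_TEMP_SHIFT, cur_scale), using the per-chip scale[]/offset[] tables. A self-contained sketch of that fixed-point path is below; inv_q30_mult() and MPU_TEMP_SHIFT are defined elsewhere in the driver, so the Q30 multiply and the shift value of 16 used here are assumptions for illustration only.

#include <stdio.h>
#include <stdint.h>

#define MPU_TEMP_SHIFT	16	/* assumed; not defined in this hunk */

/* typical Q30 fixed-point multiply: (a * b) / 2^30 */
static long inv_q30_mult(long a, long b)
{
	return (long)(((int64_t)a * b) >> 30);
}

int main(void)
{
	/* MPU6050 entries from the scale[]/offset[] tables in the patch */
	const long cur_scale = 3158064L;
	const long cur_off = 2394184L;
	short temp = 1000;	/* example raw temperature register value */
	long scale_t = cur_off +
		inv_q30_mult((long)temp << MPU_TEMP_SHIFT, cur_scale);

	printf("scaled temperature: %ld\n", scale_t);
	return 0;
}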
+ * inv_compass_enable() - calling this function will store compass + * enable */ -static ssize_t inv_accl_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t inv_compass_enable(struct inv_mpu_iio_s *st, + struct iio_buffer *ring, bool en) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.accl_enable); + if (en == st->chip_config.compass_enable) + return 0; + if (!en) { + st->chip_config.compass_fifo_enable = 0; + clear_bit(INV_MPU_SCAN_MAGN_X, ring->scan_mask); + clear_bit(INV_MPU_SCAN_MAGN_Y, ring->scan_mask); + clear_bit(INV_MPU_SCAN_MAGN_Z, ring->scan_mask); + } + st->chip_config.compass_enable = en; + + return 0; } /** - * inv_compass_en_store() - calling this function will store compass - * enable + * inv_attr_store() - calling this function will store current + * non-dmp parameter settings */ -static ssize_t inv_compass_en_store(struct device *dev, +static ssize_t inv_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long data, result, en; struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct iio_buffer *ring = indio_dev->buffer; - if (st->chip_config.is_asleep) - return -EPERM; - if (st->chip_config.enable) - return -EPERM; - result = kstrtoul(buf, 10, &data); + struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); + int data; + u8 d; + int result; + + mutex_lock(&indio_dev->mlock); + if (st->chip_config.enable) { + result = -EBUSY; + goto attr_store_fail; + } + if (this_attr->address <= ATTR_MOTION_LPA_THRESHOLD) { + result = st->set_power_state(st, true); + if (result) + goto attr_store_fail; + } + + result = kstrtoint(buf, 10, &data); + if (result) + goto attr_store_fail; + switch (this_attr->address) { + case ATTR_MOTION_LPA_ON: + if (INV_MPU6500 == st->chip_type) { + if (data) + /* enable and put in MPU6500 mode */ + d = BIT_ACCEL_INTEL_ENABLE + | BIT_ACCEL_INTEL_MODE; + else + d = 0; + result = inv_i2c_single_write(st, + REG_6500_ACCEL_INTEL_CTRL, d); + if (result) + goto attr_store_fail; + } + st->mot_int.mot_on = !!data; + st->chip_config.lpa_mode = !!data; + break; + case ATTR_MOTION_LPA_FREQ: + result = inv_lpa_freq(st, data); + break; + case ATTR_MOTION_LPA_DURATION: + if (INV_MPU6500 != st->chip_type) { + result = inv_i2c_single_write(st, REG_ACCEL_MOT_DUR, + MPU6050_MOTION_DUR_DEFAULT); + if (result) + goto attr_store_fail; + } + st->mot_int.mot_dur = data; + break; + case ATTR_MOTION_LPA_THRESHOLD: + if ((data > MPU6XXX_MAX_MOTION_THRESH) || (data < 0)) { + result = -EINVAL; + goto attr_store_fail; + } + d = (u8)(data >> MPU6XXX_MOTION_THRESH_SHIFT); + data = (d << MPU6XXX_MOTION_THRESH_SHIFT); + result = inv_i2c_single_write(st, REG_ACCEL_MOT_THR, d); + if (result) + goto attr_store_fail; + st->mot_int.mot_thr = data; + break; + /* from now on, power is not turned on */ + case ATTR_SELF_TEST_SAMPLES: + if (data > ST_MAX_SAMPLES || data < 0) { + result = -EINVAL; + goto attr_store_fail; + } + st->self_test.samples = data; + break; + case ATTR_SELF_TEST_THRESHOLD: + if (data > ST_MAX_THRESHOLD || data < 0) { + result = -EINVAL; + goto attr_store_fail; + } + st->self_test.threshold = data; + case ATTR_GYRO_ENABLE: + result = st->gyro_en(st, ring, !!data); + break; + case ATTR_ACCL_ENABLE: + result = st->accl_en(st, ring, !!data); + break; + case ATTR_COMPASS_ENABLE: + 
result = inv_compass_enable(st, ring, !!data); + break; + case ATTR_POWER_STATE: + fake_asleep = !data; + break; + case ATTR_FIRMWARE_LOADED: + result = inv_firmware_loaded(st, data); + break; + case ATTR_SAMPLING_FREQ: + result = inv_fifo_rate_store(st, data); + break; + default: + result = -EINVAL; + goto attr_store_fail; + }; + +attr_store_fail: + if ((this_attr->address <= ATTR_MOTION_LPA_THRESHOLD) && + (!st->chip_config.enable)) + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); if (result) return result; - if (data) - en = 1; - else - en = 0; - if (en == st->chip_config.compass_enable) - return count; - st->chip_config.compass_enable = en; - if (0 == en) { - st->chip_config.compass_fifo_enable = 0; - clear_bit(INV_MPU_SCAN_MAGN_X, ring->scan_mask); - clear_bit(INV_MPU_SCAN_MAGN_Y, ring->scan_mask); - clear_bit(INV_MPU_SCAN_MAGN_Z, ring->scan_mask); - } return count; } + +#ifdef CONFIG_INV_TESTING /** - * inv_compass_en_show() - calling this function will show compass - * enable status + * inv_reg_write_store() - register write command for testing. + * Format: WSRRDD, where RR is the register in hex, + * and DD is the data in hex. */ -static ssize_t inv_compass_en_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t inv_reg_write_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + u32 result; + u8 wreg, wval; + int temp; + char local_buf[10]; + + if ((buf[0] != 'W' && buf[0] != 'w') || + (buf[1] != 'S' && buf[1] != 's')) + return -EINVAL; + if (strlen(buf) < 6) + return -EINVAL; + + strncpy(local_buf, buf, 7); + local_buf[6] = 0; + result = sscanf(&local_buf[4], "%x", &temp); + if (result == 0) + return -EINVAL; + wval = temp; + local_buf[4] = 0; + sscanf(&local_buf[2], "%x", &temp); + if (result == 0) + return -EINVAL; + wreg = temp; - return sprintf(buf, "%d\n", st->chip_config.compass_enable); + result = inv_i2c_single_write(st, wreg, wval); + if (result) + return result; + + return count; } +#endif /* CONFIG_INV_TESTING */ + +#define INV_MPU_CHAN(_type, _channel2, _index) \ + { \ + .type = _type, \ + .modified = 1, \ + .channel2 = _channel2, \ + .info_mask = (IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | \ + IIO_CHAN_INFO_SCALE_SHARED_BIT), \ + .scan_index = _index, \ + .scan_type = IIO_ST('s', 16, 16, 0) \ + } -static const struct iio_chan_spec gyro_channels[] = { - /*there is only one gyro, with modifier X, Y, Z - So it is not indexed. 
no modifier name, only simple, x, y,z - the scale should be shared while bias is not so each - axis has different bias*/ - { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, - IIO_CHAN_SOFT_TIMESTAMP(INV_MPU_SCAN_TIMESTAMP) -}; +#define INV_ACCL_CHAN(_type, _channel2, _index) \ + { \ + .type = _type, \ + .modified = 1, \ + .channel2 = _channel2, \ + .info_mask = (IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | \ + IIO_CHAN_INFO_SCALE_SHARED_BIT | \ + IIO_CHAN_INFO_OFFSET_SEPARATE_BIT), \ + .scan_index = _index, \ + .scan_type = IIO_ST('s', 16, 16, 0) \ + } -static const struct iio_chan_spec gyro_accel_channels[] = { - { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_R, - .scan_index = INV_MPU_SCAN_QUAT_R, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_X, - .scan_index = INV_MPU_SCAN_QUAT_X, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_Y, - .scan_index = INV_MPU_SCAN_QUAT_Y, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_Z, - .scan_index = INV_MPU_SCAN_QUAT_Z, - .scan_type = IIO_ST('s', 32, 32, 0) - }, - IIO_CHAN_SOFT_TIMESTAMP(INV_MPU_SCAN_TIMESTAMP) -}; -static const struct iio_chan_spec gyro_accel_compass_channels[] = { - { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - 
IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_MAGN, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_MAGN_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_MAGN, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_MAGN_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_MAGN, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_MAGN_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, - { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_R, - .scan_index = INV_MPU_SCAN_QUAT_R, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_X, - .scan_index = INV_MPU_SCAN_QUAT_X, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_Y, - .scan_index = INV_MPU_SCAN_QUAT_Y, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_Z, - .scan_index = INV_MPU_SCAN_QUAT_Z, - .scan_type = IIO_ST('s', 32, 32, 0) - }, - IIO_CHAN_SOFT_TIMESTAMP(INV_MPU_SCAN_TIMESTAMP) -}; +#define INV_MPU_QUATERNION_CHAN(_channel2, _index) \ + { \ + .type = IIO_QUATERNION, \ + .modified = 1, \ + .channel2 = _channel2, \ + .scan_index = _index, \ + .scan_type = IIO_ST('s', 32, 32, 0) \ + } -static struct inv_chip_chan_info chip_channel_info[] = { - { - .channels = gyro_channels, - .num_channels = ARRAY_SIZE(gyro_channels), - }, - { - .channels = gyro_accel_channels, - .num_channels = ARRAY_SIZE(gyro_accel_channels), - }, - { - .channels = gyro_accel_compass_channels, - .num_channels = ARRAY_SIZE(gyro_accel_compass_channels), +#define INV_MPU_MAGN_CHAN(_channel2, _index) \ + { \ + .type = IIO_MAGN, \ + .modified = 1, \ + .channel2 = _channel2, \ + .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, \ + .scan_index = _index, \ + .scan_type = IIO_ST('s', 16, 16, 0) \ } + +static const struct iio_chan_spec inv_mpu_channels[] = { + IIO_CHAN_SOFT_TIMESTAMP(INV_MPU_SCAN_TIMESTAMP), + INV_MPU_CHAN(IIO_ANGL_VEL, IIO_MOD_X, INV_MPU_SCAN_GYRO_X), + INV_MPU_CHAN(IIO_ANGL_VEL, IIO_MOD_Y, INV_MPU_SCAN_GYRO_Y), + 
INV_MPU_CHAN(IIO_ANGL_VEL, IIO_MOD_Z, INV_MPU_SCAN_GYRO_Z), + + INV_ACCL_CHAN(IIO_ACCEL, IIO_MOD_X, INV_MPU_SCAN_ACCL_X), + INV_ACCL_CHAN(IIO_ACCEL, IIO_MOD_Y, INV_MPU_SCAN_ACCL_Y), + INV_ACCL_CHAN(IIO_ACCEL, IIO_MOD_Z, INV_MPU_SCAN_ACCL_Z), + + INV_MPU_QUATERNION_CHAN(IIO_MOD_R, INV_MPU_SCAN_QUAT_R), + INV_MPU_QUATERNION_CHAN(IIO_MOD_X, INV_MPU_SCAN_QUAT_X), + INV_MPU_QUATERNION_CHAN(IIO_MOD_Y, INV_MPU_SCAN_QUAT_Y), + INV_MPU_QUATERNION_CHAN(IIO_MOD_Z, INV_MPU_SCAN_QUAT_Z), + + INV_MPU_MAGN_CHAN(IIO_MOD_X, INV_MPU_SCAN_MAGN_X), + INV_MPU_MAGN_CHAN(IIO_MOD_Y, INV_MPU_SCAN_MAGN_Y), + INV_MPU_MAGN_CHAN(IIO_MOD_Z, INV_MPU_SCAN_MAGN_Z), }; + /*constant IIO attribute */ -static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("10 50 100 200 500"); -static IIO_DEV_ATTR_SAMP_FREQ(S_IRUGO | S_IWUSR, inv_fifo_rate_show, - inv_fifo_rate_store); -static DEVICE_ATTR(temperature, S_IRUGO, inv_temperature_show, NULL); -static DEVICE_ATTR(clock_source, S_IRUGO, inv_clk_src_show, NULL); -static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, inv_power_state_show, - inv_power_state_store); -static DEVICE_ATTR(firmware_loaded, S_IRUGO | S_IWUSR, - inv_firmware_loaded_show, inv_firmware_loaded_store); -static DEVICE_ATTR(lpa_mode, S_IRUGO | S_IWUSR, inv_lpa_mode_show, - inv_lpa_mode_store); -static DEVICE_ATTR(lpa_freq, S_IRUGO | S_IWUSR, inv_lpa_freq_show, - inv_lpa_freq_store); +static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("10 20 50 100 200 500"); + +/* special sysfs */ static DEVICE_ATTR(reg_dump, S_IRUGO, inv_reg_dump_show, NULL); -static DEVICE_ATTR(self_test, S_IRUGO, inv_self_test_show, NULL); -static DEVICE_ATTR(key, S_IRUGO, inv_key_show, NULL); -static DEVICE_ATTR(gyro_matrix, S_IRUGO, inv_gyro_matrix_show, NULL); -static DEVICE_ATTR(accl_matrix, S_IRUGO, inv_accl_matrix_show, NULL); -static DEVICE_ATTR(compass_matrix, S_IRUGO, inv_compass_matrix_show, NULL); -static DEVICE_ATTR(flick_lower, S_IRUGO | S_IWUSR, inv_flick_lower_show, - inv_flick_lower_store); -static DEVICE_ATTR(flick_upper, S_IRUGO | S_IWUSR, inv_flick_upper_show, - inv_flick_upper_store); -static DEVICE_ATTR(flick_counter, S_IRUGO | S_IWUSR, inv_flick_counter_show, - inv_flick_counter_store); -static DEVICE_ATTR(flick_message_on, S_IRUGO | S_IWUSR, inv_flick_msg_on_show, - inv_flick_msg_on_store); -static DEVICE_ATTR(flick_int_on, S_IRUGO | S_IWUSR, inv_flick_int_on_show, - inv_flick_int_on_store); -static DEVICE_ATTR(flick_axis, S_IRUGO | S_IWUSR, inv_flick_axis_show, - inv_flick_axis_store); -static DEVICE_ATTR(dmp_on, S_IRUGO | S_IWUSR, inv_dmp_on_show, - inv_dmp_on_store); -static DEVICE_ATTR(dmp_int_on, S_IRUGO | S_IWUSR, inv_dmp_int_on_show, - inv_dmp_int_on_store); -static DEVICE_ATTR(dmp_output_rate, S_IRUGO | S_IWUSR, - inv_dmp_output_rate_show, inv_dmp_output_rate_store); -static DEVICE_ATTR(orientation_on, S_IRUGO | S_IWUSR, - inv_orientation_on_show, inv_orientation_on_store); -static DEVICE_ATTR(quaternion_on, S_IRUGO | S_IWUSR, - inv_quaternion_on_show, inv_quaternion_on_store); -static DEVICE_ATTR(display_orientation_on, S_IRUGO | S_IWUSR, - inv_display_orient_on_show, inv_display_orient_on_store); -static DEVICE_ATTR(tap_on, S_IRUGO | S_IWUSR, inv_tap_on_show, - inv_tap_on_store); -static DEVICE_ATTR(tap_time, S_IRUGO | S_IWUSR, inv_tap_time_show, - inv_tap_time_store); -static DEVICE_ATTR(tap_min_count, S_IRUGO | S_IWUSR, inv_tap_min_count_show, - inv_tap_min_count_store); -static DEVICE_ATTR(tap_threshold, S_IRUGO | S_IWUSR, inv_tap_threshold_show, - inv_tap_threshold_store); -static DEVICE_ATTR(pedometer_time, S_IRUGO | S_IWUSR, 
inv_pedometer_time_show, - inv_pedometer_time_store); -static DEVICE_ATTR(pedometer_steps, S_IRUGO | S_IWUSR, - inv_pedometer_steps_show, inv_pedometer_steps_store); -static DEVICE_ATTR(event_flick, S_IRUGO, inv_dmp_flick_show, NULL); -static DEVICE_ATTR(event_orientation, S_IRUGO, inv_dmp_orient_show, NULL); +static DEVICE_ATTR(temperature, S_IRUGO, inv_temperature_show, NULL); + +/* event based sysfs, needs poll to read */ static DEVICE_ATTR(event_tap, S_IRUGO, inv_dmp_tap_show, NULL); static DEVICE_ATTR(event_display_orientation, S_IRUGO, - inv_dmp_display_orient_show, NULL); -static DEVICE_ATTR(gyro_enable, S_IRUGO | S_IWUSR, inv_gyro_enable_show, - inv_gyro_enable_store); -static DEVICE_ATTR(accl_enable, S_IRUGO | S_IWUSR, inv_accl_enable_show, - inv_accl_enable_store); -static DEVICE_ATTR(compass_enable, S_IRUGO | S_IWUSR, inv_compass_en_show, - inv_compass_en_store); + inv_dmp_display_orient_show, NULL); +static DEVICE_ATTR(event_accel_motion, S_IRUGO, inv_accel_motion_show, NULL); +static DEVICE_ATTR(event_smd, S_IRUGO, inv_smd_show, NULL); + +/* DMP sysfs with power on/off */ +static IIO_DEVICE_ATTR(smd_enable, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_SMD_ENABLE); +static IIO_DEVICE_ATTR(smd_threshold, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_SMD_THLD); +static IIO_DEVICE_ATTR(smd_delay_threshold, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_SMD_DELAY_THLD); +static IIO_DEVICE_ATTR(smd_delay_threshold2, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_SMD_DELAY_THLD2); +static IIO_DEVICE_ATTR(tap_on, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_TAP_ON); +static IIO_DEVICE_ATTR(tap_threshold, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_TAP_THRESHOLD); +static IIO_DEVICE_ATTR(tap_min_count, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_TAP_MIN_COUNT); +static IIO_DEVICE_ATTR(tap_time, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_TAP_TIME); +static IIO_DEVICE_ATTR(display_orientation_on, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_DISPLAY_ORIENTATION_ON); + +/* DMP sysfs without power on/off */ +static IIO_DEVICE_ATTR(dmp_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_ON); +static IIO_DEVICE_ATTR(dmp_int_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_INT_ON); +static IIO_DEVICE_ATTR(dmp_event_int_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_EVENT_INT_ON); +static IIO_DEVICE_ATTR(dmp_output_rate, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_OUTPUT_RATE); +static IIO_DEVICE_ATTR(quaternion_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_QUATERNION_ON); + +/* non DMP sysfs with power on/off */ +static IIO_DEVICE_ATTR(motion_lpa_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_MOTION_LPA_ON); +static IIO_DEVICE_ATTR(motion_lpa_freq, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_MOTION_LPA_FREQ); +static IIO_DEVICE_ATTR(motion_lpa_duration, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_MOTION_LPA_DURATION); +static IIO_DEVICE_ATTR(motion_lpa_threshold, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_MOTION_LPA_THRESHOLD); + +/* non DMP sysfs without power on/off */ +static IIO_DEVICE_ATTR(self_test_samples, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_SELF_TEST_SAMPLES); +static IIO_DEVICE_ATTR(self_test_threshold, S_IRUGO | S_IWUSR, 
inv_attr_show, + inv_attr_store, ATTR_SELF_TEST_THRESHOLD); +static IIO_DEVICE_ATTR(gyro_enable, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_GYRO_ENABLE); +static IIO_DEVICE_ATTR(accl_enable, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_ACCL_ENABLE); +static IIO_DEVICE_ATTR(compass_enable, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_COMPASS_ENABLE); +static IIO_DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_POWER_STATE); +static IIO_DEVICE_ATTR(firmware_loaded, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_FIRMWARE_LOADED); +static IIO_DEVICE_ATTR(sampling_frequency, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_SAMPLING_FREQ); + +/* show method only sysfs but with power on/off */ +static IIO_DEVICE_ATTR(self_test, S_IRUGO, inv_attr_show, NULL, + ATTR_SELF_TEST); + +/* show method only sysfs */ +static IIO_DEVICE_ATTR(gyro_matrix, S_IRUGO, inv_attr_show, NULL, + ATTR_GYRO_MATRIX); +static IIO_DEVICE_ATTR(accl_matrix, S_IRUGO, inv_attr_show, NULL, + ATTR_ACCL_MATRIX); +static IIO_DEVICE_ATTR(compass_matrix, S_IRUGO, inv_attr_show, NULL, + ATTR_COMPASS_MATRIX); +static IIO_DEVICE_ATTR(secondary_name, S_IRUGO, inv_attr_show, NULL, + ATTR_SECONDARY_NAME); + +#ifdef CONFIG_INV_TESTING +static IIO_DEVICE_ATTR(reg_write, S_IRUGO | S_IWUSR, inv_attr_show, + inv_reg_write_store, ATTR_REG_WRITE); +/* smd debug related sysfs */ +static IIO_DEVICE_ATTR(debug_smd_enable_testp1, S_IWUSR, NULL, + inv_dmp_attr_store, ATTR_DEBUG_SMD_ENABLE_TESTP1); +static IIO_DEVICE_ATTR(debug_smd_enable_testp2, S_IWUSR, NULL, + inv_dmp_attr_store, ATTR_DEBUG_SMD_ENABLE_TESTP2); +static IIO_DEVICE_ATTR(debug_smd_exe_state, S_IRUGO, inv_attr_show, + NULL, ATTR_DEBUG_SMD_EXE_STATE); +static IIO_DEVICE_ATTR(debug_smd_delay_cntr, S_IRUGO, inv_attr_show, + NULL, ATTR_DEBUG_SMD_DELAY_CNTR); +#endif static const struct attribute *inv_gyro_attributes[] = { - &dev_attr_gyro_enable.attr, - &dev_attr_temperature.attr, - &dev_attr_clock_source.attr, - &dev_attr_power_state.attr, + &iio_const_attr_sampling_frequency_available.dev_attr.attr, &dev_attr_reg_dump.attr, - &dev_attr_self_test.attr, - &dev_attr_key.attr, - &dev_attr_gyro_matrix.attr, + &dev_attr_temperature.attr, + &iio_dev_attr_self_test_samples.dev_attr.attr, + &iio_dev_attr_self_test_threshold.dev_attr.attr, + &iio_dev_attr_gyro_enable.dev_attr.attr, + &iio_dev_attr_power_state.dev_attr.attr, &iio_dev_attr_sampling_frequency.dev_attr.attr, - &iio_const_attr_sampling_frequency_available.dev_attr.attr, + &iio_dev_attr_self_test.dev_attr.attr, + &iio_dev_attr_gyro_matrix.dev_attr.attr, + &iio_dev_attr_secondary_name.dev_attr.attr, +#ifdef CONFIG_INV_TESTING + &iio_dev_attr_reg_write.dev_attr.attr, + &iio_dev_attr_debug_smd_enable_testp1.dev_attr.attr, + &iio_dev_attr_debug_smd_enable_testp2.dev_attr.attr, + &iio_dev_attr_debug_smd_exe_state.dev_attr.attr, + &iio_dev_attr_debug_smd_delay_cntr.dev_attr.attr, +#endif }; static const struct attribute *inv_mpu6050_attributes[] = { - &dev_attr_accl_enable.attr, - &dev_attr_accl_matrix.attr, - &dev_attr_firmware_loaded.attr, - &dev_attr_lpa_mode.attr, - &dev_attr_lpa_freq.attr, - &dev_attr_flick_lower.attr, - &dev_attr_flick_upper.attr, - &dev_attr_flick_counter.attr, - &dev_attr_flick_message_on.attr, - &dev_attr_flick_int_on.attr, - &dev_attr_flick_axis.attr, - &dev_attr_dmp_on.attr, - &dev_attr_dmp_int_on.attr, - &dev_attr_dmp_output_rate.attr, - &dev_attr_orientation_on.attr, - &dev_attr_quaternion_on.attr, - 
&dev_attr_display_orientation_on.attr, - &dev_attr_tap_on.attr, - &dev_attr_tap_time.attr, - &dev_attr_tap_min_count.attr, - &dev_attr_tap_threshold.attr, - &dev_attr_pedometer_time.attr, - &dev_attr_pedometer_steps.attr, - &dev_attr_event_flick.attr, - &dev_attr_event_orientation.attr, &dev_attr_event_display_orientation.attr, &dev_attr_event_tap.attr, + &dev_attr_event_accel_motion.attr, + &dev_attr_event_smd.attr, + &iio_dev_attr_smd_enable.dev_attr.attr, + &iio_dev_attr_smd_threshold.dev_attr.attr, + &iio_dev_attr_smd_delay_threshold.dev_attr.attr, + &iio_dev_attr_smd_delay_threshold2.dev_attr.attr, + &iio_dev_attr_tap_on.dev_attr.attr, + &iio_dev_attr_tap_threshold.dev_attr.attr, + &iio_dev_attr_tap_min_count.dev_attr.attr, + &iio_dev_attr_tap_time.dev_attr.attr, + &iio_dev_attr_display_orientation_on.dev_attr.attr, + &iio_dev_attr_dmp_on.dev_attr.attr, + &iio_dev_attr_dmp_int_on.dev_attr.attr, + &iio_dev_attr_dmp_event_int_on.dev_attr.attr, + &iio_dev_attr_dmp_output_rate.dev_attr.attr, + &iio_dev_attr_quaternion_on.dev_attr.attr, + &iio_dev_attr_motion_lpa_on.dev_attr.attr, + &iio_dev_attr_motion_lpa_freq.dev_attr.attr, + &iio_dev_attr_motion_lpa_duration.dev_attr.attr, + &iio_dev_attr_motion_lpa_threshold.dev_attr.attr, + &iio_dev_attr_accl_enable.dev_attr.attr, + &iio_dev_attr_firmware_loaded.dev_attr.attr, + &iio_dev_attr_accl_matrix.dev_attr.attr, }; static const struct attribute *inv_compass_attributes[] = { - &dev_attr_compass_matrix.attr, - &dev_attr_compass_enable.attr, + &iio_dev_attr_compass_enable.dev_attr.attr, + &iio_dev_attr_compass_matrix.dev_attr.attr, }; static const struct attribute *inv_mpu3050_attributes[] = { - &dev_attr_accl_matrix.attr, - &dev_attr_accl_enable.attr, + &iio_dev_attr_accl_enable.dev_attr.attr, + &iio_dev_attr_accl_matrix.dev_attr.attr, }; static struct attribute *inv_attributes[ARRAY_SIZE(inv_gyro_attributes) + ARRAY_SIZE(inv_mpu6050_attributes) + ARRAY_SIZE(inv_compass_attributes) + 1]; + static const struct attribute_group inv_attribute_group = { .name = "mpu", .attrs = inv_attributes @@ -2138,23 +1661,26 @@ static const struct iio_info mpu_info = { /** * inv_setup_compass() - Configure compass. 
*/ -static int inv_setup_compass(struct inv_gyro_state_s *st) +static int inv_setup_compass(struct inv_mpu_iio_s *st) { int result; - unsigned char data[4]; + u8 data[4]; - result = inv_i2c_read(st, REG_YGOFFS_TC, 1, data); - if (result) - return result; - data[0] &= ~BIT_I2C_MST_VDDIO; - if (st->plat_data.level_shifter) - data[0] |= BIT_I2C_MST_VDDIO; - /*set up VDDIO register */ - result = inv_i2c_single_write(st, REG_YGOFFS_TC, data[0]); - if (result) - return result; + if (INV_MPU6050 == st->chip_type) { + result = inv_i2c_read(st, REG_YGOFFS_TC, 1, data); + if (result) + return result; + data[0] &= ~BIT_I2C_MST_VDDIO; + if (st->plat_data.level_shifter) + data[0] |= BIT_I2C_MST_VDDIO; + /*set up VDDIO register */ + result = inv_i2c_single_write(st, REG_YGOFFS_TC, data[0]); + if (result) + return result; + } /* set to bypass mode */ - result = inv_i2c_single_write(st, REG_INT_PIN_CFG, BIT_BYPASS_EN); + result = inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config | BIT_BYPASS_EN); if (result) return result; /*read secondary i2c ID register */ @@ -2164,7 +1690,7 @@ static int inv_setup_compass(struct inv_gyro_state_s *st) if (data[0] != DATA_AKM_ID) return -ENXIO; /*set AKM to Fuse ROM access mode */ - result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PW_FR); + result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_FR); if (result) return result; result = inv_secondary_read(REG_AKM_SENSITIVITY, THREE_AXIS, @@ -2172,15 +1698,17 @@ static int inv_setup_compass(struct inv_gyro_state_s *st) if (result) return result; /*revert to power down mode */ - result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PW_DN); + result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PD); if (result) return result; - pr_err("senx=%d, seny=%d,senz=%d\n", - st->chip_info.compass_sens[0], - st->chip_info.compass_sens[1], - st->chip_info.compass_sens[2]); + pr_debug("%s senx=%d, seny=%d, senz=%d\n", + st->hw->name, + st->chip_info.compass_sens[0], + st->chip_info.compass_sens[1], + st->chip_info.compass_sens[2]); /*restore to non-bypass mode */ - result = inv_i2c_single_write(st, REG_INT_PIN_CFG, 0); + result = inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config); if (result) return result; @@ -2218,29 +1746,19 @@ static int inv_setup_compass(struct inv_gyro_state_s *st) return result; /* output data for slave 1 is fixed, single measure mode*/ st->compass_scale = 1; - data[0] = 1; if (COMPASS_ID_AK8975 == st->plat_data.sec_slave_id) { - st->compass_st_upper[0] = DATA_AKM8975_ST_X_UP; - st->compass_st_upper[1] = DATA_AKM8975_ST_Y_UP; - st->compass_st_upper[2] = DATA_AKM8975_ST_Z_UP; - st->compass_st_lower[0] = DATA_AKM8975_ST_X_LW; - st->compass_st_lower[1] = DATA_AKM8975_ST_Y_LW; - st->compass_st_lower[2] = DATA_AKM8975_ST_Z_LW; + st->compass_st_upper = AKM8975_ST_Upper; + st->compass_st_lower = AKM8975_ST_Lower; + data[0] = DATA_AKM_MODE_SM; } else if (COMPASS_ID_AK8972 == st->plat_data.sec_slave_id) { - st->compass_st_upper[0] = DATA_AKM8972_ST_X_UP; - st->compass_st_upper[1] = DATA_AKM8972_ST_Y_UP; - st->compass_st_upper[2] = DATA_AKM8972_ST_Z_UP; - st->compass_st_lower[0] = DATA_AKM8972_ST_X_LW; - st->compass_st_lower[1] = DATA_AKM8972_ST_Y_LW; - st->compass_st_lower[2] = DATA_AKM8972_ST_Z_LW; + st->compass_st_upper = AKM8972_ST_Upper; + st->compass_st_lower = AKM8972_ST_Lower; + data[0] = DATA_AKM_MODE_SM; } else if (COMPASS_ID_AK8963 == st->plat_data.sec_slave_id) { - st->compass_st_upper[0] = DATA_AKM8963_ST_X_UP; - st->compass_st_upper[1] = 
DATA_AKM8963_ST_Y_UP; - st->compass_st_upper[2] = DATA_AKM8963_ST_Z_UP; - st->compass_st_lower[0] = DATA_AKM8963_ST_X_LW; - st->compass_st_lower[1] = DATA_AKM8963_ST_Y_LW; - st->compass_st_lower[2] = DATA_AKM8963_ST_Z_LW; - data[0] |= (st->compass_scale << AKM8963_SCALE_SHIFT); + st->compass_st_upper = AKM8963_ST_Upper; + st->compass_st_lower = AKM8963_ST_Lower; + data[0] = DATA_AKM_MODE_SM | + (st->compass_scale << AKM8963_SCALE_SHIFT); } result = inv_i2c_single_write(st, REG_I2C_SLV1_DO, data[0]); if (result) @@ -2251,15 +1769,56 @@ static int inv_setup_compass(struct inv_gyro_state_s *st) return result; } +static void inv_setup_func_ptr(struct inv_mpu_iio_s *st) +{ + if (st->chip_type == INV_MPU3050) { + st->set_power_state = set_power_mpu3050; + st->switch_gyro_engine = inv_switch_3050_gyro_engine; + st->switch_accl_engine = inv_switch_3050_accl_engine; + st->init_config = inv_init_config_mpu3050; + st->setup_reg = inv_setup_reg_mpu3050; + } else { + st->set_power_state = set_power_itg; + st->switch_gyro_engine = inv_switch_gyro_engine; + st->switch_accl_engine = inv_switch_accl_engine; + st->init_config = inv_init_config; + st->setup_reg = inv_setup_reg; + /*MPU6XXX special functions */ + st->compass_en = inv_compass_enable; + st->quaternion_en = inv_quaternion_on; + st->gyro_en = inv_gyro_enable; + st->accl_en = inv_accl_enable; + } +} + +static int inv_detect_6xxx(struct inv_mpu_iio_s *st) +{ + int result; + u8 d; + + result = inv_i2c_read(st, REG_WHOAMI, 1, &d); + if (result) + return result; + if (d == MPU6500_ID) { + st->chip_type = INV_MPU6500; + strcpy(st->name, "mpu6500"); + } else { + strcpy(st->name, "mpu6050"); + } + + return 0; +} + /** * inv_check_chip_type() - check and setup chip type. */ -static int inv_check_chip_type(struct inv_gyro_state_s *st, +static int inv_check_chip_type(struct inv_mpu_iio_s *st, const struct i2c_device_id *id) { struct inv_reg_map_s *reg; - int result, chan_index; + int result; int t_ind; + if (!strcmp(id->name, "itg3500")) st->chip_type = INV_ITG3500; else if (!strcmp(id->name, "mpu3050")) @@ -2267,86 +1826,131 @@ static int inv_check_chip_type(struct inv_gyro_state_s *st, else if (!strcmp(id->name, "mpu6050")) st->chip_type = INV_MPU6050; else if (!strcmp(id->name, "mpu9150")) - st->chip_type = INV_MPU9150; + st->chip_type = INV_MPU6050; + else if (!strcmp(id->name, "mpu6500")) + st->chip_type = INV_MPU6500; + else if (!strcmp(id->name, "mpu9250")) + st->chip_type = INV_MPU6500; + else if (!strcmp(id->name, "mpu6xxx")) + st->chip_type = INV_MPU6050; else return -EPERM; - st->hw = (struct inv_hw_s *)(hw_info + st->chip_type); + inv_setup_func_ptr(st); + st->hw = &hw_info[st->chip_type]; st->mpu_slave = NULL; - chan_index = CHAN_INDEX_GYRO; - if (INV_MPU9150 == st->chip_type) { - st->plat_data.sec_slave_type = SECONDARY_SLAVE_TYPE_COMPASS; - st->plat_data.sec_slave_id = COMPASS_ID_AK8975; - st->chip_config.has_compass = 1; - chan_index = CHAN_INDEX_GYRO_ACCL_MAGN; - } - if (SECONDARY_SLAVE_TYPE_ACCEL == st->plat_data.sec_slave_type) { - if (ACCEL_ID_BMA250 == st->plat_data.sec_slave_id) - inv_register_bma250_slave(st); - chan_index = CHAN_INDEX_GYRO_ACCL; - } - if (SECONDARY_SLAVE_TYPE_COMPASS == st->plat_data.sec_slave_type) - st->chip_config.has_compass = 1; - else - st->chip_config.has_compass = 0; - if (INV_MPU6050 == st->chip_type) { - if (st->chip_config.has_compass) - chan_index = CHAN_INDEX_GYRO_ACCL_MAGN; - else - chan_index = CHAN_INDEX_GYRO_ACCL; - } - st->chan_info = &chip_channel_info[chan_index]; reg = &st->reg; - if (INV_MPU3050 
== st->chip_type) - inv_setup_reg_mpu3050(reg); - else - inv_setup_reg(reg); - st->chip_config.gyro_enable = 1; - result = inv_set_power_state(st, 1); + st->setup_reg(reg); + /* reset to make sure previous state are not there */ + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, BIT_H_RESET); + if (result) + return result; + msleep(POWER_UP_TIME); + /* toggle power state */ + result = st->set_power_state(st, false); if (result) return result; - if (INV_ITG3500 != st->chip_type && INV_MPU3050 != st->chip_type) { - result = inv_get_silicon_rev_mpu6050(st); - if (result) { - inv_i2c_single_write(st, reg->pwr_mgmt_1, - BIT_SLEEP | INV_CLK_PLL); + result = st->set_power_state(st, true); + if (result) + return result; + + if (!strcmp(id->name, "mpu6xxx")) { + /* for MPU6500, reading register need more time */ + msleep(POWER_UP_TIME); + result = inv_detect_6xxx(st); + if (result) return result; + } + + switch (st->chip_type) { + case INV_ITG3500: + st->num_channels = INV_CHANNEL_NUM_GYRO; + break; + case INV_MPU6050: + case INV_MPU6500: + if (SECONDARY_SLAVE_TYPE_COMPASS == + st->plat_data.sec_slave_type) { + st->chip_config.has_compass = 1; + st->num_channels = + INV_CHANNEL_NUM_GYRO_ACCL_QUANTERNION_MAGN; + } else { + st->chip_config.has_compass = 0; + st->num_channels = + INV_CHANNEL_NUM_GYRO_ACCL_QUANTERNION; + } + break; + case INV_MPU3050: + if (SECONDARY_SLAVE_TYPE_ACCEL == + st->plat_data.sec_slave_type) { + if (ACCEL_ID_BMA250 == st->plat_data.sec_slave_id) + inv_register_mpu3050_slave(st); + st->num_channels = INV_CHANNEL_NUM_GYRO_ACCL; + } else { + st->num_channels = INV_CHANNEL_NUM_GYRO; } + break; + default: + result = st->set_power_state(st, false); + return -ENODEV; } + switch (st->chip_type) { + case INV_MPU6050: + result = inv_get_silicon_rev_mpu6050(st); + break; + case INV_MPU6500: + result = inv_get_silicon_rev_mpu6500(st); + break; + default: + result = 0; + break; + } + if (result) { + pr_err("read silicon rev error\n"); + st->set_power_state(st, false); + return result; + } + /* turn off the gyro engine after OTP reading */ + result = st->switch_gyro_engine(st, false); + if (result) + return result; + result = st->switch_accl_engine(st, false); + if (result) + return result; if (st->chip_config.has_compass) { result = inv_setup_compass(st); if (result) { - inv_i2c_single_write(st, reg->pwr_mgmt_1, - BIT_SLEEP | INV_CLK_PLL); + pr_err("compass setup failed\n"); + st->set_power_state(st, false); return result; } } t_ind = 0; memcpy(&inv_attributes[t_ind], inv_gyro_attributes, - sizeof(inv_gyro_attributes)); - t_ind = ARRAY_SIZE(inv_gyro_attributes); + sizeof(inv_gyro_attributes)); + t_ind += ARRAY_SIZE(inv_gyro_attributes); if (INV_MPU3050 == st->chip_type && st->mpu_slave != NULL) { memcpy(&inv_attributes[t_ind], inv_mpu3050_attributes, - sizeof(inv_mpu3050_attributes)); + sizeof(inv_mpu3050_attributes)); t_ind += ARRAY_SIZE(inv_mpu3050_attributes); inv_attributes[t_ind] = NULL; return 0; } - if (chan_index > CHAN_INDEX_GYRO) { + if ((INV_MPU6050 == st->chip_type) || (INV_MPU6500 == st->chip_type)) { memcpy(&inv_attributes[t_ind], inv_mpu6050_attributes, - sizeof(inv_mpu6050_attributes)); + sizeof(inv_mpu6050_attributes)); t_ind += ARRAY_SIZE(inv_mpu6050_attributes); } - if (chan_index > CHAN_INDEX_GYRO_ACCL) { + if (st->chip_config.has_compass) { memcpy(&inv_attributes[t_ind], inv_compass_attributes, - sizeof(inv_compass_attributes)); + sizeof(inv_compass_attributes)); t_ind += ARRAY_SIZE(inv_compass_attributes); } inv_attributes[t_ind] = NULL; + return 0; } @@ -2367,6 
+1971,7 @@ static int inv_create_dmp_sysfs(struct iio_dev *ind) { int result; result = sysfs_create_bin_file(&ind->dev.kobj, &dmp_firmware); + return result; } @@ -2376,21 +1981,23 @@ static int inv_create_dmp_sysfs(struct iio_dev *ind) static int inv_mpu_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct inv_gyro_state_s *st; + struct inv_mpu_iio_s *st; struct iio_dev *indio_dev; - int result, reg_done; + int result; + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { - result = -ENODEV; + result = -ENOSYS; + pr_err("I2c function error\n"); goto out_no_free; } indio_dev = iio_allocate_device(sizeof(*st)); if (indio_dev == NULL) { + pr_err("memory allocation failed\n"); result = -ENOMEM; goto out_no_free; } - reg_done = 0; st = iio_priv(indio_dev); - st->i2c = client; + st->client = client; st->sl_handle = client->adapter; st->i2c_addr = client->addr; st->plat_data = @@ -2399,16 +2006,14 @@ static int inv_mpu_probe(struct i2c_client *client, result = inv_check_chip_type(st, id); if (result) goto out_free; - if (INV_MPU3050 == st->chip_type) - result = inv_init_config_mpu3050(indio_dev); - else - result = inv_init_config(indio_dev); + + result = st->init_config(indio_dev); if (result) { dev_err(&client->adapter->dev, "Could not initialize device.\n"); goto out_free; } - result = inv_set_power_state(st, 1); + result = st->set_power_state(st, false); if (result) { dev_err(&client->adapter->dev, "%s could not be turned off.\n", st->hw->name); @@ -2418,38 +2023,58 @@ static int inv_mpu_probe(struct i2c_client *client, /* Make state variables available to all _show and _store functions. */ i2c_set_clientdata(client, indio_dev); indio_dev->dev.parent = &client->dev; - indio_dev->name = id->name; - indio_dev->channels = st->chan_info->channels; - indio_dev->num_channels = st->chan_info->num_channels; + if (!strcmp(id->name, "mpu6xxx")) + indio_dev->name = st->name; + else + indio_dev->name = id->name; + indio_dev->channels = inv_mpu_channels; + indio_dev->num_channels = st->num_channels; + indio_dev->info = &mpu_info; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->currentmode = INDIO_DIRECT_MODE; result = inv_mpu_configure_ring(indio_dev); - if (result) + if (result) { + pr_err("configure ring buffer fail\n"); goto out_free; - result = iio_buffer_register(indio_dev, st->chan_info->channels, - st->chan_info->num_channels); - if (result) + } + result = iio_buffer_register(indio_dev, indio_dev->channels, + indio_dev->num_channels); + if (result) { + pr_err("ring buffer register fail\n"); goto out_unreg_ring; + } st->irq = client->irq; result = inv_mpu_probe_trigger(indio_dev); - if (result) + if (result) { + pr_err("trigger probe fail\n"); goto out_remove_ring; + } + + /* Tell the i2c counter, we have an IRQ */ + INV_I2C_SETIRQ(IRQ_MPU, client->irq); result = iio_device_register(indio_dev); - if (result) + if (result) { + pr_err("IIO device register fail\n"); goto out_remove_trigger; - if (INV_MPU6050 == st->chip_type || INV_MPU9150 == st->chip_type) { + } + + if (INV_MPU6050 == st->chip_type || + INV_MPU6500 == st->chip_type) { result = inv_create_dmp_sysfs(indio_dev); - if (result) + if (result) { + pr_err("create dmp sysfs failed\n"); goto out_unreg_iio; + } } INIT_KFIFO(st->timestamps); spin_lock_init(&st->time_stamp_lock); - pr_info("%s: Probe name %s\n", __func__, id->name); - dev_info(&client->adapter->dev, "%s is ready to go!\n", st->hw->name); + dev_info(&client->dev, "%s is ready to go!\n", + indio_dev->name); + return 0; out_unreg_iio: 
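	/* Error path: each label below undoes one probe step from above, in
	 * reverse order (IIO device registration, trigger, ring buffer
	 * registration, ring setup), before the iio_dev allocation is freed
	 * and the failure is reported.
	 */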
iio_device_unregister(indio_dev); @@ -2464,27 +2089,78 @@ static int inv_mpu_probe(struct i2c_client *client, iio_free_device(indio_dev); out_no_free: dev_err(&client->adapter->dev, "%s failed %d\n", __func__, result); + return -EIO; } +static void inv_mpu_shutdown(struct i2c_client *client) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct inv_reg_map_s *reg; + int result; + + reg = &st->reg; + dev_dbg(&client->adapter->dev, "Shutting down %s...\n", st->hw->name); + + /* reset to make sure previous state are not there */ + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, BIT_H_RESET); + if (result) + dev_err(&client->adapter->dev, "Failed to reset %s\n", + st->hw->name); + msleep(POWER_UP_TIME); + /* turn off power to ensure gyro engine is off */ + result = st->set_power_state(st, false); + if (result) + dev_err(&client->adapter->dev, "Failed to turn off %s\n", + st->hw->name); +} + /** * inv_mpu_remove() - remove function. */ static int inv_mpu_remove(struct i2c_client *client) { struct iio_dev *indio_dev = i2c_get_clientdata(client); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); kfifo_free(&st->timestamps); iio_device_unregister(indio_dev); - inv_mpu_remove_trigger(indio_dev); + if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) + inv_mpu_remove_trigger(indio_dev); iio_buffer_unregister(indio_dev); inv_mpu_unconfigure_ring(indio_dev); iio_free_device(indio_dev); dev_info(&client->adapter->dev, "inv-mpu-iio module removed.\n"); + return 0; } -static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; + +#ifdef CONFIG_PM +static int inv_mpu_resume(struct device *dev) +{ + struct inv_mpu_iio_s *st = + iio_priv(i2c_get_clientdata(to_i2c_client(dev))); + pr_debug("%s inv_mpu_resume\n", st->hw->name); + return st->set_power_state(st, true); +} + +static int inv_mpu_suspend(struct device *dev) +{ + struct inv_mpu_iio_s *st = + iio_priv(i2c_get_clientdata(to_i2c_client(dev))); + pr_debug("%s inv_mpu_suspend\n", st->hw->name); + return st->set_power_state(st, false); +} +static const struct dev_pm_ops inv_mpu_pmops = { + SET_SYSTEM_SLEEP_PM_OPS(inv_mpu_suspend, inv_mpu_resume) +}; +#define INV_MPU_PMOPS (&inv_mpu_pmops) +#else +#define INV_MPU_PMOPS NULL +#endif /* CONFIG_PM */ + +static const u16 normal_i2c[] = { I2C_CLIENT_END }; /* device id table is used to identify what device can be * supported by this driver */ @@ -2493,6 +2169,9 @@ static const struct i2c_device_id inv_mpu_id[] = { {"mpu3050", INV_MPU3050}, {"mpu6050", INV_MPU6050}, {"mpu9150", INV_MPU9150}, + {"mpu6500", INV_MPU6500}, + {"mpu9250", INV_MPU9250}, + {"mpu6xxx", INV_MPU6XXX}, {} }; @@ -2502,10 +2181,12 @@ static struct i2c_driver inv_mpu_driver = { .class = I2C_CLASS_HWMON, .probe = inv_mpu_probe, .remove = inv_mpu_remove, + .shutdown = inv_mpu_shutdown, .id_table = inv_mpu_id, .driver = { .owner = THIS_MODULE, .name = "inv-mpu-iio", + .pm = INV_MPU_PMOPS, }, .address_list = normal_i2c, }; @@ -2514,7 +2195,7 @@ static int __init inv_mpu_init(void) { int result = i2c_add_driver(&inv_mpu_driver); if (result) { - pr_err("%s failed\n", __func__); + pr_err("failed\n"); return result; } return 0; @@ -2532,7 +2213,7 @@ MODULE_AUTHOR("Invensense Corporation"); MODULE_DESCRIPTION("Invensense device driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("inv-mpu-iio"); + /** * @} */ - diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h index 303ed197bfd..9d99d9bac1c 
100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h +++ b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h @@ -17,28 +17,28 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro.h - * @brief Struct definitions for the Invensense gyro driver. + * @file inv_mpu_iio.h + * @brief Struct definitions for the Invensense mpu driver. */ -#ifndef _INV_GYRO_H_ -#define _INV_GYRO_H_ +#ifndef _INV_MPU_IIO_H_ +#define _INV_MPU_IIO_H_ #include #include #include -#include #include #include + #include "../../iio.h" #include "../../buffer.h" + #include "dmpKey.h" + /** * struct inv_reg_map_s - Notable slave registers. - * @who_am_i: Upper 6 bits of the device's slave address. * @sample_rate_div: Divider applied to gyro output rate. * @lpf: Configures internal LPF. - * @product_id: Product revision. * @bank_sel: Selects between memory banks. * @user_ctrl: Enables/resets the FIFO. * @fifo_en: Determines which data will appear in FIFO. @@ -58,34 +58,35 @@ * @prgm_strt_addrh firmware program start address register */ struct inv_reg_map_s { - unsigned char who_am_i; - unsigned char sample_rate_div; - unsigned char lpf; - unsigned char product_id; - unsigned char bank_sel; - unsigned char user_ctrl; - unsigned char fifo_en; - unsigned char gyro_config; - unsigned char accl_config; - unsigned char fifo_count_h; - unsigned char fifo_r_w; - unsigned char raw_gyro; - unsigned char raw_accl; - unsigned char temperature; - unsigned char int_enable; - unsigned char int_status; - unsigned char pwr_mgmt_1; - unsigned char pwr_mgmt_2; - unsigned char mem_start_addr; - unsigned char mem_r_w; - unsigned char prgm_strt_addrh; + u8 sample_rate_div; + u8 lpf; + u8 bank_sel; + u8 user_ctrl; + u8 fifo_en; + u8 gyro_config; + u8 accl_config; + u8 fifo_count_h; + u8 fifo_r_w; + u8 raw_gyro; + u8 raw_accl; + u8 temperature; + u8 int_enable; + u8 int_status; + u8 pwr_mgmt_1; + u8 pwr_mgmt_2; + u8 mem_start_addr; + u8 mem_r_w; + u8 prgm_strt_addrh; }; - +/*device enum */ enum inv_devices { - INV_ITG3500 = 0, - INV_MPU3050 = 1, - INV_MPU6050 = 2, - INV_MPU9150 = 3, + INV_ITG3500, + INV_MPU3050, + INV_MPU6050, + INV_MPU9150, + INV_MPU6500, + INV_MPU9250, + INV_MPU6XXX, INV_NUM_PARTS }; @@ -104,7 +105,7 @@ struct test_setup_t { int lpf; int fsr; int accl_fs; - unsigned int accl_sens[3]; + u32 accl_sens[3]; }; /** @@ -113,17 +114,15 @@ struct test_setup_t { * @name: name of the chip */ struct inv_hw_s { - unsigned char num_reg; - unsigned char *name; + u8 num_reg; + u8 *name; }; /** * struct inv_chip_config_s - Cached chip configuration data. * @fsr: Full scale range. * @lpf: Digital low pass filter frequency. - * @clk_src: Clock source. * @accl_fs: accel full scale range. - * @lpa_freq: low power frequency * @self_test_run_once flag for self test run ever. * @has_footer: MPU3050 specific work around. * @has_compass: has compass or not. @@ -137,46 +136,50 @@ struct inv_hw_s { * @is_asleep: 1 if chip is powered down. * @dmp_on: dmp is on/off. * @dmp_int_on: dmp interrupt on/off. - * @orientation_on: dmp is on/off. + * @dmp_event_int_on: dmp event interrupt on/off. * @firmware_loaded: flag indicate firmware loaded or not. * @lpa_mod: low power mode. * @tap_on: tap on/off. - * @flick_int_on: flick interrupt on/off. * @quaternion_on: send quaternion data on/off. * @display_orient_on: display orientation on/off. + * @normal_compass_measure: discard first compass data after reset. + * @smd_enable: disable/enable SMD function. + * @lpa_freq: low power frequency * @prog_start_addr: firmware program start address. 
- * @dmp_output_rate: dmp output rate. - * @fifo_rate: FIFO update rate. + * @fifo_rate: current FIFO update rate. + * @new_fifo_rate: set FIFO update rate + * @dmp_output_rate: current dmp output rate. */ struct inv_chip_config_s { - unsigned int fsr:2; - unsigned int lpf:3; - unsigned int clk_src:1; - unsigned int accl_fs:2; - unsigned int lpa_freq:2; - unsigned int self_test_run_once:1; - unsigned int has_footer:1; - unsigned int has_compass:1; - unsigned int enable:1; - unsigned int accl_enable:1; - unsigned int accl_fifo_enable:1; - unsigned int gyro_enable:1; - unsigned int gyro_fifo_enable:1; - unsigned int compass_enable:1; - unsigned int compass_fifo_enable:1; - unsigned int is_asleep:1; - unsigned int dmp_on:1; - unsigned int dmp_int_on:1; - unsigned int orientation_on:1; - unsigned int firmware_loaded:1; - unsigned int lpa_mode:1; - unsigned int tap_on:1; - unsigned int flick_int_on:1; - unsigned int quaternion_on:1; - unsigned int display_orient_on:1; - unsigned short prog_start_addr; - unsigned short fifo_rate; - unsigned char dmp_output_rate; + u32 fsr:2; + u32 lpf:3; + u32 accl_fs:2; + u32 self_test_run_once:1; + u32 has_footer:1; + u32 has_compass:1; + u32 enable:1; + u32 accl_enable:1; + u32 accl_fifo_enable:1; + u32 gyro_enable:1; + u32 gyro_fifo_enable:1; + u32 compass_enable:1; + u32 compass_fifo_enable:1; + u32 is_asleep:1; + u32 dmp_on:1; + u32 dmp_int_on:1; + u32 dmp_event_int_on:1; + u32 firmware_loaded:1; + u32 lpa_mode:1; + u32 tap_on:1; + u32 quaternion_on:1; + u32 display_orient_on:1; + u32 normal_compass_measure:1; + u32 smd_enable:1; + u16 lpa_freq; + u16 prog_start_addr; + u16 fifo_rate; + u16 new_fifo_rate; + u16 dmp_output_rate; }; /** @@ -191,40 +194,23 @@ struct inv_chip_config_s { * @accl_sens_trim: accel sensitivity trim factor. */ struct inv_chip_info_s { - unsigned char product_id; - unsigned char product_revision; - unsigned char silicon_revision; - unsigned char software_revision; - unsigned char multi; - unsigned char compass_sens[3]; - unsigned long gyro_sens_trim; - unsigned long accl_sens_trim; -}; -/** - * struct inv_chip_chan_info - Chip channel information. - * @channels: channel specification. - * @num_channels: number of channels. - */ -struct inv_chip_chan_info { - const struct iio_chan_spec *channels; - int num_channels; + u8 product_id; + u8 product_revision; + u8 silicon_revision; + u8 software_revision; + u8 multi; + u8 compass_sens[3]; + u32 gyro_sens_trim; + u32 accl_sens_trim; }; -/** - * struct inv_flick_s structure to store flick data. - * @lower: lower bound of flick. - * @upper: upper bound of flick. - * @counter: counter of flick. - * @msg_on; message to carry flick - * @axis: axis of flick - */ -struct inv_flick_s { - int lower; - int upper; - int counter; - char msg_on; - char axis; +enum inv_channel_num { + INV_CHANNEL_NUM_GYRO = 4, + INV_CHANNEL_NUM_GYRO_ACCL = 7, + INV_CHANNEL_NUM_GYRO_ACCL_QUANTERNION = 11, + INV_CHANNEL_NUM_GYRO_ACCL_QUANTERNION_MAGN = 14, }; + /** * struct inv_tap_s structure to store tap data. * @min_count: minimum taps counted. @@ -232,26 +218,72 @@ struct inv_flick_s { * @time: tap time. */ struct inv_tap_s { - char min_count; - short thresh; - short time; + u16 min_count; + u16 thresh; + u16 time; +}; + +/** + * struct accel_mot_int_s structure to store motion interrupt data + * @mot_thr: motion threshold. + * @mot_dur: motion duration. 
+ * @mot_on: flag to indicate motion detection on; + */ +struct accel_mot_int_s { + u16 mot_thr; + u32 mot_dur; + u8 mot_on:1; +}; + +/** + * struct self_test_setting - self test settables from sysfs + * samples: number of samples used in self test. + * threshold: threshold fail/pass criterion in self test. + * This value is in the percentage multiplied by 100. + * So 14% would be 14. + */ +struct self_test_setting { + u16 samples; + u16 threshold; }; + +/** + * struct inv_smd_s significant motion detection structure. + * threshold: accel threshold for motion detection. + * delay: delay time to confirm 2nd motion. + * delay2: delay window parameter. + */ +struct inv_smd_s { + u32 threshold; + u32 delay; + u32 delay2; +}; + struct inv_mpu_slave; /** - * struct inv_gyro_state_s - Driver state variables. + * struct inv_mpu_iio_s - Driver state variables. * @chip_config: Cached attribute information. * @chip_info: Chip information from read-only registers. * @trig; iio trigger. - * @flick: flick data structure - * @tap: tap data structure + * @tap: tap data structure. + * @smd: SMD data structure. * @reg: Map of important registers. + * @self_test: self test settings. * @hw: Other hardware-specific information. * @chip_type: chip type. * @time_stamp_lock: spin lock to time stamp. - * @i2c: i2c client handle. + * @client: i2c client handle. * @plat_data: platform data. * @mpu_slave: mpu slave handle. - * @chan_info: channel information + * (*set_power_state)(struct inv_mpu_iio_s *, int on): function ptr + * (*switch_gyro_engine)(struct inv_mpu_iio_s *, int on): function ptr + * (*switch_accl_engine)(struct inv_mpu_iio_s *, int on): function ptr + * (*compass_en)(struct inv_mpu_iio_s *, struct iio_buffer *, bool); + * (*quaternion_en)(struct inv_mpu_iio_s *, struct iio_buffer *, bool) + * (*gyro_en)(struct inv_mpu_iio_s *, struct iio_buffer *, bool): func ptr. + * (*accl_en)(struct inv_mpu_iio_s *, struct iio_buffer *, bool): func ptr. + * (*init_config)(struct iio_dev *indio_dev): function ptr + * void (*setup_reg)(struct inv_reg_map_s *reg): function ptr * @timestamps: kfifo queue to store time stamp. * @compass_st_upper: compass self test upper limit. * @compass_st_lower: compass self test lower limit. @@ -261,64 +293,91 @@ struct inv_mpu_slave; * @raw_gyro: raw gyro data. * @raw_accel: raw accel data. * @raw_compass: raw compass. - * @compass_scale: compass scale. - * @i2c_addr: i2c address. - * @compass_divider: slow down compass rate. - * @compass_counter: slow down compass rate. + * @raw_quaternion raw quaternion data. + * @int input_accel_bias[3]: accel bias from sysfs. + * @compass_scale: compass scale. + * @i2c_addr: i2c address. + * @compass_divider: slow down compass rate. + * @compass_dmp_divider: slow down compass rate for dmp. + * @compass_counter: slow down compass rate. * @sample_divider: sample divider for dmp. * @fifo_divider: fifo divider for dmp. - * @orient_data: orientation data. - * @display_orient_data: display orient data. + * @display_orient_data:display orient data. * @tap_data: tap data. + * @num_channels: number of channels for current chip. * @sl_handle: Handle to I2C port. - * @irq_dur_us: duration between each irq. - * @last_isr_time: last isr time. + * @irq_dur_ns: duration between each irq. + * @last_isr_time: last isr time. + * @mpu6500_last_motion_time: MPU6500 last real motion interrupt time. + * @name: name for distiguish MPU6050 and MPU6500 in MPU6XXX. 
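+ * @mot_int: accel motion interrupt settings (threshold, duration, enable).
+ * @secondary_name: name of the secondary (slave) device behind the MPU, if any.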
*/ -struct inv_gyro_state_s { +struct inv_mpu_iio_s { #define TIMESTAMP_FIFO_SIZE 16 struct inv_chip_config_s chip_config; struct inv_chip_info_s chip_info; struct iio_trigger *trig; - struct inv_flick_s flick; struct inv_tap_s tap; + struct inv_smd_s smd; struct inv_reg_map_s reg; - struct inv_hw_s *hw; + struct self_test_setting self_test; + const struct inv_hw_s *hw; enum inv_devices chip_type; spinlock_t time_stamp_lock; - struct i2c_client *i2c; + struct i2c_client *client; struct mpu_platform_data plat_data; struct inv_mpu_slave *mpu_slave; - struct inv_chip_chan_info *chan_info; - DECLARE_KFIFO(timestamps, long long, TIMESTAMP_FIFO_SIZE); - short compass_st_upper[3]; - short compass_st_lower[3]; + struct accel_mot_int_s mot_int; + int (*set_power_state)(struct inv_mpu_iio_s *, bool on); + int (*switch_gyro_engine)(struct inv_mpu_iio_s *, bool on); + int (*switch_accl_engine)(struct inv_mpu_iio_s *, bool on); + int (*compass_en)(struct inv_mpu_iio_s *, + struct iio_buffer *ring, bool on); + int (*quaternion_en)(struct inv_mpu_iio_s *, + struct iio_buffer *ring, bool on); + int (*gyro_en)(struct inv_mpu_iio_s *, + struct iio_buffer *ring, bool on); + int (*accl_en)(struct inv_mpu_iio_s *, + struct iio_buffer *ring, bool on); + int (*init_config)(struct iio_dev *indio_dev); + void (*setup_reg)(struct inv_reg_map_s *reg); + DECLARE_KFIFO(timestamps, u64, TIMESTAMP_FIFO_SIZE); + const short *compass_st_upper; + const short *compass_st_lower; short irq; int accel_bias[3]; int gyro_bias[3]; short raw_gyro[3]; short raw_accel[3]; short raw_compass[3]; - unsigned char compass_scale; - unsigned char i2c_addr; - unsigned char compass_divider; - unsigned char compass_counter; - unsigned char sample_divider; - unsigned char fifo_divider; - unsigned char orient_data; - unsigned char display_orient_data; - unsigned char tap_data; + int raw_quaternion[4]; + int input_accel_bias[3]; + u8 compass_scale; + u8 i2c_addr; + u8 compass_divider; + u8 compass_counter; + u8 compass_dmp_divider; + u8 sample_divider; + u8 fifo_divider; + u8 display_orient_data; + u8 tap_data; + enum inv_channel_num num_channels; void *sl_handle; - unsigned int irq_dur_us; - long long last_isr_time; + u32 irq_dur_ns; + u64 last_isr_time; + u64 mpu6500_last_motion_time; + u8 name[20]; + u8 secondary_name[20]; }; + /* produces an unique identifier for each device based on the combination of product version and product revision */ struct prod_rev_map_t { - unsigned short mpl_product_key; - unsigned char silicon_rev; - unsigned short gyro_trim; - unsigned short accel_trim; + u16 mpl_product_key; + u8 silicon_rev; + u16 gyro_trim; + u16 accel_trim; }; + /** * struct inv_mpu_slave - MPU slave structure. * @suspend: suspend operation. 
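The prod_rev_map_t rows above are keyed by MPL_PROD_KEY(ver, rev), which is simply ver * 100 + rev (see the define later in this header); inv_mpu_misc.c fills a prod_rev_map[] table with these keys and looks a key up with a linear scan in index_of_key(). The stand-alone sketch below is illustrative only and not part of the patch: it copies three rows from the real table and substitutes -1 for -EINVAL so it builds outside the kernel.

#include <stdio.h>

#define MPL_PROD_KEY(ver, rev)	((ver) * 100 + (rev))

struct prod_rev_map_t {
	unsigned short mpl_product_key;
	unsigned char silicon_rev;
	unsigned short gyro_trim;
	unsigned short accel_trim;
};

/* Three rows copied from prod_rev_map[] in inv_mpu_misc.c;
 * MPU_SILICON_REV_A2 == 1, MPU_SILICON_REV_B1 == 2.
 */
static const struct prod_rev_map_t prod_rev_map[] = {
	{ MPL_PROD_KEY(0, 3),  1, 131, 16384 },
	{ MPL_PROD_KEY(1, 16), 2, 131, 16384 },
	{ MPL_PROD_KEY(4, 31), 2, 131, 8192 },
};

/* Linear search, mirroring index_of_key(); -1 stands in for -EINVAL here. */
static int index_of_key(unsigned short key)
{
	unsigned int i;

	for (i = 0; i < sizeof(prod_rev_map) / sizeof(prod_rev_map[0]); i++)
		if (prod_rev_map[i].mpl_product_key == key)
			return (int)i;
	return -1;
}

int main(void)
{
	/* product version 1, product revision 16 -> key 116 */
	int i = index_of_key(MPL_PROD_KEY(1, 16));

	if (i >= 0)
		printf("silicon rev %d, gyro trim %d, accel trim %d\n",
		       prod_rev_map[i].silicon_rev,
		       prod_rev_map[i].gyro_trim,
		       prod_rev_map[i].accel_trim);
	return 0;
}

When the software revision read from the chip's OTP memory is zero, inv_get_silicon_rev_mpu6050() falls back to exactly this product-key lookup to select the silicon revision and the gyro/accel trim values.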
@@ -330,196 +389,276 @@ struct prod_rev_map_t { * @set_fs set full scale */ struct inv_mpu_slave { - int (*suspend)(struct inv_gyro_state_s *); - int (*resume)(struct inv_gyro_state_s *); - int (*setup)(struct inv_gyro_state_s *); - int (*combine_data)(unsigned char *in, short *out); - int (*get_mode)(struct inv_gyro_state_s *); - int (*set_lpf)(struct inv_gyro_state_s *, int rate); - int (*set_fs)(struct inv_gyro_state_s *, int fs); + int (*suspend)(struct inv_mpu_iio_s *); + int (*resume)(struct inv_mpu_iio_s *); + int (*setup)(struct inv_mpu_iio_s *); + int (*combine_data)(u8 *in, short *out); + int (*get_mode)(void); + int (*set_lpf)(struct inv_mpu_iio_s *, int rate); + int (*set_fs)(struct inv_mpu_iio_s *, int fs); }; + /* AKM definitions */ -#define REG_AKM_ID (0x00) -#define REG_AKM_STATUS (0x02) -#define REG_AKM_MEASURE_DATA (0x03) -#define REG_AKM_MODE (0x0A) -#define REG_AKM_ST_CTRL (0x0C) -#define REG_AKM_SENSITIVITY (0x10) -#define REG_AKM8963_CNTL1 (0x0A) - -#define DATA_AKM_ID (0x48) -#define DATA_AKM_MODE_PW_DN (0x00) -#define DATA_AKM_MODE_PW_SM (0x01) -#define DATA_AKM_MODE_PW_ST (0x08) -#define DATA_AKM_MODE_PW_FR (0x0F) -#define DATA_AKM_SELF_TEST (0x40) -#define DATA_AKM_DRDY (0x01) -#define DATA_AKM8963_BIT (0x10) -#define DATA_AKM_STAT_MASK (0x0C) - -#define DATA_AKM8975_SCALE (9830) -#define DATA_AKM8972_SCALE (19661) -#define DATA_AKM8963_SCALE0 (19661) -#define DATA_AKM8963_SCALE1 (4915) -#define AKM8963_SCALE_SHIFT (4) -#define NUM_BYTES_COMPASS_SLAVE (8) - -#define DATA_AKM8975_ST_X_UP (100) -#define DATA_AKM8975_ST_X_LW (-100) -#define DATA_AKM8975_ST_Y_UP (100) -#define DATA_AKM8975_ST_Y_LW (-100) -#define DATA_AKM8975_ST_Z_UP (-300) -#define DATA_AKM8975_ST_Z_LW (-1000) - -#define DATA_AKM8972_ST_X_UP (50) -#define DATA_AKM8972_ST_X_LW (-50) -#define DATA_AKM8972_ST_Y_UP (50) -#define DATA_AKM8972_ST_Y_LW (-50) -#define DATA_AKM8972_ST_Z_UP (-100) -#define DATA_AKM8972_ST_Z_LW (-500) - -#define DATA_AKM8963_ST_X_UP (200) -#define DATA_AKM8963_ST_X_LW (-200) -#define DATA_AKM8963_ST_Y_UP (200) -#define DATA_AKM8963_ST_Y_LW (-200) -#define DATA_AKM8963_ST_Z_UP (-800) -#define DATA_AKM8963_ST_Z_LW (-3200) - - -/* register definition*/ -#define REG_3050_AUX_VDDIO (0x13) -#define REG_3050_SLAVE_ADDR (0x14) -#define REG_3050_AUX_BST_ADDR (0x18) -#define REG_3050_AUX_XOUT_H (0x23) - -#define REG_3500_OTP (0x00) - -#define REG_YGOFFS_TC (0x01) -#define REG_XA_OFFS_L_TC (0x07) -#define REG_ST_GCT_X (0x0D) -#define REG_I2C_MST_CTRL (0x24) -#define REG_I2C_SLV0_ADDR (0x25) -#define REG_I2C_SLV0_REG (0x26) -#define REG_I2C_SLV0_CTRL (0x27) -#define REG_I2C_SLV1_ADDR (0x28) -#define REG_I2C_SLV1_REG (0x29) -#define REG_I2C_SLV1_CTRL (0x2A) - -#define REG_I2C_SLV4_CTRL (0x34) -#define REG_INT_PIN_CFG (0x37) -#define REG_DMP_INT_STATUS (0x39) -#define REG_EXT_SENS_DATA_00 (0x49) -#define REG_I2C_SLV1_DO (0x64) -#define REG_I2C_MST_DELAY_CTRL (0x67) -#define REG_BANK_SEL (0x6D) -#define REG_MEM_START (0x6E) -#define REG_MEM_RW (0x6F) - -/* bit definitions */ -#define BIT_3050_VDDIO (0x04) -#define BIT_3050_AUX_IF_EN (0x20) -#define BIT_3050_FIFO_RST (0x02) - -#define BIT_BYPASS_EN (0x2) -#define BIT_WAIT_FOR_ES (0x40) -#define BIT_I2C_READ (0x80) -#define BIT_SLV_EN (0x80) -#define BIT_I2C_MST_VDDIO (0x80) - -#define BIT_DMP_EN (0x80) -#define BIT_FIFO_EN (0x40) -#define BIT_I2C_MST_EN (0x20) -#define BIT_DMP_RST (0x08) -#define BIT_FIFO_RST (0x04) - -#define BIT_SLV0_DLY_EN (0x01) -#define BIT_SLV1_DLY_EN (0x02) - -#define BIT_FIFO_OVERFLOW (0x10) -#define BIT_DATA_RDY_EN 
(0x01) -#define BIT_DMP_INT_EN (0x02) - -#define BIT_PWR_ACCL_STBY (0x38) -#define BIT_PWR_GYRO_STBY (0x07) - -#define BIT_GYRO_XOUT (0x40) -#define BIT_GYRO_YOUT (0x20) -#define BIT_GYRO_ZOUT (0x10) -#define BIT_ACCEL_OUT (0x08) -#define BITS_GYRO_OUT (0x70) -#define BITS_SELF_TEST_EN (0xE0) -#define BITS_3050_ACCL_OUT (0x0E) -#define BITS_3050_POWER1 (0x30) -#define BITS_3050_POWER2 (0x10) -#define BITS_3050_GYRO_STANDBY (0x38) -#define BITS_FSR (0x18) -#define BITS_LPF (0x07) -#define BITS_CLK (0x07) -#define BIT_3500_FIFO_OVERFLOW (0x10) -#define BIT_SLEEP (0x40) -#define BIT_CYCLE (0x20) -#define BIT_LPA_FREQ (0xC0) - -#define DMP_START_ADDR (0x400) -#define BYTES_FOR_DMP (16) -#define QUATERNION_BYTES (16) -#define BYTES_PER_SENSOR (6) -#define MPU3050_FOOTER_SIZE (2) -#define FIFO_COUNT_BYTE (2) -#define FIFO_THRESHOLD (500) -#define POWER_UP_TIME (100) -#define SENSOR_UP_TIME (30) -#define MPU_MEM_BANK_SIZE (256) -#define MPU3050_TEMP_OFFSET (5383314L) -#define MPU3050_TEMP_SCALE (3834792L) -#define MPU6050_TEMP_OFFSET (2462307L) -#define MPU6050_TEMP_SCALE (2977653L) -#define MPU_TEMP_SHIFT (16) -#define LPA_FREQ_SHIFT (6) -#define COMPASS_RATE_SCALE (10) -#define MAX_GYRO_FS_PARAM (3) -#define MAX_ACCL_FS_PARAM (3) -#define MAX_LPA_FREQ_PARAM (3) -#define THREE_AXIS (3) -#define GYRO_CONFIG_FSR_SHIFT (3) -#define ACCL_CONFIG_FSR_SHIFT (3) -#define GYRO_DPS_SCALE (250) -#define MEM_ADDR_PROD_REV (0x6) -#define SOFT_PROD_VER_BYTES (5) -#define CHAN_INDEX_GYRO (0) -#define CHAN_INDEX_GYRO_ACCL (1) -#define CHAN_INDEX_GYRO_ACCL_MAGN (2) +#define REG_AKM_ID 0x00 +#define REG_AKM_STATUS 0x02 +#define REG_AKM_MEASURE_DATA 0x03 +#define REG_AKM_MODE 0x0A +#define REG_AKM_ST_CTRL 0x0C +#define REG_AKM_SENSITIVITY 0x10 +#define REG_AKM8963_CNTL1 0x0A + +#define DATA_AKM_ID 0x48 +#define DATA_AKM_MODE_PD 0x00 +#define DATA_AKM_MODE_SM 0x01 +#define DATA_AKM_MODE_ST 0x08 +#define DATA_AKM_MODE_FR 0x0F +#define DATA_AKM_SELF_TEST 0x40 +#define DATA_AKM_DRDY 0x01 +#define DATA_AKM8963_BIT 0x10 +#define DATA_AKM_STAT_MASK 0x0C + +#define DATA_AKM8975_SCALE (9830 * (1L << 15)) +#define DATA_AKM8972_SCALE (19661 * (1L << 15)) +#define DATA_AKM8963_SCALE0 (19661 * (1L << 15)) +#define DATA_AKM8963_SCALE1 (4915 * (1L << 15)) +#define AKM8963_SCALE_SHIFT 4 +#define NUM_BYTES_COMPASS_SLAVE 8 + +/*register and associated bit definition*/ +#define REG_3050_FIFO_EN 0x12 +#define BITS_3050_ACCL_OUT 0x0E + +#define REG_3050_AUX_VDDIO 0x13 +#define BIT_3050_VDDIO 0x04 + +#define REG_3050_SLAVE_ADDR 0x14 +#define REG_3050_SAMPLE_RATE_DIV 0x15 +#define REG_3050_LPF 0x16 +#define REG_3050_INT_ENABLE 0x17 +#define REG_3050_AUX_BST_ADDR 0x18 +#define REG_3050_INT_STATUS 0x1A +#define REG_3050_TEMPERATURE 0x1B +#define REG_3050_RAW_GYRO 0x1D +#define REG_3050_AUX_XOUT_H 0x23 +#define REG_3050_FIFO_COUNT_H 0x3A +#define REG_3050_FIFO_R_W 0x3C + +#define REG_3050_USER_CTRL 0x3D +#define BIT_3050_AUX_IF_EN 0x20 +#define BIT_3050_FIFO_RST 0x02 + +#define REG_3050_PWR_MGMT_1 0x3E +#define BITS_3050_POWER1 0x30 +#define BITS_3050_POWER2 0x10 +#define BITS_3050_GYRO_STANDBY 0x38 + +#define REG_3500_OTP 0x0 + +#define REG_YGOFFS_TC 0x1 +#define BIT_I2C_MST_VDDIO 0x80 + +#define REG_XA_OFFS_L_TC 0x7 +#define REG_PRODUCT_ID 0xC +#define REG_ST_GCT_X 0xD +#define REG_SAMPLE_RATE_DIV 0x19 +#define REG_CONFIG 0x1A + +#define REG_GYRO_CONFIG 0x1B +#define BITS_SELF_TEST_EN 0xE0 + +#define REG_ACCEL_CONFIG 0x1C +#define REG_ACCEL_MOT_THR 0x1F +#define REG_ACCEL_MOT_DUR 0x20 + +#define REG_FIFO_EN 0x23 +#define BIT_ACCEL_OUT 
0x08 +#define BITS_GYRO_OUT 0x70 + + +#define REG_I2C_MST_CTRL 0x24 +#define BIT_WAIT_FOR_ES 0x40 + +#define REG_I2C_SLV0_ADDR 0x25 +#define BIT_I2C_READ 0x80 + +#define REG_I2C_SLV0_REG 0x26 + +#define REG_I2C_SLV0_CTRL 0x27 +#define BIT_SLV_EN 0x80 + +#define REG_I2C_SLV1_ADDR 0x28 +#define REG_I2C_SLV1_REG 0x29 +#define REG_I2C_SLV1_CTRL 0x2A + +#define REG_I2C_SLV2_ADDR 0x2B +#define REG_I2C_SLV2_REG 0x2C +#define REG_I2C_SLV2_CTRL 0x2D + +#define REG_I2C_SLV4_CTRL 0x34 + +#define REG_INT_PIN_CFG 0x37 +#define BIT_BYPASS_EN 0x2 + +#define REG_INT_ENABLE 0x38 +#define BIT_DATA_RDY_EN 0x01 +#define BIT_DMP_INT_EN 0x02 +#define BIT_ZMOT_EN 0x20 +#define BIT_MOT_EN 0x40 +#define BIT_6500_WOM_EN 0x40 + +#define REG_DMP_INT_STATUS 0x39 +#define SMD_INT_ON 0x04 + +#define REG_INT_STATUS 0x3A +#define BIT_MOT_INT 0x40 +#define BIT_ZMOT_INT 0x20 + +#define REG_RAW_ACCEL 0x3B +#define REG_TEMPERATURE 0x41 +#define REG_RAW_GYRO 0x43 +#define REG_EXT_SENS_DATA_00 0x49 + +#define REG_ACCEL_INTEL_STATUS 0x61 + +#define REG_I2C_SLV1_DO 0x64 + +#define REG_I2C_MST_DELAY_CTRL 0x67 +#define BIT_SLV0_DLY_EN 0x01 +#define BIT_SLV1_DLY_EN 0x02 +#define BIT_SLV2_DLY_EN 0x04 + +#define REG_USER_CTRL 0x6A +#define BIT_FIFO_RST 0x04 +#define BIT_DMP_RST 0x08 +#define BIT_I2C_MST_EN 0x20 +#define BIT_FIFO_EN 0x40 +#define BIT_DMP_EN 0x80 + +#define REG_PWR_MGMT_1 0x6B +#define BIT_H_RESET 0x80 +#define BIT_SLEEP 0x40 +#define BIT_CYCLE 0x20 +#define BIT_CLK_MASK 0x7 + +#define REG_PWR_MGMT_2 0x6C +#define BIT_PWR_ACCL_STBY 0x38 +#define BIT_PWR_GYRO_STBY 0x07 +#define BIT_LPA_FREQ 0xC0 + +#define REG_BANK_SEL 0x6D +#define REG_MEM_START_ADDR 0x6E +#define REG_MEM_RW 0x6F +#define REG_PRGM_STRT_ADDRH 0x70 +#define REG_FIFO_COUNT_H 0x72 +#define REG_FIFO_R_W 0x74 +#define REG_WHOAMI 0x75 + +#define REG_6500_XG_ST_DATA 0x0 +#define REG_6500_XA_ST_DATA 0xD +#define REG_6500_ACCEL_CONFIG2 0x1D +#define BIT_ACCEL_FCHOCIE_B 0x08 + +#define REG_6500_LP_ACCEL_ODR 0x1E +#define REG_6500_ACCEL_WOM_THR 0x1F + +#define REG_6500_ACCEL_INTEL_CTRL 0x69 +#define BIT_ACCEL_INTEL_ENABLE 0x80 +#define BIT_ACCEL_INTEL_MODE 0x40 + +/* data definitions */ +#define DMP_START_ADDR 0x400 +#define DMP_MASK_TAP 0x3f +#define DMP_MASK_DIS_ORIEN 0xC0 +#define DMP_DIS_ORIEN_SHIFT 6 + +#define BYTES_FOR_DMP 16 +#define BYTES_FOR_EVENTS 4 +#define QUATERNION_BYTES 16 +#define BYTES_PER_SENSOR 6 +#define MPU3050_FOOTER_SIZE 2 +#define FIFO_COUNT_BYTE 2 +#define FIFO_THRESHOLD 500 +#define POWER_UP_TIME 100 +#define SENSOR_UP_TIME 30 +#define REG_UP_TIME 5 +#define MPU_MEM_BANK_SIZE 256 + +#define MPU6XXX_MAX_MOTION_THRESH (255*4) +#define MPU6XXX_MOTION_THRESH_SHIFT 5 +#define MPU6050_MOTION_DUR_DEFAULT 1 +#define MPU6050_ID 0x68 +#define MPU6050_MAX_MOTION_DUR 255 +#define MPU_TEMP_SHIFT 16 +#define LPA_FREQ_SHIFT 6 +#define COMPASS_RATE_SCALE 10 +#define MAX_GYRO_FS_PARAM 3 +#define MAX_ACCL_FS_PARAM 3 +#define MAX_LPA_FREQ_PARAM 3 +#define MPU6XXX_MAX_MPU_MEM (256 * 12) + +#define INIT_MOT_DUR 128 +#define INIT_MOT_THR 128 +#define INIT_ZMOT_DUR 128 +#define INIT_ZMOT_THR 128 +#define INIT_ST_SAMPLES 50 +#define INIT_ST_THRESHOLD 14 +#define ST_THRESHOLD_MULTIPLIER 10 +#define ST_MAX_SAMPLES 500 +#define ST_MAX_THRESHOLD 100 + +/*---- MPU6500 ----*/ +#define MPU6500_ID 0x70 /* unique WHOAMI */ +#define MPU6500_PRODUCT_REVISION 1 +#define MPU6500_MEM_REV_ADDR 0x17 +#define MPU6500_REV 2 + +/*---- MPU9250 ----*/ +#define MPU9250_ID 0x71 /* unique WHOAMI */ + +#define THREE_AXIS 3 +#define GYRO_CONFIG_FSR_SHIFT 3 +#define ACCL_CONFIG_FSR_SHIFT 
3 +#define GYRO_DPS_SCALE 250 +#define MEM_ADDR_PROD_REV 0x6 +#define SOFT_PROD_VER_BYTES 5 +#define CRC_FIRMWARE_SEED 0 +#define SELF_TEST_SUCCESS 1 +#define MS_PER_DMP_TICK 20 /* init parameters */ -#define INIT_FIFO_RATE (50) -#define INIT_DUR_TIME ((1000/INIT_FIFO_RATE)*1000) -#define INIT_TAP_THRESHOLD (100) -#define INIT_TAP_TIME (100) -#define INIT_TAP_MIN_COUNT (2) +#define INIT_FIFO_RATE 50 +#define INIT_DMP_OUTPUT_RATE 25 +#define INIT_DUR_TIME ((1000 / INIT_FIFO_RATE) * 1000 * 1000) +#define INIT_TAP_THRESHOLD 100 +#define INIT_TAP_TIME 100 +#define INIT_TAP_MIN_COUNT 2 +#define MPU_INIT_SMD_DELAY_THLD 3 +#define MPU_INIT_SMD_DELAY2_THLD 1 +#define MPU_INIT_SMD_THLD 3000 +#define MPU_DEFAULT_DMP_FREQ 200 #define MPL_PROD_KEY(ver, rev) (ver * 100 + rev) #define NUM_OF_PROD_REVS (ARRAY_SIZE(prod_rev_map)) /*---- MPU6050 Silicon Revisions ----*/ -#define MPU_SILICON_REV_A2 1 /* MPU6050A2 Device */ -#define MPU_SILICON_REV_B1 2 /* MPU6050B1 Device */ - -#define BIT_PRFTCH_EN 0x40 -#define BIT_CFG_USER_BANK 0x20 -#define BITS_MEM_SEL 0x1f -/* time stamp tolerance */ -#define TIME_STAMP_TOR (5) -#define MAX_CATCH_UP (5) -#define DEFAULT_ACCL_TRIM (16384) -#define MAX_FIFO_RATE (1000) -#define MIN_FIFO_RATE (4) -#define ONE_K_HZ (1000) - -/* flick related defines */ -#define DATA_INT (2097) -#define DATA_MSG_ON (262144) +#define MPU_SILICON_REV_A2 1 /* MPU6050A2 Device */ +#define MPU_SILICON_REV_B1 2 /* MPU6050B1 Device */ + +#define BIT_PRFTCH_EN 0x40 +#define BIT_CFG_USER_BANK 0x20 +#define BITS_MEM_SEL 0x1f + +#define TIME_STAMP_TOR 5 +#define MAX_CATCH_UP 5 +#define DEFAULT_ACCL_TRIM 16384 +#define DEFAULT_GYRO_TRIM 131 +#define MAX_FIFO_RATE 1000 +#define MAX_DMP_OUTPUT_RATE 200 +#define MIN_FIFO_RATE 4 +#define ONE_K_HZ 1000 +#define NS_PER_MS_SHIFT 20 /*tap related defines */ #define INV_TAP 0x08 -#define INV_NUM_TAP_AXES (3) +#define INV_NUM_TAP_AXES 3 #define INV_TAP_AXIS_X_POS 0x20 #define INV_TAP_AXIS_X_NEG 0x10 @@ -539,45 +678,38 @@ struct inv_mpu_slave { INV_TAP_AXIS_Z) #define INT_SRC_TAP 0x01 -#define INT_SRC_ORIENT 0x02 - -/*orientation related */ -#define INV_X_UP 0x01 -#define INV_X_DOWN 0x02 -#define INV_Y_UP 0x04 -#define INV_Y_DOWN 0x08 -#define INV_Z_UP 0x10 -#define INV_Z_DOWN 0x20 -#define INV_ORIENTATION_ALL 0x3F - -#define INV_ORIENTATION_FLIP 0x40 -#define INV_X_AXIS_INDEX (0x00) -#define INV_Y_AXIS_INDEX (0x01) -#define INV_Z_AXIS_INDEX (0x02) - -#define INV_ELEMENT_1 (0x0001) -#define INV_ELEMENT_2 (0x0002) -#define INV_ELEMENT_3 (0x0004) -#define INV_ELEMENT_4 (0x0008) -#define INV_ELEMENT_5 (0x0010) -#define INV_ELEMENT_6 (0x0020) -#define INV_ELEMENT_7 (0x0040) -#define INV_ELEMENT_8 (0x0080) -#define INV_ALL (0xFFFF) -#define INV_ELEMENT_MASK (0x00FF) -#define INV_GYRO_ACC_MASK (0x007E) +#define INT_SRC_DISPLAY_ORIENT 0x08 +#define INT_SRC_SHAKE 0x10 + +#define INV_X_AXIS_INDEX 0x00 +#define INV_Y_AXIS_INDEX 0x01 +#define INV_Z_AXIS_INDEX 0x02 + +#define INV_ELEMENT_1 0x0001 +#define INV_ELEMENT_2 0x0002 +#define INV_ELEMENT_3 0x0004 +#define INV_ELEMENT_4 0x0008 +#define INV_ELEMENT_5 0x0010 +#define INV_ELEMENT_6 0x0020 +#define INV_ELEMENT_7 0x0040 +#define INV_ELEMENT_8 0x0080 +#define INV_ALL 0xFFFF +#define INV_ELEMENT_MASK 0x00FF +#define INV_GYRO_ACC_MASK 0x007E +#define INV_ACCL_MASK 0x70 +#define INV_GYRO_MASK 0xE /* scan element definition */ enum inv_mpu_scan { INV_MPU_SCAN_QUAT_R = 0, INV_MPU_SCAN_QUAT_X, INV_MPU_SCAN_QUAT_Y, INV_MPU_SCAN_QUAT_Z, - INV_MPU_SCAN_GYRO_X, - INV_MPU_SCAN_GYRO_Y, - INV_MPU_SCAN_GYRO_Z, INV_MPU_SCAN_ACCL_X, 
INV_MPU_SCAN_ACCL_Y, INV_MPU_SCAN_ACCL_Z, + INV_MPU_SCAN_GYRO_X, + INV_MPU_SCAN_GYRO_Y, + INV_MPU_SCAN_GYRO_Z, INV_MPU_SCAN_MAGN_X, INV_MPU_SCAN_MAGN_Y, INV_MPU_SCAN_MAGN_Z, @@ -595,6 +727,12 @@ enum inv_filter_e { INV_FILTER_2100HZ_NOLPF, NUM_FILTER }; + +enum inv_slave_mode { + INV_MODE_SUSPEND, + INV_MODE_NORMAL, +}; + /*==== MPU6050B1 MEMORY ====*/ enum MPU_MEMORY_BANKS { MEM_RAM_BANK_0 = 0, @@ -613,6 +751,56 @@ enum MPU_MEMORY_BANKS { MPU_MEM_OTP_BANK_0 = 16 }; +/* IIO attribute address */ +enum MPU_IIO_ATTR_ADDR { + ATTR_DMP_SMD_ENABLE, + ATTR_DMP_SMD_THLD, + ATTR_DMP_SMD_DELAY_THLD, + ATTR_DMP_SMD_DELAY_THLD2, + ATTR_DMP_TAP_ON, + ATTR_DMP_TAP_THRESHOLD, + ATTR_DMP_TAP_MIN_COUNT, + ATTR_DMP_TAP_TIME, + ATTR_DMP_DISPLAY_ORIENTATION_ON, +/* *****above this line, are DMP features, power needs on/off */ +/* *****below this line, are DMP features, no power needed */ + ATTR_DMP_ON, + ATTR_DMP_INT_ON, + ATTR_DMP_EVENT_INT_ON, + ATTR_DMP_OUTPUT_RATE, + ATTR_DMP_QUATERNION_ON, +/* *****above this line, it is all DMP related features */ +/* *****below this line, it is all non-DMP related features */ + ATTR_MOTION_LPA_ON, + ATTR_MOTION_LPA_FREQ, + ATTR_MOTION_LPA_DURATION, + ATTR_MOTION_LPA_THRESHOLD, +/* *****above this line, it is non-DMP, power needs on/off */ +/* *****below this line, it is non-DMP, no needs to on/off power */ + ATTR_SELF_TEST_SAMPLES, + ATTR_SELF_TEST_THRESHOLD, + ATTR_GYRO_ENABLE, + ATTR_ACCL_ENABLE, + ATTR_COMPASS_ENABLE, + ATTR_POWER_STATE, /* this is fake sysfs for compatibility */ + ATTR_FIRMWARE_LOADED, + ATTR_SAMPLING_FREQ, +/* *****below this line, it is attributes only has show methods */ + ATTR_SELF_TEST, /* this has show-only methods but needs power on/off */ + ATTR_GYRO_MATRIX, + ATTR_ACCL_MATRIX, + ATTR_COMPASS_MATRIX, + ATTR_SECONDARY_NAME, +#ifdef CONFIG_INV_TESTING + ATTR_I2C_COUNTERS, + ATTR_REG_WRITE, + ATTR_DEBUG_SMD_ENABLE_TESTP1, + ATTR_DEBUG_SMD_ENABLE_TESTP2, + ATTR_DEBUG_SMD_EXE_STATE, + ATTR_DEBUG_SMD_DELAY_CNTR +#endif +}; + enum inv_accl_fs_e { INV_FS_02G = 0, INV_FS_04G, @@ -635,8 +823,6 @@ enum inv_clock_sel_e { NUM_CLK }; -void inv_wake_up(void); -int inv_set_power_state(struct inv_gyro_state_s *st, unsigned char power_on); ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t size); ssize_t inv_dmp_firmware_read(struct file *filp, @@ -649,43 +835,54 @@ int inv_mpu_probe_trigger(struct iio_dev *indio_dev); void inv_mpu_unconfigure_ring(struct iio_dev *indio_dev); void inv_mpu_remove_trigger(struct iio_dev *indio_dev); int inv_init_config_mpu3050(struct iio_dev *indio_dev); -int inv_get_silicon_rev_mpu6050(struct inv_gyro_state_s *st); -int set_3050_bypass(struct inv_gyro_state_s *st, int enable); -int inv_register_bma250_slave(struct inv_gyro_state_s *st); +int inv_get_silicon_rev_mpu6050(struct inv_mpu_iio_s *st); +int inv_get_silicon_rev_mpu6500(struct inv_mpu_iio_s *st); +int set_3050_bypass(struct inv_mpu_iio_s *st, bool enable); +int inv_register_mpu3050_slave(struct inv_mpu_iio_s *st); void inv_setup_reg_mpu3050(struct inv_reg_map_s *reg); -int set_power_mpu3050(struct inv_gyro_state_s *st, unsigned char power_on); -int set_inv_enable(struct iio_dev *indio_dev, unsigned long enable); -int inv_send_quaternion(struct inv_gyro_state_s *st, int on); -int inv_set_display_orient_interrupt_dmp(struct inv_gyro_state_s *st, int on); -int inv_enable_orientation_dmp(struct inv_gyro_state_s *st, int on); -int inv_set_fifo_rate(struct inv_gyro_state_s *st, unsigned long 
fifo_rate); -unsigned short inv_dmp_get_address(unsigned short key); -long inv_q30_mult(long a, long b); -int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, - unsigned int axis, unsigned short threshold); -int inv_set_min_taps_dmp(struct inv_gyro_state_s *st, unsigned int min_taps); -int inv_set_tap_time_dmp(struct inv_gyro_state_s *st, unsigned int time); -int inv_enable_tap_dmp(struct inv_gyro_state_s *st, unsigned char on); -int inv_i2c_read_base(struct inv_gyro_state_s *st, unsigned short i2c_addr, - unsigned char reg, unsigned short length, unsigned char *data); -int inv_i2c_single_write_base(struct inv_gyro_state_s *st, - unsigned short i2c_addr, unsigned char reg, unsigned char data); -int inv_do_test(struct inv_gyro_state_s *st, int self_test_flag, +int inv_switch_3050_gyro_engine(struct inv_mpu_iio_s *st, bool en); +int inv_switch_3050_accl_engine(struct inv_mpu_iio_s *st, bool en); +int set_power_mpu3050(struct inv_mpu_iio_s *st, bool power_on); +int inv_set_interrupt_on_gesture_event(struct inv_mpu_iio_s *st, bool on); +int inv_send_quaternion(struct inv_mpu_iio_s *st, bool on); +int inv_set_display_orient_interrupt_dmp(struct inv_mpu_iio_s *st, bool on); +int inv_set_fifo_rate(struct inv_mpu_iio_s *st, u16 fifo_rate); +u16 inv_dmp_get_address(u16 key); +int inv_q30_mult(int a, int b); +int inv_set_tap_threshold_dmp(struct inv_mpu_iio_s *st, + u32 axis, u16 threshold); +int inv_set_min_taps_dmp(struct inv_mpu_iio_s *st, u16 min_taps); +int inv_set_tap_time_dmp(struct inv_mpu_iio_s *st, u16 time); +int inv_enable_tap_dmp(struct inv_mpu_iio_s *st, bool on); +int inv_i2c_read_base(struct inv_mpu_iio_s *st, u16 i2c_addr, + u8 reg, u16 length, u8 *data); +int inv_i2c_single_write_base(struct inv_mpu_iio_s *st, + u16 i2c_addr, u8 reg, u8 data); +int inv_do_test(struct inv_mpu_iio_s *st, int self_test_flag, int *gyro_result, int *accl_result); -int mpu_memory_write(struct i2c_adapter *i2c_adap, - unsigned char mpu_addr, - unsigned short mem_addr, - unsigned int len, unsigned char const *data); -int mpu_memory_read(struct i2c_adapter *i2c_adap, - unsigned char mpu_addr, - unsigned short mem_addr, - unsigned int len, unsigned char *data); -int inv_hw_self_test(struct inv_gyro_state_s *st); - -#define mem_w(a, b, c) mpu_memory_write(st->sl_handle,\ - st->i2c_addr, a, b, c) -#define mem_w_key(key, b, c) mpu_memory_write(st->sl_handle,\ - st->i2c_addr, inv_dmp_get_address(key), b, c) +int inv_hw_self_test(struct inv_mpu_iio_s *st); +int inv_hw_self_test_6500(struct inv_mpu_iio_s *st); +void inv_recover_setting(struct inv_mpu_iio_s *st); +int inv_power_up_self_test(struct inv_mpu_iio_s *st); +s64 get_time_ns(void); +int write_be32_key_to_mem(struct inv_mpu_iio_s *st, + u32 data, int key); +int inv_set_accel_bias_dmp(struct inv_mpu_iio_s *st); +int inv_send_sensor_data(struct inv_mpu_iio_s *st, u16 elements); +int inv_send_interrupt_word(struct inv_mpu_iio_s *st, bool on); +int mpu_memory_write(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, + u32 len, u8 const *data); +int mpu_memory_read(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, + u32 len, u8 *data); +int mpu_memory_write_unaligned(struct inv_mpu_iio_s *st, u16 key, int len, + u8 const *d); +/* used to print i2c data using pr_debug */ +char *wr_pr_debug_begin(u8 const *data, u32 len, char *string); +char *wr_pr_debug_end(char *string); + +#define mem_w(a, b, c) \ + mpu_memory_write(st, st->i2c_addr, a, b, c) +#define mem_w_key(key, b, c) mpu_memory_write_unaligned(st, key, b, c) #define inv_i2c_read(st, reg, len, 
data) \ inv_i2c_read_base(st, st->i2c_addr, reg, len, data) #define inv_i2c_single_write(st, reg, data) \ @@ -695,5 +892,6 @@ int inv_hw_self_test(struct inv_gyro_state_s *st); #define inv_secondary_write(reg, data) \ inv_i2c_single_write_base(st, st->plat_data.secondary_i2c_addr, \ reg, data) -#endif /* #ifndef _INV_GYRO_H_ */ + +#endif /* #ifndef _INV_MPU_IIO_H_ */ diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c index 9b2bbcfa6b4..00f16875a83 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c @@ -17,11 +17,13 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro_misc.c - * @brief A sysfs device driver for Invensense gyroscopes. - * @details This file is part of inv_gyro driver code + * @file inv_mpu_misc.c + * @brief A sysfs device driver for Invensense mpu. + * @details This file is part of invensense mpu driver code */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -35,16 +37,16 @@ #include #include #include +#include #include "inv_mpu_iio.h" -/* - Defines -*/ +#include "../../inv_test/inv_counters.h" + /* DMP defines */ #define DMP_ORIENTATION_TIME 500 #define DMP_ORIENTATION_ANGLE 60 #define DMP_DEFAULT_FIFO_RATE 200 -#define DMP_TAP_SCALE (767603923/5) +#define DMP_TAP_SCALE (767603923 / 5) #define DMP_MULTI_SHIFT 30 #define DMP_MULTI_TAP_TIME 500 #define DMP_SHAKE_REJECT_THRESH 100 @@ -54,6 +56,8 @@ #define DMP_PRECISION 1000 #define DMP_MAX_DIVIDER 4 #define DMP_MAX_MIN_TAPS 4 +#define DMP_IMAGE_CRC_VALUE 0x665f5a73 +#define DMP_IMAGE_SIZE 2913 /*--- Test parameters defaults --- */ #define DEF_OLDEST_SUPP_PROD_REV 8 @@ -61,17 +65,13 @@ /* sample rate */ #define DEF_SELFTEST_SAMPLE_RATE 0 -/* LPF parameter */ -#define DEF_SELFTEST_LPF_PARA 1 /* full scale setting dps */ #define DEF_SELFTEST_GYRO_FULL_SCALE (0 << 3) #define DEF_SELFTEST_ACCL_FULL_SCALE (2 << 3) -#define DEF_SELFTEST_GYRO_SENS (32768/250) +#define DEF_SELFTEST_GYRO_SENS (32768 / 250) /* wait time before collecting data */ -#define DEF_GYRO_WAIT_TIME 50 +#define DEF_GYRO_WAIT_TIME 10 #define DEF_ST_STABLE_TIME 200 -#define DEF_GYRO_PACKET_THRESH DEF_GYRO_WAIT_TIME -#define DEF_GYRO_THRESH 10 #define DEF_GYRO_SCALE 131 #define DEF_ST_PRECISION 1000 #define DEF_ST_ACCL_FULL_SCALE 8000UL @@ -79,8 +79,12 @@ #define DEF_ST_TRY_TIMES 2 #define DEF_ST_COMPASS_RESULT_SHIFT 2 #define DEF_ST_ACCEL_RESULT_SHIFT 1 +#define DEF_ST_OTP0_THRESH 60 +#define DEF_ST_ABS_THRESH 20 +#define DEF_ST_TOR 2 -#define DEF_ST_COMPASS_WAIT (10*1000) +#define DEF_ST_COMPASS_WAIT_MIN (10 * 1000) +#define DEF_ST_COMPASS_WAIT_MAX (15 * 1000) #define DEF_ST_COMPASS_TRY_TIMES 10 #define DEF_ST_COMPASS_8963_SHIFT 2 @@ -103,7 +107,7 @@ static struct test_setup_t test_setup = { .gyro_sens = DEF_SELFTEST_GYRO_SENS, .sample_rate = DEF_SELFTEST_SAMPLE_RATE, - .lpf = DEF_SELFTEST_LPF_PARA, + .lpf = INV_FILTER_188HZ, .fsr = DEF_SELFTEST_GYRO_FULL_SCALE, .accl_fs = DEF_SELFTEST_ACCL_FULL_SCALE }; @@ -116,78 +120,148 @@ static const struct prod_rev_map_t prod_rev_map[] = { {MPL_PROD_KEY(0, 3), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 4), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 5), MPU_SILICON_REV_A2, 131, 16384}, - {MPL_PROD_KEY(0, 6), MPU_SILICON_REV_A2, 131, 16384}, /* (A2/C2-1) */ - /* prod_ver = 1, forced to 0 for MPU6050 A2 */ + {MPL_PROD_KEY(0, 6), MPU_SILICON_REV_A2, 131, 16384}, + /* prod_ver = 1 */ {MPL_PROD_KEY(0, 7), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 8), MPU_SILICON_REV_A2, 131, 
16384}, {MPL_PROD_KEY(0, 9), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 10), MPU_SILICON_REV_A2, 131, 16384}, - {MPL_PROD_KEY(0, 11), MPU_SILICON_REV_A2, 131, 16384}, /* (A2/D2-1) */ + {MPL_PROD_KEY(0, 11), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 12), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 13), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 14), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 15), MPU_SILICON_REV_A2, 131, 16384}, - {MPL_PROD_KEY(0, 27), MPU_SILICON_REV_A2, 131, 16384}, /* (A2/D4) */ + {MPL_PROD_KEY(0, 27), MPU_SILICON_REV_A2, 131, 16384}, /* prod_ver = 1 */ - {MPL_PROD_KEY(1, 16), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-1) */ - {MPL_PROD_KEY(1, 17), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-2) */ - {MPL_PROD_KEY(1, 18), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-3) */ - {MPL_PROD_KEY(1, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-4) */ - {MPL_PROD_KEY(1, 20), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-5) */ - {MPL_PROD_KEY(1, 28), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D4) */ - {MPL_PROD_KEY(1, 1), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-1) */ - {MPL_PROD_KEY(1, 2), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-2) */ - {MPL_PROD_KEY(1, 3), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-3) */ - {MPL_PROD_KEY(1, 4), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-4) */ - {MPL_PROD_KEY(1, 5), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-5) */ - {MPL_PROD_KEY(1, 6), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-6) */ + {MPL_PROD_KEY(1, 16), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 17), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 18), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 19), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 20), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 28), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 1), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 2), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 3), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 4), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 5), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 6), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 2 */ - {MPL_PROD_KEY(2, 7), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-1) */ - {MPL_PROD_KEY(2, 8), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-2) */ - {MPL_PROD_KEY(2, 9), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-3) */ - {MPL_PROD_KEY(2, 10), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-4) */ - {MPL_PROD_KEY(2, 11), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-5) */ - {MPL_PROD_KEY(2, 12), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-6) */ - {MPL_PROD_KEY(2, 29), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/D4) */ + {MPL_PROD_KEY(2, 7), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 8), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 9), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 10), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 11), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 12), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 29), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 3 */ - {MPL_PROD_KEY(3, 30), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E2) */ + {MPL_PROD_KEY(3, 30), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 4 */ - {MPL_PROD_KEY(4, 31), MPU_SILICON_REV_B1, 131, 8192}, /* (B2/F1) */ - {MPL_PROD_KEY(4, 1), MPU_SILICON_REV_B1, 131, 8192}, /* (B3/F1) */ - {MPL_PROD_KEY(4, 3), MPU_SILICON_REV_B1, 131, 8192}, /* (B4/F1) */ + {MPL_PROD_KEY(4, 31), MPU_SILICON_REV_B1, 131, 8192}, + {MPL_PROD_KEY(4, 1), MPU_SILICON_REV_B1, 
131, 8192}, + {MPL_PROD_KEY(4, 3), MPU_SILICON_REV_B1, 131, 8192}, /* prod_ver = 5 */ - {MPL_PROD_KEY(5, 3), MPU_SILICON_REV_B1, 131, 16384}, /* (B4/F1) */ + {MPL_PROD_KEY(5, 3), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 6 */ - {MPL_PROD_KEY(6, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B5/E2) */ + {MPL_PROD_KEY(6, 19), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 7 */ - {MPL_PROD_KEY(7, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B5/E2) */ + {MPL_PROD_KEY(7, 19), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 8 */ - {MPL_PROD_KEY(8, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B5/E2) */ + {MPL_PROD_KEY(8, 19), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 9 */ - {MPL_PROD_KEY(9, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B5/E2) */ + {MPL_PROD_KEY(9, 19), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 10 */ - {MPL_PROD_KEY(10, 19), MPU_SILICON_REV_B1, 131, 16384} /* (B5/E2) */ + {MPL_PROD_KEY(10, 19), MPU_SILICON_REV_B1, 131, 16384} }; /* - List of product software revisions - - NOTE : - software revision 0 falls back to the old detection method - based off the product version and product revision per the - table above +* List of product software revisions +* +* NOTE : +* software revision 0 falls back to the old detection method +* based off the product version and product revision per the +* table above */ static const struct prod_rev_map_t sw_rev_map[] = { {0, 0, 0, 0}, {1, MPU_SILICON_REV_B1, 131, 8192}, /* rev C */ {2, MPU_SILICON_REV_B1, 131, 16384} /* rev D */ }; +static const u16 accl_6500_st_tb[256] = { +655, 662, 669, 675, 682, 689, 696, 703, +710, 717, 724, 731, 738, 746, 753, 761, +768, 776, 784, 792, 800, 808, 816, 824, +832, 840, 849, 857, 866, 875, 883, 892, +901, 910, 919, 928, 938, 947, 957, 966, +976, 985, 995, 1005, 1015, 1026, 1036, 1046, +1057, 1067, 1078, 1089, 1099, 1110, 1122, 1133, +1144, 1156, 1167, 1179, 1191, 1202, 1215, 1227, +1239, 1251, 1264, 1276, 1289, 1302, 1315, 1328, +1342, 1355, 1369, 1382, 1396, 1410, 1424, 1438, +1453, 1467, 1482, 1497, 1512, 1527, 1542, 1558, +1573, 1589, 1605, 1621, 1637, 1653, 1670, 1687, +1703, 1720, 1738, 1755, 1773, 1790, 1808, 1826, +1845, 1863, 1882, 1900, 1920, 1939, 1958, 1978, +1997, 2017, 2038, 2058, 2079, 2099, 2120, 2142, +2163, 2185, 2206, 2228, 2251, 2273, 2296, 2319, +2342, 2366, 2389, 2413, 2437, 2462, 2486, 2511, +2536, 2562, 2587, 2613, 2639, 2666, 2692, 2719, +2746, 2774, 2802, 2830, 2858, 2886, 2915, 2944, +2974, 3004, 3034, 3064, 3095, 3126, 3157, 3188, +3220, 3253, 3285, 3318, 3351, 3385, 3418, 3453, +3487, 3522, 3557, 3593, 3629, 3665, 3702, 3739, +3776, 3814, 3852, 3891, 3929, 3969, 4008, 4048, +4089, 4130, 4171, 4213, 4255, 4298, 4341, 4384, +4428, 4472, 4517, 4562, 4608, 4654, 4700, 4747, +4795, 4843, 4891, 4940, 4989, 5039, 5090, 5140, +5192, 5244, 5296, 5349, 5403, 5457, 5511, 5566, +5622, 5678, 5735, 5792, 5850, 5909, 5968, 6028, +6088, 6149, 6210, 6272, 6335, 6398, 6462, 6527, +6592, 6658, 6725, 6792, 6860, 6929, 6998, 7068, +7139, 7210, 7282, 7355, 7428, 7503, 7578, 7653, +7730, 7807, 7885, 7964, 8044, 8124, 8206, 8288, +}; + +static const u16 gyro_6500_st_tb[256] = { +2621, 2648, 2674, 2701, 2728, 2755, 2783, 2811, +2839, 2867, 2896, 2925, 2954, 2983, 3013, 3043, +3074, 3105, 3136, 3167, 3199, 3231, 3263, 3296, +3329, 3362, 3395, 3429, 3464, 3498, 3533, 3569, +3604, 3640, 3677, 3714, 3751, 3788, 3826, 3864, +3903, 3942, 3981, 4021, 4061, 4102, 4143, 4185, +4226, 4269, 4311, 4354, 4398, 4442, 4486, 4531, +4577, 4622, 4669, 4715, 4762, 4810, 4858, 4907, +4956, 5005, 5055, 5106, 5157, 5209, 5261, 5313, 
+5366, 5420, 5474, 5529, 5584, 5640, 5696, 5753, +5811, 5869, 5928, 5987, 6047, 6107, 6168, 6230, +6292, 6355, 6419, 6483, 6548, 6613, 6680, 6746, +6814, 6882, 6951, 7020, 7091, 7161, 7233, 7305, +7378, 7452, 7527, 7602, 7678, 7755, 7832, 7911, +7990, 8070, 8150, 8232, 8314, 8397, 8481, 8566, +8652, 8738, 8826, 8914, 9003, 9093, 9184, 9276, +9369, 9462, 9557, 9653, 9749, 9847, 9945, 10044, +10145, 10246, 10349, 10452, 10557, 10662, 10769, 10877, +10985, 11095, 11206, 11318, 11432, 11546, 11661, 11778, +11896, 12015, 12135, 12256, 12379, 12502, 12627, 12754, +12881, 13010, 13140, 13272, 13404, 13538, 13674, 13810, +13949, 14088, 14229, 14371, 14515, 14660, 14807, 14955, +15104, 15255, 15408, 15562, 15718, 15875, 16034, 16194, +16356, 16519, 16685, 16851, 17020, 17190, 17362, 17536, +17711, 17888, 18067, 18248, 18430, 18614, 18801, 18989, +19179, 19370, 19564, 19760, 19957, 20157, 20358, 20562, +20768, 20975, 21185, 21397, 21611, 21827, 22045, 22266, +22488, 22713, 22940, 23170, 23401, 23635, 23872, 24111, +24352, 24595, 24841, 25089, 25340, 25594, 25850, 26108, +26369, 26633, 26899, 27168, 27440, 27714, 27992, 28271, +28554, 28840, 29128, 29419, 29714, 30011, 30311, 30614, +30920, 31229, 31542, 31857, 32176, 32497, 32822, 33151, +}; static const int accl_st_tb[31] = { 340, 351, 363, 375, 388, 401, 414, 428, 443, 458, 473, 489, 506, 523, 541, 559, 578, 597, 617, 638, 660, 682, 705, 729, 753, 779, 805, 832, 860, 889, 919}; + static const int gyro_6050_st_tb[31] = { 3275, 3425, 3583, 3748, 3920, 4100, 4289, 4486, 4693, 4909, 5134, 5371, 5618, 5876, 6146, 6429, @@ -227,19 +301,33 @@ static const int gyro_3500_st_tb[255] = { 28538, 28823, 29112, 29403, 29697, 29994, 30294, 30597, 30903, 31212, 31524, 31839, 32157, 32479, 32804}; -int mpu_memory_write(struct i2c_adapter *i2c_adap, - unsigned char mpu_addr, - unsigned short mem_addr, - unsigned int len, unsigned char const *data) +char *wr_pr_debug_begin(u8 const *data, u32 len, char *string) +{ + int ii; + string = kmalloc(len * 2 + 1, GFP_KERNEL); + for (ii = 0; ii < len; ii++) + sprintf(&string[ii * 2], "%02X", data[ii]); + string[len * 2] = 0; + return string; +} + +char *wr_pr_debug_end(char *string) +{ + kfree(string); + return ""; +} + +int mpu_memory_write(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, + u32 len, u8 const *data) { - unsigned char bank[2]; - unsigned char addr[2]; - unsigned char buf[513]; + u8 bank[2]; + u8 addr[2]; + u8 buf[513]; struct i2c_msg msgs[3]; int res; - if (!data || !i2c_adap) + if (!data || !st) return -EINVAL; if (len >= (sizeof(buf) - 1)) @@ -248,7 +336,7 @@ int mpu_memory_write(struct i2c_adapter *i2c_adap, bank[0] = REG_BANK_SEL; bank[1] = mem_addr >> 8; - addr[0] = REG_MEM_START; + addr[0] = REG_MEM_START_ADDR; addr[1] = mem_addr & 0xFF; buf[0] = REG_MEM_RW; @@ -267,36 +355,48 @@ int mpu_memory_write(struct i2c_adapter *i2c_adap, msgs[2].addr = mpu_addr; msgs[2].flags = 0; - msgs[2].buf = (unsigned char *)buf; + msgs[2].buf = (u8 *)buf; msgs[2].len = len + 1; - res = i2c_transfer(i2c_adap, msgs, 3); + INV_I2C_INC_MPUWRITE(3 + 3 + (2 + len)); +#if CONFIG_DYNAMIC_DEBUG + { + char *write = 0; + pr_debug("%s WM%02X%02X%02X%s%s - %d\n", st->hw->name, + mpu_addr, bank[1], addr[1], + wr_pr_debug_begin(data, len, write), + wr_pr_debug_end(write), + len); + } +#endif + + res = i2c_transfer(st->sl_handle, msgs, 3); if (res != 3) { if (res >= 0) res = -EIO; return res; - } else + } else { return 0; + } } -int mpu_memory_read(struct i2c_adapter *i2c_adap, - unsigned char mpu_addr, - unsigned short mem_addr, - 
unsigned int len, unsigned char *data) + +int mpu_memory_read(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, + u32 len, u8 *data) { - unsigned char bank[2]; - unsigned char addr[2]; - unsigned char buf; + u8 bank[2]; + u8 addr[2]; + u8 buf; struct i2c_msg msgs[4]; int res; - if (!data || !i2c_adap) + if (!data || !st) return -EINVAL; bank[0] = REG_BANK_SEL; bank[1] = mem_addr >> 8; - addr[0] = REG_MEM_START; + addr[0] = REG_MEM_START_ADDR; addr[1] = mem_addr & 0xFF; buf = REG_MEM_RW; @@ -322,53 +422,129 @@ int mpu_memory_read(struct i2c_adapter *i2c_adap, msgs[3].buf = data; msgs[3].len = len; - res = i2c_transfer(i2c_adap, msgs, 4); + res = i2c_transfer(st->sl_handle, msgs, 4); if (res != 4) { if (res >= 0) res = -EIO; - return res; } else - return 0; + res = 0; + + INV_I2C_INC_MPUWRITE(3 + 3 + 3); + INV_I2C_INC_MPUREAD(len); +#if CONFIG_DYNAMIC_DEBUG + { + char *read = 0; + pr_debug("%s RM%02X%02X%02X%02X - %s%s\n", st->hw->name, + mpu_addr, bank[1], addr[1], len, + wr_pr_debug_begin(data, len, read), + wr_pr_debug_end(read)); + } +#endif + + return res; +} + +int mpu_memory_write_unaligned(struct inv_mpu_iio_s *st, u16 key, int len, + u8 const *d) +{ + u32 addr; + int start, end; + int len1, len2; + int result = 0; + if (len > MPU_MEM_BANK_SIZE) + return -EINVAL; + addr = inv_dmp_get_address(key); + if (addr > MPU6XXX_MAX_MPU_MEM) + return -EINVAL; + start = (addr >> 8); + end = ((addr + len - 1) >> 8); + if (start == end) { + result = mpu_memory_write(st, st->i2c_addr, addr, len, d); + } else { + end <<= 8; + len1 = end - addr; + len2 = len - len1; + result = mpu_memory_write(st, st->i2c_addr, addr, len1, d); + result |= mpu_memory_write(st, st->i2c_addr, end, len2, + d + len1); + } + + return result; } /** - * @internal - * @brief Inverse lookup of the index of an MPL product key . - * @param key - * the MPL product indentifier also referred to as 'key'. - * @return the index position of the key in the array, -1 if not found. + * index_of_key()- Inverse lookup of the index of an MPL product key . + * @key: the MPL product indentifier also referred to as 'key'. 
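+ * Returns the index position of the key in the array, or -EINVAL if not found.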
*/ -static short index_of_key(unsigned short key) +static short index_of_key(u16 key) { int i; for (i = 0; i < NUM_OF_PROD_REVS; i++) if (prod_rev_map[i].mpl_product_key == key) return (short)i; - return -1; + return -EINVAL; +} + +int inv_get_silicon_rev_mpu6500(struct inv_mpu_iio_s *st) +{ + struct inv_chip_info_s *chip_info = &st->chip_info; + int result; + u8 whoami, sw_rev; + + result = inv_i2c_read(st, REG_WHOAMI, 1, &whoami); + if (result) + return result; + if (whoami != MPU6500_ID && whoami != MPU9250_ID) + return -EINVAL; + + /*memory read need more time after power up */ + msleep(POWER_UP_TIME); + result = mpu_memory_read(st, st->i2c_addr, + MPU6500_MEM_REV_ADDR, 1, &sw_rev); + if (sw_rev == 0) { + pr_warning("Rev 0 of MPU6500\n"); + pr_warning("can't sit with other devices in same I2C bus\n"); + } + if (result) + return result; + if (sw_rev > MPU6500_REV) + return -EINVAL; + + /* these values are place holders and not real values */ + chip_info->product_id = MPU6500_PRODUCT_REVISION; + chip_info->product_revision = MPU6500_PRODUCT_REVISION; + chip_info->silicon_revision = MPU6500_PRODUCT_REVISION; + chip_info->software_revision = sw_rev; + chip_info->gyro_sens_trim = DEFAULT_GYRO_TRIM; + chip_info->accl_sens_trim = DEFAULT_ACCL_TRIM; + chip_info->multi = 1; + + return 0; } -int inv_get_silicon_rev_mpu6050(struct inv_gyro_state_s *st) +int inv_get_silicon_rev_mpu6050(struct inv_mpu_iio_s *st) { int result; struct inv_reg_map_s *reg; - unsigned char prod_ver = 0x00, prod_rev = 0x00; + u8 prod_ver = 0x00, prod_rev = 0x00; struct prod_rev_map_t *p_rev; - unsigned char bank = + u8 bank = (BIT_PRFTCH_EN | BIT_CFG_USER_BANK | MPU_MEM_OTP_BANK_0); - unsigned short mem_addr = ((bank << 8) | MEM_ADDR_PROD_REV); - unsigned short key; - unsigned char regs[5]; - unsigned short sw_rev; + u16 mem_addr = ((bank << 8) | MEM_ADDR_PROD_REV); + u16 key; + u8 regs[5]; + u16 sw_rev; short index; struct inv_chip_info_s *chip_info = &st->chip_info; reg = &st->reg; - result = inv_i2c_read(st, reg->product_id, 1, &prod_ver); + result = inv_i2c_read(st, REG_PRODUCT_ID, 1, &prod_ver); if (result) return result; prod_ver &= 0xf; + /*memory read need more time after power up */ msleep(POWER_UP_TIME); - result = mpu_memory_read(st->sl_handle, st->i2c_addr, mem_addr, + result = mpu_memory_read(st, st->i2c_addr, mem_addr, 1, &prod_rev); if (result) return result; @@ -390,19 +566,20 @@ int inv_get_silicon_rev_mpu6050(struct inv_gyro_state_s *st) if (sw_rev == 0) { key = MPL_PROD_KEY(prod_ver, prod_rev); if (key == 0) - return -1; + return -EINVAL; index = index_of_key(key); - if (index == -1 || index >= NUM_OF_PROD_REVS) - return -1; + if (index < 0 || index >= NUM_OF_PROD_REVS) + return -EINVAL; /* check MPL is compiled for this device */ if (prod_rev_map[index].silicon_rev != MPU_SILICON_REV_B1) - return -1; + return -EINVAL; p_rev = (struct prod_rev_map_t *)&prod_rev_map[index]; /* if valid, use the software product key */ - } else if (sw_rev < ARRAY_SIZE(sw_rev_map)) + } else if (sw_rev < ARRAY_SIZE(sw_rev_map)) { p_rev = (struct prod_rev_map_t *)&sw_rev_map[sw_rev]; - else - return -1; + } else { + return -EINVAL; + } chip_info->product_id = prod_ver; chip_info->product_revision = prod_rev; chip_info->silicon_revision = p_rev->silicon_rev; @@ -411,37 +588,39 @@ int inv_get_silicon_rev_mpu6050(struct inv_gyro_state_s *st) chip_info->accl_sens_trim = p_rev->accel_trim; if (chip_info->accl_sens_trim == 0) chip_info->accl_sens_trim = DEFAULT_ACCL_TRIM; - chip_info->multi = 
DEFAULT_ACCL_TRIM/chip_info->accl_sens_trim; + chip_info->multi = DEFAULT_ACCL_TRIM / chip_info->accl_sens_trim; if (chip_info->multi != 1) - pr_err("multi is %d\n", chip_info->multi); + pr_info("multi is %d\n", chip_info->multi); return result; } + /** - * @internal - * @brief read the accelerometer hardware self-test bias shift calculated - * during final production test and stored in chip non-volatile memory. - * @param st - * serial interface handle to allow serial communication with the - * device, both gyro and accelerometer. - * @param ct_shift_prod - * A pointer to an array of 3 float elements to hold the values + * read_accel_hw_self_test_prod_shift()- read the accelerometer hardware + * self-test bias shift calculated + * during final production test and + * stored in chip non-volatile memory. + * @st: main data structure. + * @st_prod: A pointer to an array of 3 elements to hold the values * for production hardware self-test bias shifts returned to the * user. - * @return 0 on success, or a non-zero error code otherwise. */ -static int read_accel_hw_self_test_prod_shift(struct inv_gyro_state_s *st, +static int read_accel_hw_self_test_prod_shift(struct inv_mpu_iio_s *st, int *st_prod) { - unsigned char regs[4]; - unsigned char shift_code[3]; + u8 regs[4]; + u8 shift_code[3]; int result, i; - st_prod[0] = st_prod[1] = st_prod[2] = 0; + + st_prod[0] = 0; + st_prod[1] = 0; + st_prod[2] = 0; result = inv_i2c_read(st, REG_ST_GCT_X, ARRAY_SIZE(regs), regs); + if (result) return result; if ((0 == regs[0]) && (0 == regs[1]) && - (0 == regs[2]) && (0 == regs[3])) - return -1; + (0 == regs[2]) && (0 == regs[3])) + return -EINVAL; shift_code[X] = ((regs[0] & 0xE0) >> 3) | ((regs[3] & 0x30) >> 4); shift_code[Y] = ((regs[1] & 0xE0) >> 3) | ((regs[3] & 0x0C) >> 2); shift_code[Z] = ((regs[2] & 0xE0) >> 3) | (regs[3] & 0x03); @@ -450,31 +629,41 @@ static int read_accel_hw_self_test_prod_shift(struct inv_gyro_state_s *st, st_prod[i] = test_setup.accl_sens[i]* accl_st_tb[shift_code[i] - 1]; } + return 0; } -static int inv_check_accl_self_test(struct inv_gyro_state_s *st, +/** +* inv_check_accl_self_test()- check accel self test. this function returns +* zero as success. A non-zero return value +* indicates failure in self test. +* @*st: main data structure. +* @*reg_avg: average value of normal test. 
+* @*st_avg: average value of self test +*/ +static int inv_check_accl_self_test(struct inv_mpu_iio_s *st, int *reg_avg, int *st_avg){ int gravity, reg_z_avg, g_z_sign, fs, j, ret_val; int tmp1; int st_shift_prod[THREE_AXIS], st_shift_cust[THREE_AXIS]; int st_shift_ratio[THREE_AXIS]; + if (st->chip_info.software_revision < DEF_OLDEST_SUPP_SW_REV && - st->chip_info.product_revision < DEF_OLDEST_SUPP_PROD_REV) + st->chip_info.product_revision < DEF_OLDEST_SUPP_PROD_REV) return 0; fs = DEF_ST_ACCL_FULL_SCALE; /* assume +/- 2 mg as typical */ g_z_sign = 1; ret_val = 0; - test_setup.accl_sens[X] = (unsigned int)(DEF_ST_SCALE * + test_setup.accl_sens[X] = (u32)(DEF_ST_SCALE * DEF_ST_PRECISION / fs); - test_setup.accl_sens[Y] = (unsigned int)(DEF_ST_SCALE * + test_setup.accl_sens[Y] = (u32)(DEF_ST_SCALE * DEF_ST_PRECISION / fs); - test_setup.accl_sens[Z] = (unsigned int)(DEF_ST_SCALE * + test_setup.accl_sens[Z] = (u32)(DEF_ST_SCALE * DEF_ST_PRECISION / fs); if (MPL_PROD_KEY(st->chip_info.product_id, - st->chip_info.product_revision) == - MPU_PRODUCT_KEY_B1_E1_5) { + st->chip_info.product_revision) == + MPU_PRODUCT_KEY_B1_E1_5) { /* half sensitivity Z accelerometer parts */ test_setup.accl_sens[Z] /= 2; } else { @@ -484,35 +673,47 @@ static int inv_check_accl_self_test(struct inv_gyro_state_s *st, test_setup.accl_sens[Z] /= st->chip_info.multi; } gravity = test_setup.accl_sens[Z]; - reg_z_avg = reg_avg[Z] - g_z_sign * gravity*DEF_ST_PRECISION; - read_accel_hw_self_test_prod_shift(st, st_shift_prod); + reg_z_avg = reg_avg[Z] - g_z_sign * gravity * DEF_ST_PRECISION; + ret_val = read_accel_hw_self_test_prod_shift(st, st_shift_prod); + if (ret_val) + return ret_val; + for (j = 0; j < 3; j++) { st_shift_cust[j] = abs(reg_avg[j] - st_avg[j]); if (st_shift_prod[j]) { tmp1 = st_shift_prod[j]/DEF_ST_PRECISION; - st_shift_ratio[j] = st_shift_cust[j]/tmp1 - - DEF_ST_PRECISION; + st_shift_ratio[j] = abs(st_shift_cust[j]/tmp1 + - DEF_ST_PRECISION); if (st_shift_ratio[j] > DEF_ACCEL_ST_SHIFT_DELTA) ret_val |= 1 << j; - if (st_shift_ratio[j] < -DEF_ACCEL_ST_SHIFT_DELTA) - ret_val |= 1 << j; } else { if (st_shift_cust[j] < - DEF_ACCEL_ST_SHIFT_MIN*gravity) + DEF_ACCEL_ST_SHIFT_MIN * gravity) ret_val |= 1 << j; if (st_shift_cust[j] > - DEF_ACCEL_ST_SHIFT_MAX*gravity) + DEF_ACCEL_ST_SHIFT_MAX * gravity) ret_val |= 1 << j; } } + return ret_val; } -static int inv_check_3500_gyro_self_test(struct inv_gyro_state_s *st, + +/** +* inv_check_3500_gyro_self_test() check gyro self test. this function returns +* zero as success. A non-zero return value +* indicates failure in self test. +* @*st: main data structure. +* @*reg_avg: average value of normal test. +* @*st_avg: average value of self test +*/ + +static int inv_check_3500_gyro_self_test(struct inv_mpu_iio_s *st, int *reg_avg, int *st_avg){ int result; int gst[3], ret_val; int gst_otp[3], i; - unsigned char st_code[THREE_AXIS]; + u8 st_code[THREE_AXIS]; ret_val = 0; for (i = 0; i < 3; i++) @@ -520,90 +721,172 @@ static int inv_check_3500_gyro_self_test(struct inv_gyro_state_s *st, result = inv_i2c_read(st, REG_3500_OTP, THREE_AXIS, st_code); if (result) return result; - gst_otp[0] = gst_otp[1] = gst_otp[2] = 0; + gst_otp[0] = 0; + gst_otp[1] = 0; + gst_otp[2] = 0; for (i = 0; i < 3; i++) { if (st_code[i] != 0) gst_otp[i] = gyro_3500_st_tb[st_code[i] - 1]; } + /* check self test value passing criterion. 
Using the DEF_ST_TOR + * for certain degree of tolerance */ for (i = 0; i < 3; i++) { if (gst_otp[i] == 0) { - if (abs(gst[i])*4 < 60*2*DEF_ST_PRECISION* - DEF_GYRO_SCALE) - ret_val |= (1< DEF_GYRO_CT_SHIFT_DELTA) - ret_val |= (1< 20*2*DEF_ST_PRECISION*DEF_GYRO_SCALE) - ret_val |= (1< DEF_ST_TOR * DEF_ST_ABS_THRESH * + DEF_ST_PRECISION * DEF_GYRO_SCALE) + ret_val |= (1 << i); } return ret_val; } -static int inv_check_6050_gyro_self_test(struct inv_gyro_state_s *st, + +/** +* inv_check_6050_gyro_self_test() - check 6050 gyro self test. this function +* returns zero as success. A non-zero return +* value indicates failure in self test. +* @*st: main data structure. +* @*reg_avg: average value of normal test. +* @*st_avg: average value of self test +*/ +static int inv_check_6050_gyro_self_test(struct inv_mpu_iio_s *st, int *reg_avg, int *st_avg){ int result; int ret_val; int ct_shift_prod[3], st_shift_cust[3], st_shift_ratio[3], i; - unsigned char regs[3]; + u8 regs[3]; + if (st->chip_info.software_revision < DEF_OLDEST_SUPP_SW_REV && - st->chip_info.product_revision < DEF_OLDEST_SUPP_PROD_REV) + st->chip_info.product_revision < DEF_OLDEST_SUPP_PROD_REV) return 0; ret_val = 0; result = inv_i2c_read(st, REG_ST_GCT_X, 3, regs); + if (result) + return result; regs[X] &= 0x1f; regs[Y] &= 0x1f; regs[Z] &= 0x1f; - for (i = 0; i < 3; i++) { if (regs[i] != 0) ct_shift_prod[i] = gyro_6050_st_tb[regs[i] - 1]; else ct_shift_prod[i] = 0; } + + for (i = 0; i < 3; i++) { st_shift_cust[i] = abs(reg_avg[i] - st_avg[i]); if (ct_shift_prod[i]) { - st_shift_ratio[i] = st_shift_cust[i]/ - ct_shift_prod[i] - DEF_ST_PRECISION; + st_shift_ratio[i] = abs(st_shift_cust[i] / + ct_shift_prod[i] - DEF_ST_PRECISION); if (st_shift_ratio[i] > DEF_GYRO_CT_SHIFT_DELTA) ret_val |= 1 << i; - if (st_shift_ratio[i] < -DEF_GYRO_CT_SHIFT_DELTA) - ret_val |= 1 << i; } else { - if (st_shift_cust[i] < DEF_ST_PRECISION* - DEF_GYRO_CT_SHIFT_MIN*test_setup.gyro_sens) + if (st_shift_cust[i] < DEF_ST_PRECISION * + DEF_GYRO_CT_SHIFT_MIN * test_setup.gyro_sens) ret_val |= 1 << i; - if (st_shift_cust[i] > DEF_ST_PRECISION* - DEF_GYRO_CT_SHIFT_MAX*test_setup.gyro_sens) + if (st_shift_cust[i] > DEF_ST_PRECISION * + DEF_GYRO_CT_SHIFT_MAX * test_setup.gyro_sens) ret_val |= 1 << i; } } + /* check for absolute value passing criterion. Using DEF_ST_TOR + * for certain degree of tolerance */ + for (i = 0; i < 3; i++) { + if (abs(reg_avg[i]) > DEF_ST_TOR * DEF_ST_ABS_THRESH * + DEF_ST_PRECISION * DEF_GYRO_SCALE) + ret_val |= (1 << i); + } + + return ret_val; +} + +/** +* inv_check_6500_self_test() - check 6050 gyro self test. this function +* returns zero as success. A non-zero return +* value indicates failure in self test. +* @*st: main data structure. +* @*reg_avg: average value of normal test. +* @*st_avg: average value of self test +* @is_gyro: switch for gyro/accl. 
+*/ +static int inv_check_6500_self_test(struct inv_mpu_iio_s *st, + int *reg_avg, int *st_avg, bool is_gyro) +{ + int ret_val, result; + int ct_shift_prod[3], st_shift_cust[3], st_shift_ratio[3], i; + u8 regs[3]; + const u16 *st_tb; + + ret_val = 0; + if (is_gyro) { + st_tb = gyro_6500_st_tb; + result = inv_i2c_read(st, REG_6500_XG_ST_DATA, 3, regs); + } else { + st_tb = accl_6500_st_tb; + result = inv_i2c_read(st, REG_6500_XA_ST_DATA, 3, regs); + } + pr_debug("isgyro=%d, OTP:%d, %d, %d\n", is_gyro, regs[0], + regs[1], regs[2]); + + for (i = 0; i < 3; i++) { + if (regs[i] != 0) + ct_shift_prod[i] = st_tb[regs[i] - 1]; + else + ct_shift_prod[i] = 0; + } + for (i = 0; i < 3; i++) { - if (abs(reg_avg[i])*4 > 20*2*DEF_ST_PRECISION*DEF_GYRO_SCALE) - ret_val |= (1<self_test.threshold); + if (st_shift_ratio[i] > ST_THRESHOLD_MULTIPLIER * + st->self_test.threshold) + ret_val |= 1 << i; + } } + return ret_val; } /** * inv_do_test() - do the actual test of self testing */ -int inv_do_test(struct inv_gyro_state_s *st, int self_test_flag, +int inv_do_test(struct inv_mpu_iio_s *st, int self_test_flag, int *gyro_result, int *accl_result) { struct inv_reg_map_s *reg; int result, i, j, packet_size; - unsigned char data[BYTES_PER_SENSOR * 2], has_accl; - int fifo_count, packet_count, ind; + u8 data[BYTES_PER_SENSOR * 2], d; + bool has_accl; + int fifo_count, packet_count, ind, s; reg = &st->reg; has_accl = (st->chip_type != INV_ITG3500); - packet_size = BYTES_PER_SENSOR*(1 + has_accl); + if (has_accl) + packet_size = BYTES_PER_SENSOR * 2; + else + packet_size = BYTES_PER_SENSOR; result = inv_i2c_single_write(st, reg->int_enable, 0); if (result) @@ -638,7 +921,7 @@ int inv_do_test(struct inv_gyro_state_s *st, int self_test_flag, if (result) return result; } - /*wait for the output to stable*/ + /* wait for the output to get stable */ if (self_test_flag) msleep(DEF_ST_STABLE_TIME); @@ -647,115 +930,130 @@ int inv_do_test(struct inv_gyro_state_s *st, int self_test_flag, if (result) return result; /* enable sensor output to FIFO */ - result = inv_i2c_single_write(st, reg->fifo_en, BITS_GYRO_OUT - | (has_accl << 3)); - if (result) - return result; - mdelay(DEF_GYRO_WAIT_TIME); - /* stop sending data to FIFO */ - result = inv_i2c_single_write(st, reg->fifo_en, 0); - if (result) - return result; - result = inv_i2c_read(st, reg->fifo_count_h, 2, data); + if (has_accl) + d = BITS_GYRO_OUT | BIT_ACCEL_OUT; + else + d = BITS_GYRO_OUT; + result = inv_i2c_single_write(st, reg->fifo_en, d); if (result) return result; - fifo_count = (data[0] << 8) + data[1]; - packet_count = fifo_count/packet_size; - gyro_result[0] = gyro_result[1] = gyro_result[2] = 0; - accl_result[0] = accl_result[1] = accl_result[2] = 0; - if (abs(packet_count - DEF_GYRO_PACKET_THRESH) > DEF_GYRO_THRESH) - return -EAGAIN; - - for (i = 0; i < packet_count; i++) { - /* getting FIFO data */ - result = inv_i2c_read(st, reg->fifo_r_w, - packet_size, data); + + for (i = 0; i < THREE_AXIS; i++) { + gyro_result[i] = 0; + accl_result[i] = 0; + } + s = 0; + while (s < st->self_test.samples) { + mdelay(DEF_GYRO_WAIT_TIME); + result = inv_i2c_read(st, reg->fifo_count_h, + FIFO_COUNT_BYTE, data); if (result) + return result; + fifo_count = be16_to_cpup((__be16 *)(&data[0])); + packet_count = fifo_count / packet_size; + result = inv_i2c_read(st, reg->fifo_r_w, packet_size, data); + if (result) + return result; + i = 0; + while ((i < packet_count) && (s < st->self_test.samples)) { + result = inv_i2c_read(st, reg->fifo_r_w, + packet_size, data); + if (result) 
return result; - ind = 0; - if (has_accl) { + ind = 0; + if (has_accl) { + for (j = 0; j < THREE_AXIS; j++) + accl_result[j] += + (short)be16_to_cpup( + (__be16 *)(&data[ind + 2 * j])); + ind += BYTES_PER_SENSOR; + } for (j = 0; j < THREE_AXIS; j++) - accl_result[j] += - (short)be16_to_cpup((__be16 - *)(&data[ind + 2*j])); - ind += 6; + gyro_result[j] += + (short)be16_to_cpup( + (__be16 *)(&data[ind + 2 * j])); + s++; + i++; } - for (j = 0; j < THREE_AXIS; j++) - gyro_result[j] += - (short)be16_to_cpup((__be16 *)(&data[ind + 2*j])); } - gyro_result[0] = gyro_result[0]*DEF_ST_PRECISION/packet_count; - gyro_result[1] = gyro_result[1]*DEF_ST_PRECISION/packet_count; - gyro_result[2] = gyro_result[2]*DEF_ST_PRECISION/packet_count; + /* stop sending data to FIFO */ + result = inv_i2c_single_write(st, reg->fifo_en, 0); + if (result) + return result; + for (j = 0; j < THREE_AXIS; j++) { + gyro_result[j] = gyro_result[j]/s; + gyro_result[j] *= DEF_ST_PRECISION; + } + if (has_accl) { - accl_result[0] = - accl_result[0]*DEF_ST_PRECISION/packet_count; - accl_result[1] = - accl_result[1]*DEF_ST_PRECISION/packet_count; - accl_result[2] = - accl_result[2]*DEF_ST_PRECISION/packet_count; + for (j = 0; j < THREE_AXIS; j++) { + accl_result[j] = accl_result[j]/s; + accl_result[j] *= DEF_ST_PRECISION; + } } return 0; } + /** * inv_recover_setting() recover the old settings after everything is done */ -static void inv_recover_setting(struct inv_gyro_state_s *st) +void inv_recover_setting(struct inv_mpu_iio_s *st) { struct inv_reg_map_s *reg; int data; - struct iio_dev *indio = iio_priv_to_dev(st); reg = &st->reg; - set_inv_enable(indio, st->chip_config.enable); inv_i2c_single_write(st, reg->gyro_config, - st->chip_config.fsr<chip_config.fsr << GYRO_CONFIG_FSR_SHIFT); inv_i2c_single_write(st, reg->lpf, st->chip_config.lpf); - data = ONE_K_HZ/st->chip_config.fifo_rate - 1; + data = ONE_K_HZ/st->chip_config.new_fifo_rate - 1; inv_i2c_single_write(st, reg->sample_rate_div, data); if (INV_ITG3500 != st->chip_type) { inv_i2c_single_write(st, reg->accl_config, - (st->chip_config.accl_fs << ACCL_CONFIG_FSR_SHIFT)); + (st->chip_config.accl_fs << + ACCL_CONFIG_FSR_SHIFT)); } - if (st->chip_config.is_asleep) - inv_set_power_state(st, 0); - else - inv_set_power_state(st, 1); + st->switch_gyro_engine(st, false); + st->switch_accl_engine(st, false); + st->set_power_state(st, false); } -static int inv_check_compass_self_test(struct inv_gyro_state_s *st) + +static int inv_check_compass_self_test(struct inv_mpu_iio_s *st) { int result; - unsigned char data[6]; - unsigned char counter, cntl; + u8 data[6]; + u8 counter, cntl; short x, y, z; - unsigned char *sens; + u8 *sens; sens = st->chip_info.compass_sens; - /*set to bypass mode */ - result = inv_i2c_single_write(st, REG_INT_PIN_CFG, BIT_BYPASS_EN); + /* set to bypass mode */ + result = inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config | BIT_BYPASS_EN); if (result) { - inv_i2c_single_write(st, REG_INT_PIN_CFG, 0x0); + result = inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config); return result; } - /*set to power down mode */ - result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PW_DN); + /* set to power down mode */ + result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PD); if (result) goto AKM_fail; - /*write 1 to ASTC register */ + /* write 1 to ASTC register */ result = inv_secondary_write(REG_AKM_ST_CTRL, DATA_AKM_SELF_TEST); if (result) goto AKM_fail; - /*set self test mode */ - result = inv_secondary_write(REG_AKM_MODE, 
DATA_AKM_MODE_PW_ST); + /* set self test mode */ + result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_ST); if (result) goto AKM_fail; counter = DEF_ST_COMPASS_TRY_TIMES; while (counter > 0) { - usleep_range(DEF_ST_COMPASS_WAIT, DEF_ST_COMPASS_WAIT); + usleep_range(DEF_ST_COMPASS_WAIT_MIN, DEF_ST_COMPASS_WAIT_MAX); result = inv_secondary_read(REG_AKM_STATUS, 1, data); if (result) goto AKM_fail; @@ -765,7 +1063,7 @@ static int inv_check_compass_self_test(struct inv_gyro_state_s *st) counter = 0; } if ((data[0] & DATA_AKM_DRDY) == 0) { - result = -1; + result = -EINVAL; goto AKM_fail; } result = inv_secondary_read(REG_AKM_MEASURE_DATA, @@ -773,9 +1071,9 @@ static int inv_check_compass_self_test(struct inv_gyro_state_s *st) if (result) goto AKM_fail; - x = le16_to_cpup((__le16 *)(&data[0])); - y = le16_to_cpup((__le16 *)(&data[2])); - z = le16_to_cpup((__le16 *)(&data[4])); + x = le16_to_cpup((__le16 *)(&data[0])); + y = le16_to_cpup((__le16 *)(&data[2])); + z = le16_to_cpup((__le16 *)(&data[4])); x = ((x * (sens[0] + 128)) >> 8); y = ((y * (sens[1] + 128)) >> 8); z = ((z * (sens[2] + 128)) >> 8); @@ -789,7 +1087,7 @@ static int inv_check_compass_self_test(struct inv_gyro_state_s *st) z <<= DEF_ST_COMPASS_8963_SHIFT; } } - result = 1; + result = -EINVAL; if (x > st->compass_st_upper[X] || x < st->compass_st_lower[X]) goto AKM_fail; if (y > st->compass_st_upper[Y] || y < st->compass_st_lower[Y]) @@ -801,43 +1099,50 @@ static int inv_check_compass_self_test(struct inv_gyro_state_s *st) /*write 0 to ASTC register */ result |= inv_secondary_write(REG_AKM_ST_CTRL, 0); /*set to power down mode */ - result |= inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PW_DN); + result |= inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PD); /*restore to non-bypass mode */ - result |= inv_i2c_single_write(st, REG_INT_PIN_CFG, 0x0); + result |= inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config); return result; } -static int inv_power_up_self_test(struct inv_gyro_state_s *st) + +int inv_power_up_self_test(struct inv_mpu_iio_s *st) { int result; - result = inv_i2c_single_write(st, st->reg.pwr_mgmt_1, INV_CLK_PLL); + + result = st->set_power_state(st, true); if (result) return result; - msleep(POWER_UP_TIME); - result = inv_i2c_single_write(st, st->reg.pwr_mgmt_2, 0); + result = st->switch_accl_engine(st, true); if (result) return result; - msleep(POWER_UP_TIME); + result = st->switch_gyro_engine(st, true); + if (result) + return result; + return 0; } + /** * inv_hw_self_test() - main function to do hardware self test */ -int inv_hw_self_test(struct inv_gyro_state_s *st) +int inv_hw_self_test(struct inv_mpu_iio_s *st) { int result; int gyro_bias_st[THREE_AXIS], gyro_bias_regular[THREE_AXIS]; int accl_bias_st[THREE_AXIS], accl_bias_regular[THREE_AXIS]; int test_times; char compass_result, accel_result, gyro_result; - if (st->chip_config.is_asleep || st->chip_config.lpa_mode) { - result = inv_power_up_self_test(st); - if (result) - return result; - } - compass_result = accel_result = gyro_result = 0; + + result = inv_power_up_self_test(st); + if (result) + return result; + compass_result = 0; + accel_result = 0; + gyro_result = 0; test_times = DEF_ST_TRY_TIMES; while (test_times > 0) { - result = inv_do_test(st, 0, gyro_bias_regular, + result = inv_do_test(st, 0, gyro_bias_regular, accl_bias_regular); if (result == -EAGAIN) test_times--; @@ -864,22 +1169,42 @@ int inv_hw_self_test(struct inv_gyro_state_s *st) } else { if (st->chip_config.has_compass) compass_result = 
!inv_check_compass_self_test(st); - accel_result = !inv_check_accl_self_test(st, - accl_bias_regular, accl_bias_st); - gyro_result = !inv_check_6050_gyro_self_test(st, - gyro_bias_regular, gyro_bias_st); + + if (INV_MPU6050 == st->chip_type) { + accel_result = !inv_check_accl_self_test(st, + accl_bias_regular, accl_bias_st); + gyro_result = !inv_check_6050_gyro_self_test(st, + gyro_bias_regular, gyro_bias_st); + } else if (INV_MPU6500 == st->chip_type) { + accel_result = !inv_check_6500_self_test(st, + accl_bias_regular, accl_bias_st, false); + gyro_result = !inv_check_6500_self_test(st, + gyro_bias_regular, gyro_bias_st, true); + } } test_fail: inv_recover_setting(st); - return (compass_result< 0; bank++, @@ -899,13 +1224,13 @@ static int inv_load_firmware(struct inv_gyro_state_s *st, return 0; } -static int inv_verify_firmware(struct inv_gyro_state_s *st, - unsigned char *data, int size) +static int inv_verify_firmware(struct inv_mpu_iio_s *st, + u8 *data, int size) { int bank, write_size; int result; - unsigned short memaddr; - unsigned char firmware[MPU_MEM_BANK_SIZE]; + u16 memaddr; + u8 firmware[MPU_MEM_BANK_SIZE]; /* Write and verify memory */ for (bank = 0; size > 0; bank++, @@ -917,7 +1242,7 @@ static int inv_verify_firmware(struct inv_gyro_state_s *st, write_size = size; memaddr = ((bank << 8) | 0x00); - result = mpu_memory_read(st->sl_handle, + result = mpu_memory_read(st, st->i2c_addr, memaddr, write_size, firmware); if (result) return result; @@ -927,18 +1252,18 @@ static int inv_verify_firmware(struct inv_gyro_state_s *st, return 0; } -static int inv_set_fifo_div(struct inv_gyro_state_s *st, - unsigned short fifoRate) +static int inv_set_fifo_div(struct inv_mpu_iio_s *st, + u16 fifoRate) { - unsigned char regs[2]; + u8 regs[2]; int result = 0; /*For some reason DINAC4 is defined as 0xb8, but DINBC4 is not*/ - const unsigned char regs_end[12] = {DINAFE, DINAF2, DINAAB, 0xc4, + const u8 regs_end[] = {DINAFE, DINAF2, DINAAB, 0xc4, DINAAA, DINAF1, DINADF, DINADF, 0xbb, 0xaf, DINADF, DINADF}; - regs[0] = (unsigned char)((fifoRate >> 8) & 0xff); - regs[1] = (unsigned char)(fifoRate & 0xff); + regs[0] = (u8)((fifoRate >> 8) & 0xff); + regs[1] = (u8)(fifoRate & 0xff); result = mem_w_key(KEY_D_0_22, ARRAY_SIZE(regs), regs); if (result) return result; @@ -950,28 +1275,29 @@ static int inv_set_fifo_div(struct inv_gyro_state_s *st, return result; } -int inv_send_quaternion(struct inv_gyro_state_s *st, int on) +int inv_send_quaternion(struct inv_mpu_iio_s *st, bool on) { - const unsigned char regs_on[] = {DINBC0, DINBC2, - DINBC4, DINBC6}; - const unsigned char regs_off[] = {DINA80, DINA80, - DINA80, DINA80}; - const unsigned char *regs; - unsigned char result; + const u8 regs_on[] = {DINBC0, DINBC2, + DINBC4, DINBC6}; + const u8 regs_off[] = {DINA80, DINA80, + DINA80, DINA80}; + const u8 *regs; + u8 result; if (on) regs = regs_on; else regs = regs_off; result = mem_w_key(KEY_CFG_LP_QUAT, ARRAY_SIZE(regs_on), regs); + return result; } -int inv_set_display_orient_interrupt_dmp(struct inv_gyro_state_s *st, - int on) +int inv_set_display_orient_interrupt_dmp(struct inv_mpu_iio_s *st, + bool on) { /*Turn on the display orientation interrupt in the DMP*/ int result; - unsigned char regs[1] = {0xd8}; + u8 regs[] = {0xd8}; if (on) regs[0] = 0xd9; @@ -979,16 +1305,16 @@ int inv_set_display_orient_interrupt_dmp(struct inv_gyro_state_s *st, return result; } -int inv_set_fifo_rate(struct inv_gyro_state_s *st, unsigned long fifo_rate) +int inv_set_fifo_rate(struct inv_mpu_iio_s *st, u16 fifo_rate) { 
- unsigned char divider; + u8 divider; int result; - divider = (unsigned char)(ONE_K_HZ/fifo_rate) - 1; + divider = (u8)(ONE_K_HZ / fifo_rate) - 1; if (divider > DMP_MAX_DIVIDER) { st->sample_divider = DMP_MAX_DIVIDER; st->fifo_divider = - (unsigned char)(DMP_DEFAULT_FIFO_RATE/fifo_rate)-1; + (u8)(DMP_DEFAULT_FIFO_RATE / fifo_rate) - 1; } else { st->sample_divider = divider; st->fifo_divider = 0; @@ -998,11 +1324,11 @@ int inv_set_fifo_rate(struct inv_gyro_state_s *st, unsigned long fifo_rate) return result; } -static int inv_set_tap_interrupt_dmp(struct inv_gyro_state_s *st, - unsigned char on) +static int inv_set_tap_interrupt_dmp(struct inv_mpu_iio_s *st, + u8 on) { int result; - unsigned char regs[] = {0}; + u8 regs[] = {0}; if (on) regs[0] = 0xf8; @@ -1013,40 +1339,23 @@ static int inv_set_tap_interrupt_dmp(struct inv_gyro_state_s *st, return result; return result; } -static int inv_set_orientation_interrupt_dmp(struct inv_gyro_state_s *st, - unsigned char on) -{ - int result; - unsigned char regs[2]; - if (on) { - regs[0] = DINBF8; - regs[1] = DINBF8; - } else { - regs[0] = DINAD8; - regs[1] = DINAD8; - } - result = mem_w_key(KEY_CFG_ORIENT_IRQ_1, ARRAY_SIZE(regs), regs); - if (result) - return result; - return result; -} -int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, - unsigned int axis, unsigned short threshold) +int inv_set_tap_threshold_dmp(struct inv_mpu_iio_s *st, + u32 axis, u16 threshold) { /* Sets the tap threshold in the dmp Simultaneously sets secondary tap threshold to help correct the tap direction for soft taps */ int result; /* DMP Algorithm */ - unsigned char data[2]; + u8 data[2]; int sampleDivider; int scaledThreshold; - unsigned int dmpThreshold; - unsigned char sample_div; -#define accel_sens (0x20000000/0x00010000) + u32 dmpThreshold; + u8 sample_div; + const u32 accel_sens = (0x20000000 / 0x00010000); - if ((axis & ~(INV_TAP_AXIS_ALL)) || (threshold > (1<<15))) + if ((axis & ~(INV_TAP_AXIS_ALL)) || (threshold > (1 << 15))) return -EINVAL; sample_div = st->sample_divider; @@ -1067,10 +1376,10 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, /* Scale to DMP 16 bit value */ if (accel_sens != 0) - dmpThreshold = (unsigned int)(scaledThreshold*accel_sens); + dmpThreshold = (u32)(scaledThreshold * accel_sens); else return -EINVAL; - dmpThreshold = dmpThreshold/DMP_PRECISION; + dmpThreshold = dmpThreshold / DMP_PRECISION; data[0] = dmpThreshold >> 8; data[1] = dmpThreshold & 0xFF; @@ -1083,8 +1392,8 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, /*Also set additional threshold for correcting the direction of taps that were very near the threshold. 
*/ - data[0] = (dmpThreshold*3/4) >> 8; - data[1] = (dmpThreshold*3/4) & 0xFF; + data[0] = (dmpThreshold * 3 / 4) >> 8; + data[1] = (dmpThreshold * 3 / 4) & 0xFF; result = mem_w_key(KEY_D_1_36, ARRAY_SIZE(data), data); if (result) return result; @@ -1093,8 +1402,8 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, result = mem_w_key(KEY_DMP_TAP_THR_Y, 2, data); if (result) return result; - data[0] = (dmpThreshold*3/4) >> 8; - data[1] = (dmpThreshold*3/4) & 0xFF; + data[0] = (dmpThreshold * 3 / 4) >> 8; + data[1] = (dmpThreshold * 3 / 4) & 0xFF; result = mem_w_key(KEY_D_1_40, ARRAY_SIZE(data), data); if (result) @@ -1104,8 +1413,8 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, result = mem_w_key(KEY_DMP_TAP_THR_Z, ARRAY_SIZE(data), data); if (result) return result; - data[0] = (dmpThreshold*3/4) >> 8; - data[1] = (dmpThreshold*3/4) & 0xFF; + data[0] = (dmpThreshold * 3 / 4) >> 8; + data[1] = (dmpThreshold * 3 / 4) & 0xFF; result = mem_w_key(KEY_D_1_44, ARRAY_SIZE(data), data); if (result) @@ -1114,47 +1423,48 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, return 0; } -static int inv_set_tap_axes_dmp(struct inv_gyro_state_s *st, - unsigned int axes) +static int inv_set_tap_axes_dmp(struct inv_mpu_iio_s *st, + u32 axes) { /* Sets a mask in the DMP that indicates what tap events should result in an interrupt */ - unsigned char regs[4]; - unsigned char result; + u8 regs[4]; + u8 result; /* check if any spurious bit other the ones expected are set */ if (axes & (~(INV_TAP_ALL_DIRECTIONS))) return -EINVAL; - regs[0] = (unsigned char)axes; + regs[0] = (u8)axes; result = mem_w_key(KEY_D_1_72, 1, regs); return result; } -int inv_set_min_taps_dmp(struct inv_gyro_state_s *st, - unsigned int min_taps) { +int inv_set_min_taps_dmp(struct inv_mpu_iio_s *st, + u16 min_taps) { /*Indicates the minimum number of consecutive taps required before the DMP will generate an interrupt */ - unsigned char regs[1]; - unsigned char result; + u8 regs[1]; + u8 result; /* check if any spurious bit other the ones expected are set */ if ((min_taps > DMP_MAX_MIN_TAPS) || (min_taps < 1)) return -EINVAL; - regs[0] = (unsigned char)(min_taps-1); + regs[0] = (u8)(min_taps-1); result = mem_w_key(KEY_D_1_79, ARRAY_SIZE(regs), regs); return result; } -int inv_set_tap_time_dmp(struct inv_gyro_state_s *st, unsigned int time) + +int inv_set_tap_time_dmp(struct inv_mpu_iio_s *st, u16 time) { /* Determines how long after a tap the DMP requires before another tap can be registered*/ int result; /* DMP Algorithm */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; + u16 dmpTime; + u8 data[2]; + u8 sampleDivider; sampleDivider = st->sample_divider; sampleDivider++; @@ -1165,19 +1475,20 @@ int inv_set_tap_time_dmp(struct inv_gyro_state_s *st, unsigned int time) data[1] = dmpTime & 0xFF; result = mem_w_key(KEY_DMP_TAPW_MIN, ARRAY_SIZE(data), data); + return result; } -static int inv_set_multiple_tap_time_dmp(struct inv_gyro_state_s *st, - unsigned int time) +static int inv_set_multiple_tap_time_dmp(struct inv_mpu_iio_s *st, + u32 time) { /*Determines how close together consecutive taps must occur to be considered double/triple taps*/ int result; /* DMP Algorithm */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; + u16 dmpTime; + u8 data[2]; + u8 sampleDivider; sampleDivider = st->sample_divider; sampleDivider++; @@ -1186,21 +1497,25 @@ static int inv_set_multiple_tap_time_dmp(struct inv_gyro_state_s *st, dmpTime = ((time) / 
sampleDivider); data[0] = dmpTime >> 8; data[1] = dmpTime & 0xFF; - result = mem_w_key(KEY_D_1_218, ARRAY_SIZE(data), data); + return result; } -long inv_q30_mult(long a, long b) + +int inv_q30_mult(int a, int b) { - long long temp; - long result; - temp = (long long)a * b; - result = (long)(temp >> DMP_MULTI_SHIFT); + u64 temp; + int result; + + temp = (u64)a * b; + result = (int)(temp >> DMP_MULTI_SHIFT); + return result; } -static unsigned short inv_row_2_scale(const signed char *row) + +static u16 inv_row_2_scale(const s8 *row) { - unsigned short b; + u16 b; if (row[0] > 0) b = 0; @@ -1216,7 +1531,7 @@ static unsigned short inv_row_2_scale(const signed char *row) b = 6; else b = 7; - /* error */ + return b; } @@ -1234,10 +1549,10 @@ static unsigned short inv_row_2_scale(const signed char *row) * bit number 8 being the sign. In binary the identity matrix would therefor * be: 010_001_000 or 0x88 in hex. */ -static unsigned short inv_orientation_matrix_to_scaler(const signed char *mtx) +static u16 inv_orientation_matrix_to_scaler(const signed char *mtx) { - unsigned short scalar; + u16 scalar; scalar = inv_row_2_scale(mtx); scalar |= inv_row_2_scale(mtx + 3) << 3; scalar |= inv_row_2_scale(mtx + 6) << 6; @@ -1245,15 +1560,27 @@ static unsigned short inv_orientation_matrix_to_scaler(const signed char *mtx) return scalar; } -static int inv_gyro_dmp_cal(struct inv_gyro_state_s *st) +#if 0 +static int inv_disable_gyro_cal(struct inv_mpu_iio_s *st) +{ + const u8 regs[] = { + 0xb8, 0xaa, 0xaa, 0xaa, + 0xb0, 0x88, 0xc3, 0xc5, + 0xc7 + }; + return mem_w_key(KEY_CFG_MOTION_BIAS, ARRAY_SIZE(regs), regs); +} +#endif + +static int inv_gyro_dmp_cal(struct inv_mpu_iio_s *st) { int inv_gyro_orient; - unsigned char regs[3]; + u8 regs[3]; int result; - unsigned char tmpD = DINA4C; - unsigned char tmpE = DINACD; - unsigned char tmpF = DINA6C; + u8 tmpD = DINA4C; + u8 tmpE = DINACD; + u8 tmpF = DINA6C; inv_gyro_orient = inv_orientation_matrix_to_scaler(st->plat_data.orientation); @@ -1277,7 +1604,7 @@ static int inv_gyro_dmp_cal(struct inv_gyro_state_s *st) else if ((inv_gyro_orient & 0xc0) == 0x80) regs[2] = tmpF; - result = mem_w_key(KEY_FCFG_1, 3, regs); + result = mem_w_key(KEY_FCFG_1, ARRAY_SIZE(regs), regs); if (result) return result; @@ -1295,22 +1622,23 @@ static int inv_gyro_dmp_cal(struct inv_gyro_state_s *st) regs[2] = DINA76; result = mem_w_key(KEY_FCFG_3, ARRAY_SIZE(regs), regs); + return result; } -static int inv_accel_dmp_cal(struct inv_gyro_state_s *st) +static int inv_accel_dmp_cal(struct inv_mpu_iio_s *st) { int inv_accel_orient; int result; - unsigned char regs[3]; - const unsigned char tmp[3] = { DINA0C, DINAC9, DINA2C }; + u8 regs[3]; + const u8 tmp[3] = { DINA0C, DINAC9, DINA2C }; inv_accel_orient = inv_orientation_matrix_to_scaler(st->plat_data.orientation); regs[0] = tmp[inv_accel_orient & 3]; regs[1] = tmp[(inv_accel_orient >> 3) & 3]; regs[2] = tmp[(inv_accel_orient >> 6) & 3]; - result = mem_w_key(KEY_FCFG_2, 3, regs); + result = mem_w_key(KEY_FCFG_2, ARRAY_SIZE(regs), regs); if (result) return result; @@ -1324,62 +1652,110 @@ static int inv_accel_dmp_cal(struct inv_gyro_state_s *st) if (inv_accel_orient & 0x100) regs[2] |= 1; result = mem_w_key(KEY_FCFG_7, ARRAY_SIZE(regs), regs); + return result; } -#define gyro_sens (0x03e80000) -static int inv_set_gyro_sf_dmp(struct inv_gyro_state_s *st) +static u16 inv_orientation_matrix_to_scalar(const s8 *mtx) +{ + + u16 scalar; + + /* + XYZ 010_001_000 Identity Matrix + XZY 001_010_000 + YXZ 010_000_001 + YZX 000_010_001 + ZXY 001_000_010 
+ ZYX 000_001_010 + */ + + scalar = inv_row_2_scale(mtx); + scalar |= inv_row_2_scale(mtx + 3) << 3; + scalar |= inv_row_2_scale(mtx + 6) << 6; + + return scalar; +} + +int inv_set_accel_bias_dmp(struct inv_mpu_iio_s *st) +{ + int inv_accel_orient, result, i, accel_bias_body[3], out[3]; + int tmp[] = {1, 1, 1}; + int mask[] = {4, 0x20, 0x100}; + int accel_sf = 0x20000000;/* 536870912 */ + u8 *regs; + + inv_accel_orient = + inv_orientation_matrix_to_scalar(st->plat_data.orientation); + + for (i = 0; i < 3; i++) + if (inv_accel_orient & mask[i]) + tmp[i] = -1; + + for (i = 0; i < 3; i++) + accel_bias_body[i] = st->input_accel_bias[(inv_accel_orient >> + (i * 3)) & 3] * tmp[i]; + for (i = 0; i < 3; i++) + accel_bias_body[i] = inv_q30_mult(accel_sf, + accel_bias_body[i]); + for (i = 0; i < 3; i++) + out[i] = cpu_to_be32p(&accel_bias_body[i]); + regs = (u8 *)out; + result = mem_w_key(KEY_D_ACCEL_BIAS, sizeof(out), regs); + + return result; +} + +static int inv_set_gyro_sf_dmp(struct inv_mpu_iio_s *st) { /*The gyro threshold, in dps, above which taps will be rejected*/ - int result, out; + int result; /* DMP Algorithm */ - unsigned char sampleDivider; - unsigned char *regs; - int gyro_sf; + u8 sampleDivider; + u32 gyro_sf; + const u32 gyro_sens = 0x03e80000; sampleDivider = st->sample_divider; gyro_sf = inv_q30_mult(gyro_sens, - (int)(DMP_TAP_SCALE * (sampleDivider+1))); + (int)(DMP_TAP_SCALE * (sampleDivider + 1))); + result = write_be32_key_to_mem(st, gyro_sf, KEY_D_0_104); - out = cpu_to_be32p(&gyro_sf); - regs = (unsigned char *)&out; - result = mem_w_key(KEY_D_0_104, sizeof(out), regs); return result; } -static int inv_set_shake_reject_thresh_dmp(struct inv_gyro_state_s *st, + +static int inv_set_shake_reject_thresh_dmp(struct inv_mpu_iio_s *st, int thresh) { /*THIS FUNCTION FAILS MEM_W*/ /*The gyro threshold, in dps, above which taps will be rejected */ - int result, out; + int result; /* DMP Algorithm */ - unsigned char sampleDivider; + u8 sampleDivider; int thresh_scaled; - unsigned char *regs; - long gyro_sf; + u32 gyro_sf; + const u32 gyro_sens = 0x03e80000; sampleDivider = st->sample_divider; gyro_sf = inv_q30_mult(gyro_sens, (int)(DMP_TAP_SCALE * - (sampleDivider+1))); + (sampleDivider + 1))); /* We're in units of DPS, convert it back to chip units*/ /*split the operation to aviod overflow of integer*/ - thresh_scaled = gyro_sens/(1L<<16); - thresh_scaled = thresh_scaled/thresh; + thresh_scaled = gyro_sens / (1L << 16); + thresh_scaled = thresh_scaled / thresh; thresh_scaled = gyro_sf / thresh_scaled; - out = cpu_to_be32p(&thresh_scaled); - regs = (unsigned char *)&out; + result = write_be32_key_to_mem(st, thresh_scaled, KEY_D_1_92); - result = mem_w_key(KEY_D_1_92, sizeof(out), regs); return result; } -static int inv_set_shake_reject_time_dmp(struct inv_gyro_state_s *st, - unsigned int time) + +static int inv_set_shake_reject_time_dmp(struct inv_mpu_iio_s *st, + u32 time) { /* How long a gyro axis must remain above its threshold before taps are rejected */ int result; /* DMP Algorithm */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; + u16 dmpTime; + u8 data[2]; + u8 sampleDivider; sampleDivider = st->sample_divider; sampleDivider++; @@ -1393,16 +1769,16 @@ static int inv_set_shake_reject_time_dmp(struct inv_gyro_state_s *st, return result; } -static int inv_set_shake_reject_timeout_dmp(struct inv_gyro_state_s *st, - unsigned int time) +static int inv_set_shake_reject_timeout_dmp(struct inv_mpu_iio_s *st, + u32 time) { /*How long the gyros must 
remain below their threshold, after taps have been rejected, before taps can be detected again*/ int result; /* DMP Algorithm */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; + u16 dmpTime; + u8 data[2]; + u8 sampleDivider; sampleDivider = st->sample_divider; sampleDivider++; @@ -1416,45 +1792,49 @@ static int inv_set_shake_reject_timeout_dmp(struct inv_gyro_state_s *st, return result; } -static int inv_set_interrupt_on_gesture_event(struct inv_gyro_state_s *st, - char on) +int inv_set_interrupt_on_gesture_event(struct inv_mpu_iio_s *st, bool on) { - unsigned char result; - const unsigned char regs_on[] = {DINADA, DINADA, DINAB1, DINAB9, - DINAF3, DINA8B, DINAA3, DINA91, - DINAB6, DINADA, DINAB4, DINADA}; - const unsigned char regs_off[] = {0xd8, 0xd8, 0xb1, 0xb9, 0xf3, 0x8b, - 0xa3, 0x91, 0xb6, 0x09, 0xb4, 0xd9}; + u8 result; + const u8 regs_on[] = {DINADA, DINAB1, DINAB9, + DINAF3, DINA8B, DINAA3, DINA91, + DINAB6, DINADA, DINAB4, DINADA}; + const u8 regs_off[] = {0xd8, 0xb1, 0xb9, 0xf3, 0x8b, + 0xa3, 0x91, 0xb6, 0x09, 0xb4, 0xd9}; /*For some reason DINAC4 is defined as 0xb8, but DINBC4 is not defined.*/ - const unsigned char regs_end[] = {DINAFE, DINAF2, DINAAB, 0xc4, - DINAAA, DINAF1, DINADF, DINADF}; - if (on) { + const u8 regs_end[] = {DINAFE, DINAF2, DINAAB, 0xc4, + DINAAA, DINAF1, DINADF, DINADF, + 0xbb, 0xaf, DINADF, DINADF}; + const u8 regs[] = {0, 0}; + /* reset fifo count to zero */ + result = mem_w_key(KEY_D_1_178, ARRAY_SIZE(regs), regs); + if (result) + return result; + + if (on) /*Sets the DMP to send an interrupt and put a FIFO packet in the FIFO if and only if a tap/orientation event just occurred*/ result = mem_w_key(KEY_CFG_FIFO_ON_EVENT, ARRAY_SIZE(regs_on), regs_on); - if (result) - return result; - } else { + else /*Sets the DMP to send an interrupt and put a FIFO packet in the FIFO at the rate specified by the FIFO div. see inv_set_fifo_div in hw_setup.c to set the FIFO div.*/ result = mem_w_key(KEY_CFG_FIFO_ON_EVENT, ARRAY_SIZE(regs_off), regs_off); - if (result) - return result; - } + if (result) + return result; result = mem_w_key(KEY_CFG_6, ARRAY_SIZE(regs_end), regs_end); + return result; } /** * inv_enable_tap_dmp() - calling this function will enable/disable tap function. 
*/ -int inv_enable_tap_dmp(struct inv_gyro_state_s *st, unsigned char on) +int inv_enable_tap_dmp(struct inv_mpu_iio_s *st, bool on) { int result; result = inv_set_tap_interrupt_dmp(st, on); @@ -1462,15 +1842,15 @@ int inv_enable_tap_dmp(struct inv_gyro_state_s *st, unsigned char on) return result; if (on) { result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_X, - st->tap.thresh); + st->tap.thresh); if (result) return result; result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_Y, - st->tap.thresh); + st->tap.thresh); if (result) return result; result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_Z, - st->tap.thresh); + st->tap.thresh); if (result) return result; } @@ -1503,103 +1883,17 @@ int inv_enable_tap_dmp(struct inv_gyro_state_s *st, unsigned char on) return result; result = inv_set_shake_reject_timeout_dmp(st, - DMP_SHAKE_REJECT_TIMEOUT); - if (result) - return result; - - result = inv_set_interrupt_on_gesture_event(st, 0); - return result; -} -static int inv_set_orientation_dmp(struct inv_gyro_state_s *st, - int orientation) -{ - /*Set a mask in the DMP determining what orientations - will trigger interrupts*/ - unsigned char regs[4]; - unsigned char result; - - /* check if any spurious bit other the ones expected are set */ - if (orientation & (~(INV_ORIENTATION_ALL | INV_ORIENTATION_FLIP))) - return -EINVAL; - - regs[0] = (unsigned char)orientation; - result = mem_w_key(KEY_D_1_74, 1, regs); - return result; -} -static int inv_set_orientation_thresh_dmp(struct inv_gyro_state_s *st, - int angle) -{ - /*Set an angle threshold in the DMP determining - when orientations change*/ - unsigned char *regs; - unsigned char result; - unsigned int out; - unsigned int d; - const unsigned int threshold[] = {138952416, 268435455, 379625062, - 464943848, 518577479, 536870912}; - /*threshold = (long)((1<<29) * sin((angle * M_PI) / 180.));*/ - d = angle/DMP_ANGLE_SCALE; - d -= 1; - if (d >= ARRAY_SIZE(threshold)) - return -EPERM; - out = cpu_to_be32p(&threshold[d]); - regs = (unsigned char *)&out; - - result = mem_w_key(KEY_D_1_232, sizeof(out), regs); - return result; -} -static int inv_set_orientation_time_dmp(struct inv_gyro_state_s *st, - unsigned int time) -{ - /*Determines the stability time required before a - new orientation can be adopted */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; - unsigned char result; - /* First check if we are allowed to call this function here */ - sampleDivider = st->sample_divider; - sampleDivider++; - /* 60 ms minimum time added */ - dmpTime = ((time) / sampleDivider); - data[0] = dmpTime >> 8; - data[1] = dmpTime & 0xFF; - result = mem_w_key(KEY_D_1_250, 2, data); - - return result; -} - -/** - * inv_enable_orientation_dmp() - calling this function will - * enable/disable orientation function. 
- */ -int inv_enable_orientation_dmp(struct inv_gyro_state_s *st, int on) -{ - int result; - result = inv_set_orientation_interrupt_dmp(st, on); - if (result) - return result; - result = inv_set_orientation_dmp(st, 0x40 | INV_ORIENTATION_ALL); - if (result) - return result; - result = inv_set_gyro_sf_dmp(st); - if (result) - return result; - result = inv_set_orientation_thresh_dmp(st, DMP_ORIENTATION_ANGLE); - if (result) - return result; - result = inv_set_orientation_time_dmp(st, DMP_ORIENTATION_TIME); + DMP_SHAKE_REJECT_TIMEOUT); return result; } -static int inv_send_sensor_data(struct inv_gyro_state_s *st, - unsigned short elements) +int inv_send_sensor_data(struct inv_mpu_iio_s *st, u16 elements) { int result; - unsigned char regs[] = { DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, + u8 regs[] = {DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, - DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, DINAA0 + 3 - }; + DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, + DINAA0 + 3}; if (elements & INV_ELEMENT_1) regs[0] = DINACA; @@ -1609,8 +1903,8 @@ static int inv_send_sensor_data(struct inv_gyro_state_s *st, regs[5] = DINACC; if (elements & INV_ELEMENT_4) regs[6] = DINBC6; - if ((elements & INV_ELEMENT_5) || (elements & INV_ELEMENT_6) - || (elements & INV_ELEMENT_7)) { + if ((elements & INV_ELEMENT_5) || (elements & INV_ELEMENT_6) || + (elements & INV_ELEMENT_7)) { regs[1] = DINBC0; regs[2] = DINAC8; regs[3] = DINBC2; @@ -1618,12 +1912,18 @@ static int inv_send_sensor_data(struct inv_gyro_state_s *st, result = mem_w_key(KEY_CFG_15, ARRAY_SIZE(regs), regs); return result; } -static int inv_send_interrupt_word(struct inv_gyro_state_s *st) + +int inv_send_interrupt_word(struct inv_mpu_iio_s *st, bool on) { - const unsigned char regs[] = { DINA20 }; - unsigned char result; + const u8 regs_on[] = { DINA20 }; + const u8 regs_off[] = { DINAA3 }; + u8 result; + + if (on) + result = mem_w_key(KEY_CFG_27, ARRAY_SIZE(regs_on), regs_on); + else + result = mem_w_key(KEY_CFG_27, ARRAY_SIZE(regs_off), regs_off); - result = mem_w_key(KEY_CFG_27, ARRAY_SIZE(regs), regs); return result; } @@ -1635,25 +1935,44 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t size) { - unsigned char *firmware; + u8 *firmware; int result; struct inv_reg_map_s *reg; struct iio_dev *indio_dev; - struct inv_gyro_state_s *st; + struct inv_mpu_iio_s *st; indio_dev = dev_get_drvdata(container_of(kobj, struct device, kobj)); st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return -EPERM; - if (1 == st->chip_config.firmware_loaded) + if (st->chip_config.firmware_loaded) return -EINVAL; + if (st->chip_config.enable) + return -EBUSY; + reg = &st->reg; + if (DMP_IMAGE_SIZE != size) { + pr_err("wrong DMP image size\n"); + return -EINVAL; + } firmware = kmalloc(size, GFP_KERNEL); if (!firmware) return -ENOMEM; + + mutex_lock(&indio_dev->mlock); + memcpy(firmware, buf, size); + result = crc32(CRC_FIRMWARE_SEED, firmware, size); + if (DMP_IMAGE_CRC_VALUE != result) { + pr_err("firmware CRC error - 0x%08x vs 0x%08x\n", + result, DMP_IMAGE_CRC_VALUE); + result = -EINVAL; + goto firmware_write_fail; + } + + result = st->set_power_state(st, true); + if (result) + goto firmware_write_fail; result = inv_load_firmware(st, firmware, size); if (result) @@ -1672,16 +1991,7 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, if (result) goto firmware_write_fail; - result = inv_verify_firmware(st, firmware, size); - if (result) - goto 
firmware_write_fail; result = inv_set_fifo_rate(st, DMP_DEFAULT_FIFO_RATE); - if (result) - goto firmware_write_fail; - result = inv_send_sensor_data(st, INV_GYRO_ACC_MASK); - if (result) - goto firmware_write_fail; - result = inv_send_interrupt_word(st); if (result) goto firmware_write_fail; result = inv_gyro_dmp_cal(st); @@ -1690,28 +2000,44 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, result = inv_accel_dmp_cal(st); if (result) goto firmware_write_fail; + /* result = inv_disable_gyro_cal(st); */ if (result) goto firmware_write_fail; + st->chip_config.firmware_loaded = 1; - result = size; + firmware_write_fail: + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); kfree(firmware); - return result; + if (result) + return result; + return size; } + ssize_t inv_dmp_firmware_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { int bank, write_size, size, data, result; - unsigned short memaddr; + u16 memaddr; struct iio_dev *indio_dev; - struct inv_gyro_state_s *st; - size = count; + struct inv_mpu_iio_s *st; + size = count; indio_dev = dev_get_drvdata(container_of(kobj, struct device, kobj)); st = iio_priv(indio_dev); + data = 0; + mutex_lock(&indio_dev->mlock); + if (!st->chip_config.enable) { + result = st->set_power_state(st, true); + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; + } + } for (bank = 0; size > 0; bank++, size -= write_size, data += write_size) { if (size > MPU_MEM_BANK_SIZE) @@ -1719,15 +2045,22 @@ ssize_t inv_dmp_firmware_read(struct file *filp, else write_size = size; - memaddr = ((bank << 8) | 0x00); - result = mpu_memory_read(st->sl_handle, + memaddr = (bank << 8); + result = mpu_memory_read(st, st->i2c_addr, memaddr, write_size, &buf[data]); - if (result) + if (result) { + mutex_unlock(&indio_dev->mlock); return result; + } } + if (!st->chip_config.enable) + result = st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); + if (result) + return result; + return count; } /** * @} */ - diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c index 880b0b8c790..fcebf833569 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c @@ -17,11 +17,13 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro_misc.c + * @file inv_mpu_ring.c * @brief A sysfs device driver for Invensense gyroscopes. - * @details This file is part of inv_gyro driver code + * @details This file is part of inv mpu iio driver code */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -35,12 +37,14 @@ #include #include #include -#include "inv_mpu_iio.h" + #include "../../iio.h" #include "../../kfifo_buf.h" #include "../../trigger_consumer.h" #include "../../sysfs.h" +#include "inv_mpu_iio.h" + /** * reset_fifo_mpu3050() - Reset FIFO related registers * @st: Device driver instance. 
@@ -49,27 +53,13 @@ static int reset_fifo_mpu3050(struct iio_dev *indio_dev) { struct inv_reg_map_s *reg; int result; - unsigned char val, user_ctrl; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; - + u8 val, user_ctrl; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); reg = &st->reg; - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Z)) - st->chip_config.gyro_fifo_enable = 1; - else - st->chip_config.gyro_fifo_enable = 0; - - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Z)) - st->chip_config.accl_fifo_enable = 1; - else - st->chip_config.accl_fifo_enable = 0; /* disable interrupt */ - result = inv_i2c_single_write(st, reg->int_enable, 0); + result = inv_i2c_single_write(st, reg->int_enable, + st->plat_data.int_config); if (result) return result; /* disable the sensor output to FIFO */ @@ -87,11 +77,11 @@ static int reset_fifo_mpu3050(struct iio_dev *indio_dev) result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; - st->last_isr_time = iio_get_time_ns(); + st->last_isr_time = get_time_ns(); if (st->chip_config.dmp_on) { /* enable interrupt when DMP is done */ result = inv_i2c_single_write(st, reg->int_enable, - BIT_DMP_INT_EN); + st->plat_data.int_config | BIT_DMP_INT_EN); if (result) return result; @@ -102,9 +92,9 @@ static int reset_fifo_mpu3050(struct iio_dev *indio_dev) } else { /* enable interrupt */ if (st->chip_config.accl_fifo_enable || - st->chip_config.gyro_fifo_enable){ + st->chip_config.gyro_fifo_enable) { result = inv_i2c_single_write(st, reg->int_enable, - BIT_DATA_RDY_EN); + st->plat_data.int_config | BIT_DATA_RDY_EN); if (result) return result; } @@ -130,48 +120,129 @@ static int reset_fifo_mpu3050(struct iio_dev *indio_dev) val = BIT_DMP_INT_EN; else val = BIT_DATA_RDY_EN; - inv_i2c_single_write(st, reg->int_enable, val); - pr_err("%s failed\n", __func__); + inv_i2c_single_write(st, reg->int_enable, + st->plat_data.int_config | val); + pr_err("reset fifo failed\n"); + return result; } + /** - * reset_fifo_itg() - Reset FIFO related registers. - * @st: Device driver instance. + * inv_set_lpf() - set low pass filer based on fifo rate. 
*/ -static int reset_fifo_itg(struct iio_dev *indio_dev) +static int inv_set_lpf(struct inv_mpu_iio_s *st, int rate) { + const short hz[] = {188, 98, 42, 20, 10, 5}; + const int d[] = {INV_FILTER_188HZ, INV_FILTER_98HZ, + INV_FILTER_42HZ, INV_FILTER_20HZ, + INV_FILTER_10HZ, INV_FILTER_5HZ}; + int i, h, data, result; struct inv_reg_map_s *reg; + reg = &st->reg; + h = (rate >> 1); + i = 0; + while ((h < hz[i]) && (i < ARRAY_SIZE(d) - 1)) + i++; + data = d[i]; + if (INV_MPU3050 == st->chip_type) { + if (st->mpu_slave != NULL) { + result = st->mpu_slave->set_lpf(st, rate); + if (result) + return result; + } + result = inv_i2c_single_write(st, reg->lpf, data | + (st->chip_config.fsr << GYRO_CONFIG_FSR_SHIFT)); + } else { + result = inv_i2c_single_write(st, reg->lpf, data); + } + if (result) + return result; + st->chip_config.lpf = data; + + return 0; +} + +/** + * set_fifo_rate_reg() - Set fifo rate in hardware register + */ +static int set_fifo_rate_reg(struct inv_mpu_iio_s *st) +{ + u8 data; + u16 fifo_rate; int result; - unsigned char val; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; + struct inv_reg_map_s *reg; reg = &st->reg; - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Z)) - st->chip_config.gyro_fifo_enable = 1; - else - st->chip_config.gyro_fifo_enable = 0; + fifo_rate = st->chip_config.new_fifo_rate; + data = ONE_K_HZ / fifo_rate - 1; + result = inv_i2c_single_write(st, reg->sample_rate_div, data); + if (result) + return result; + result = inv_set_lpf(st, fifo_rate); + if (result) + return result; + st->chip_config.fifo_rate = fifo_rate; - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Z)) - st->chip_config.accl_fifo_enable = 1; - else - st->chip_config.accl_fifo_enable = 0; + return 0; +} - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_Z)) - st->chip_config.compass_fifo_enable = 1; +/** + * inv_lpa_mode() - store current low power mode settings + */ +static int inv_lpa_mode(struct inv_mpu_iio_s *st, int lpa_mode) +{ + unsigned long result; + u8 d; + struct inv_reg_map_s *reg; + + reg = &st->reg; + result = inv_i2c_read(st, reg->pwr_mgmt_1, 1, &d); + if (result) + return result; + if (lpa_mode) + d |= BIT_CYCLE; else - st->chip_config.compass_fifo_enable = 0; + d &= ~BIT_CYCLE; + + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, d); + if (result) + return result; + if (INV_MPU6500 == st->chip_type) { + if (lpa_mode) + d = BIT_ACCEL_FCHOCIE_B; + else + d = 0; + result = inv_i2c_single_write(st, REG_6500_ACCEL_CONFIG2, d); + if (result) + return result; + } + + return 0; +} + +/** + * reset_fifo_itg() - Reset FIFO related registers. + * @st: Device driver instance. 
+ */ +static int reset_fifo_itg(struct iio_dev *indio_dev) +{ + struct inv_reg_map_s *reg; + int result, data; + u8 val, int_word; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + reg = &st->reg; + if (st->chip_config.lpa_mode) { + result = inv_lpa_mode(st, 0); + if (result) { + pr_err("reset lpa mode failed\n"); + return result; + } + } /* disable interrupt */ result = inv_i2c_single_write(st, reg->int_enable, 0); if (result) { - pr_err("%s failed\n", __func__); + pr_err("int_enable write failed\n"); return result; } /* disable the sensor output to FIFO */ @@ -182,41 +253,75 @@ static int reset_fifo_itg(struct iio_dev *indio_dev) result = inv_i2c_single_write(st, reg->user_ctrl, 0); if (result) goto reset_fifo_fail; + int_word = 0; + + /* MPU6500's BIT_6500_WOM_EN is the same as BIT_MOT_EN */ + if (st->mot_int.mot_on) + int_word |= BIT_MOT_EN; if (st->chip_config.dmp_on) { val = (BIT_FIFO_RST | BIT_DMP_RST); result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; - st->last_isr_time = iio_get_time_ns(); + st->last_isr_time = get_time_ns(); if (st->chip_config.dmp_int_on) { + int_word |= BIT_DMP_INT_EN; result = inv_i2c_single_write(st, reg->int_enable, - BIT_DMP_INT_EN); + int_word); if (result) return result; } val = (BIT_DMP_EN | BIT_FIFO_EN); - if (st->chip_config.compass_enable) + if (st->chip_config.compass_enable & + (!st->chip_config.dmp_event_int_on)) val |= BIT_I2C_MST_EN; result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; + + if (st->chip_config.compass_enable) { + /* I2C_MST_DLY is set according to sample rate, + slow down the power*/ + data = max(COMPASS_RATE_SCALE * + st->chip_config.new_fifo_rate / ONE_K_HZ, + st->chip_config.new_fifo_rate / + st->chip_config.dmp_output_rate); + if (data > 0) + data -= 1; + result = inv_i2c_single_write(st, REG_I2C_SLV4_CTRL, + data); + if (result) + return result; + } + val = 0; + if (st->chip_config.accl_fifo_enable) + val |= INV_ACCL_MASK; + if (st->chip_config.gyro_fifo_enable) + val |= INV_GYRO_MASK; + result = inv_send_sensor_data(st, val); + if (result) + return result; + if (st->chip_config.display_orient_on || st->chip_config.tap_on) + result = inv_send_interrupt_word(st, true); + else + result = inv_send_interrupt_word(st, false); } else { /* reset FIFO and possibly reset I2C*/ val = BIT_FIFO_RST; result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; - st->last_isr_time = iio_get_time_ns(); + st->last_isr_time = get_time_ns(); /* enable interrupt */ if (st->chip_config.accl_fifo_enable || - st->chip_config.gyro_fifo_enable || - st->chip_config.compass_enable){ - result = inv_i2c_single_write(st, reg->int_enable, - BIT_DATA_RDY_EN); - if (result) - return result; + st->chip_config.gyro_fifo_enable || + st->chip_config.compass_enable) { + int_word |= BIT_DATA_RDY_EN; } + result = inv_i2c_single_write(st, reg->int_enable, int_word); + if (result) + return result; /* enable FIFO reading and I2C master interface*/ val = BIT_FIFO_EN; if (st->chip_config.compass_enable) @@ -224,6 +329,18 @@ static int reset_fifo_itg(struct iio_dev *indio_dev) result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; + if (st->chip_config.compass_enable) { + /* I2C_MST_DLY is set according to sample rate, + slow down the power*/ + data = COMPASS_RATE_SCALE * + st->chip_config.new_fifo_rate / ONE_K_HZ; + if (data > 0) + data -= 1; + result = inv_i2c_single_write(st, REG_I2C_SLV4_CTRL, + data); + if (result) + return 
result; + } /* enable sensor output to FIFO */ val = 0; if (st->chip_config.gyro_fifo_enable) @@ -234,76 +351,143 @@ static int reset_fifo_itg(struct iio_dev *indio_dev) if (result) goto reset_fifo_fail; } + st->chip_config.normal_compass_measure = 0; + result = inv_lpa_mode(st, st->chip_config.lpa_mode); + if (result) + goto reset_fifo_fail; + return 0; + reset_fifo_fail: if (st->chip_config.dmp_on) val = BIT_DMP_INT_EN; else val = BIT_DATA_RDY_EN; inv_i2c_single_write(st, reg->int_enable, val); - pr_err("%s failed\n", __func__); + pr_err("reset fifo failed\n"); + return result; } + +/** + * inv_clear_kfifo() - clear time stamp fifo + * @st: Device driver instance. + */ +static void inv_clear_kfifo(struct inv_mpu_iio_s *st) +{ + unsigned long flags; + + spin_lock_irqsave(&st->time_stamp_lock, flags); + kfifo_reset(&st->timestamps); + spin_unlock_irqrestore(&st->time_stamp_lock, flags); +} + /** * inv_reset_fifo() - Reset FIFO related registers. * @st: Device driver instance. */ static int inv_reset_fifo(struct iio_dev *indio_dev) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + + inv_clear_kfifo(st); if (INV_MPU3050 == st->chip_type) return reset_fifo_mpu3050(indio_dev); else return reset_fifo_itg(indio_dev); } + +static int inv_set_dmp_sysfs(struct inv_mpu_iio_s *st) +{ + int result; + + result = inv_set_fifo_rate(st, st->chip_config.dmp_output_rate); + if (result) + return result; + result = inv_set_interrupt_on_gesture_event(st, + st->chip_config.dmp_event_int_on); + + return result; +} + /** * set_inv_enable() - Reset FIFO related registers. * This also powers on the chip if needed. * @st: Device driver instance. * @fifo_enable: enable/disable */ -int set_inv_enable(struct iio_dev *indio_dev, - unsigned long enable) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); +static int set_inv_enable(struct iio_dev *indio_dev, + bool enable) { + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct inv_reg_map_s *reg; int result; - if (st->chip_config.is_asleep) - return -EINVAL; reg = &st->reg; if (enable) { + if (st->chip_config.new_fifo_rate != + st->chip_config.fifo_rate) { + result = set_fifo_rate_reg(st); + if (result) + return result; + } + if (st->chip_config.dmp_on) { + result = inv_set_dmp_sysfs(st); + if (result) + return result; + } + + if (st->chip_config.gyro_enable) { + result = st->switch_gyro_engine(st, true); + if (result) + return result; + } + if (st->chip_config.accl_enable) { + result = st->switch_accl_engine(st, true); + if (result) + return result; + } + result = inv_reset_fifo(indio_dev); if (result) return result; - st->chip_config.enable = 1; } else { + if ((INV_MPU3050 != st->chip_type) + && st->chip_config.lpa_mode) { + /* if the chip is in low power mode, + register write/read could fail */ + result = inv_lpa_mode(st, 0); + if (result) + return result; + } result = inv_i2c_single_write(st, reg->fifo_en, 0); - if (result) - return result; - result = inv_i2c_single_write(st, reg->int_enable, 0); if (result) return result; /* disable fifo reading */ if (INV_MPU3050 != st->chip_type) { - result = inv_i2c_single_write(st, reg->user_ctrl, 0); + result = inv_i2c_single_write(st, reg->int_enable, 0); if (result) return result; + result = inv_i2c_single_write(st, reg->user_ctrl, 0); + } else { + result = inv_i2c_single_write(st, reg->int_enable, + st->plat_data.int_config); } - st->chip_config.enable = 0; + if (result) + return result; + /* turn off the gyro/accl engine during disable phase */ + result = 
st->switch_gyro_engine(st, false); + if (result) + return result; + result = st->switch_accl_engine(st, false); + if (result) + return result; + result = st->set_power_state(st, false); + if (result) + return result; } - return 0; -} + st->chip_config.enable = enable; -/** - * inv_clear_kfifo() - clear time stamp fifo - * @st: Device driver instance. - */ -void inv_clear_kfifo(struct inv_gyro_state_s *st) -{ - unsigned long flags; - spin_lock_irqsave(&st->time_stamp_lock, flags); - kfifo_reset(&st->timestamps); - spin_unlock_irqrestore(&st->time_stamp_lock, flags); + return 0; } /** @@ -311,35 +495,34 @@ void inv_clear_kfifo(struct inv_gyro_state_s *st) */ static irqreturn_t inv_irq_handler(int irq, void *dev_id) { - struct inv_gyro_state_s *st; - long long timestamp; - int result, catch_up; - unsigned int time_since_last_irq; - - st = (struct inv_gyro_state_s *)dev_id; - timestamp = iio_get_time_ns(); - time_since_last_irq = ((unsigned int)(timestamp - - st->last_isr_time))/ONE_K_HZ; + struct inv_mpu_iio_s *st; + u64 timestamp; + int catch_up; + u64 time_since_last_irq; + + st = (struct inv_mpu_iio_s *)dev_id; + timestamp = get_time_ns(); + time_since_last_irq = timestamp - st->last_isr_time; spin_lock(&st->time_stamp_lock); catch_up = 0; - while ((time_since_last_irq > st->irq_dur_us*2) - && (catch_up < MAX_CATCH_UP) - && (0 == st->chip_config.lpa_mode)) { - - st->last_isr_time += st->irq_dur_us*ONE_K_HZ; - result = kfifo_in(&st->timestamps, - &st->last_isr_time, 1); - time_since_last_irq = ((unsigned int)(timestamp - - st->last_isr_time))/ONE_K_HZ; + while ((time_since_last_irq > st->irq_dur_ns * 2) && + (catch_up < MAX_CATCH_UP) && + (!st->chip_config.lpa_mode) && + (!st->chip_config.dmp_on)) { + st->last_isr_time += st->irq_dur_ns; + kfifo_in(&st->timestamps, + &st->last_isr_time, 1); + time_since_last_irq = timestamp - st->last_isr_time; catch_up++; } - result = kfifo_in(&st->timestamps, ×tamp, 1); + kfifo_in(&st->timestamps, ×tamp, 1); st->last_isr_time = timestamp; spin_unlock(&st->time_stamp_lock); return IRQ_WAKE_THREAD; } -static int put_scan_to_buf(struct iio_dev *indio_dev, unsigned char *d, + +static int put_scan_to_buf(struct iio_dev *indio_dev, u8 *d, short *s, int scan_index, int d_ind) { struct iio_buffer *ring = indio_dev->buffer; int st; @@ -353,31 +536,17 @@ static int put_scan_to_buf(struct iio_dev *indio_dev, unsigned char *d, } return d_ind; } -static int put_scan_to_buf_q(struct iio_dev *indio_dev, unsigned char *d, - int *s, int scan_index, int d_ind) { - struct iio_buffer *ring = indio_dev->buffer; - int st; - int i; - for (i = 0; i < 4; i++) { - st = iio_scan_mask_query(indio_dev, ring, scan_index + i); - if (st) { - memcpy(&d[d_ind], &s[i], sizeof(s[i])); - d_ind += sizeof(s[i]); - } - } - return d_ind; -} static void inv_report_data_3050(struct iio_dev *indio_dev, s64 t, - int has_footer, unsigned char *data) + int has_footer, u8 *data) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct iio_buffer *ring = indio_dev->buffer; int ind, i, d_ind; struct inv_chip_config_s *conf; - short g[3], a[3]; + short g[THREE_AXIS], a[THREE_AXIS]; s64 buf[8]; - unsigned char *tmp; + u8 *tmp; int bytes_per_datum, scan_count; conf = &st->chip_config; @@ -388,41 +557,48 @@ static void inv_report_data_3050(struct iio_dev *indio_dev, s64 t, ind = 0; if (has_footer) ind += 2; - tmp = (unsigned char *)buf; + tmp = (u8 *)buf; d_ind = 0; + if (conf->gyro_fifo_enable) { - g[0] = be16_to_cpup((__be16 *)(&data[ind])); - g[1] = 
be16_to_cpup((__be16 *)(&data[ind+2])); - g[2] = be16_to_cpup((__be16 *)(&data[ind+4])); - ind += 6; + for (i = 0; i < ARRAY_SIZE(g); i++) { + g[i] = be16_to_cpup((__be16 *)(&data[ind + i * 2])); + st->raw_gyro[i] = g[i]; + } + ind += BYTES_PER_SENSOR; d_ind = put_scan_to_buf(indio_dev, tmp, g, INV_MPU_SCAN_GYRO_X, d_ind); } if (conf->accl_fifo_enable) { st->mpu_slave->combine_data(&data[ind], a); - ind += 6; + for (i = 0; i < ARRAY_SIZE(a); i++) + st->raw_accel[i] = a[i]; + + ind += BYTES_PER_SENSOR; d_ind = put_scan_to_buf(indio_dev, tmp, a, INV_MPU_SCAN_ACCL_X, d_ind); } - i = (bytes_per_datum + 7)/8; + i = (bytes_per_datum + 7) / 8; if (ring->scan_timestamp) buf[i] = t; - ring->access->store_to(indio_dev->buffer, (u8 *) buf, t); + ring->access->store_to(indio_dev->buffer, (u8 *)buf, t); } + /** - * inv_read_fifo_mpu3050() - Transfer data from FIFO to ring buffer for mpu3050. + * inv_read_fifo_mpu3050() - Transfer data from FIFO to ring buffer for + * mpu3050. */ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) { - struct inv_gyro_state_s *st = (struct inv_gyro_state_s *)dev_id; + struct inv_mpu_iio_s *st = (struct inv_mpu_iio_s *)dev_id; struct iio_dev *indio_dev = iio_priv_to_dev(st); int bytes_per_datum; - unsigned char data[64]; + u8 data[64]; int result; short fifo_count, byte_read; - unsigned int copied; + u32 copied; s64 timestamp; struct inv_reg_map_s *reg; reg = &st->reg; @@ -435,7 +611,7 @@ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) bytes_per_datum = (st->chip_config.accl_fifo_enable + st->chip_config.gyro_fifo_enable)*BYTES_PER_SENSOR; if (st->chip_config.has_footer) - byte_read = bytes_per_datum + 2; + byte_read = bytes_per_datum + MPU3050_FOOTER_SIZE; else byte_read = bytes_per_datum; @@ -445,10 +621,10 @@ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) FIFO_COUNT_BYTE, data); if (result) goto end_session; - fifo_count = (data[0] << 8) + data[1]; + fifo_count = be16_to_cpup((__be16 *)(&data[0])); if (fifo_count < byte_read) goto end_session; - if (fifo_count%2) + if (fifo_count & 1) goto flush_fifo; if (fifo_count > FIFO_THRESHOLD) goto flush_fifo; @@ -463,8 +639,9 @@ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) ×tamp, sizeof(timestamp), &copied); if (result) goto flush_fifo; - } else + } else { goto flush_fifo; + } } } while ((bytes_per_datum != 0) && (fifo_count >= byte_read)) { @@ -477,204 +654,261 @@ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) if (result) goto flush_fifo; inv_report_data_3050(indio_dev, timestamp, - st->chip_config.has_footer, data); + st->chip_config.has_footer, data); fifo_count -= byte_read; if (st->chip_config.has_footer == 0) { st->chip_config.has_footer = 1; byte_read = bytes_per_datum + MPU3050_FOOTER_SIZE; } } + end_session: return IRQ_HANDLED; + flush_fifo: /* Flush HW and SW FIFOs. 
*/ inv_reset_fifo(indio_dev); inv_clear_kfifo(st); return IRQ_HANDLED; } + static int inv_report_gyro_accl_compass(struct iio_dev *indio_dev, - unsigned char *data, s64 t) + u8 *data, s64 t) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; - short g[3], a[3], c[3]; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + short g[THREE_AXIS], a[THREE_AXIS], c[THREE_AXIS]; int q[4]; - int result, ind, d_ind; - s64 buf[8]; - unsigned int word; - unsigned char d[8]; - unsigned char *tmp; - int source; + int result, ind; + u32 word; + u8 d[8], compass_divider; + u8 buf[64]; + u64 *tmp; + int source, i; struct inv_chip_config_s *conf; -#define INT_SRC_TAP 0x01 -#define INT_SRC_ORIENT 0x02 -#define INT_SRC_DISPLAY_ORIENT 0x08 -#define INT_SRC_SHAKE 0x10 conf = &st->chip_config; ind = 0; + if (conf->quaternion_on & conf->dmp_on) { - q[0] = be32_to_cpup((__be32 *)(&data[ind])); - q[1] = be32_to_cpup((__be32 *)(&data[ind+4])); - q[2] = be32_to_cpup((__be32 *)(&data[ind+8])); - q[3] = be32_to_cpup((__be32 *)(&data[ind+12])); - ind += 16; + for (i = 0; i < ARRAY_SIZE(q); i++) { + q[i] = be32_to_cpup((__be32 *)(&data[ind + i * 4])); + st->raw_quaternion[i] = q[i]; + memcpy(&buf[ind + i * sizeof(q[i])], &q[i], + sizeof(q[i])); + } + ind += QUATERNION_BYTES; } - if (conf->accl_fifo_enable | conf->dmp_on) { - a[0] = be16_to_cpup((__be16 *)(&data[ind])); - a[1] = be16_to_cpup((__be16 *)(&data[ind+2])); - a[2] = be16_to_cpup((__be16 *)(&data[ind+4])); - - a[0] *= st->chip_info.multi; - a[1] *= st->chip_info.multi; - a[2] *= st->chip_info.multi; - st->raw_accel[0] = a[0]; - st->raw_accel[1] = a[1]; - st->raw_accel[2] = a[2]; - ind += 6; + + if (conf->accl_fifo_enable) { + for (i = 0; i < ARRAY_SIZE(a); i++) { + a[i] = be16_to_cpup((__be16 *)(&data[ind + i * 2])); + memcpy(&buf[ind + i * sizeof(a[i])], &a[i], + sizeof(a[i])); + } + ind += BYTES_PER_SENSOR; } - if (conf->gyro_fifo_enable | conf->dmp_on) { - g[0] = be16_to_cpup((__be16 *)(&data[ind])); - g[1] = be16_to_cpup((__be16 *)(&data[ind+2])); - g[2] = be16_to_cpup((__be16 *)(&data[ind+4])); - - st->raw_gyro[0] = g[0]; - st->raw_gyro[1] = g[1]; - st->raw_gyro[2] = g[2]; - ind += 6; + + if (conf->gyro_fifo_enable) { + for (i = 0; i < ARRAY_SIZE(g); i++) { + g[i] = be16_to_cpup((__be16 *)(&data[ind + i * 2])); + memcpy(&buf[ind + i * sizeof(g[i])], &g[i], + sizeof(g[i])); + } + ind += BYTES_PER_SENSOR; } - if (conf->dmp_on) { - word = (unsigned int)(be32_to_cpup((unsigned int *)&data[ind])); - source = (word/65536)%256; + + if (conf->dmp_on && (conf->tap_on || conf->display_orient_on)) { + word = (u32)(be32_to_cpup((u32 *)&data[ind])); + source = ((word >> 16) & 0xff); if (source) { - st->tap_data = 0x3f & (word%256); - st->orient_data = (word/256)%256; - st->display_orient_data = ((0xc0 & (word%256))>>6); + st->tap_data = (DMP_MASK_TAP & (word & 0xff)); + st->display_orient_data = + ((DMP_MASK_DIS_ORIEN & (word & 0xff)) >> + DMP_DIS_ORIEN_SHIFT); } /* report tap information */ if (source & INT_SRC_TAP) sysfs_notify(&indio_dev->dev.kobj, NULL, "event_tap"); /* report orientation information */ - if (source & INT_SRC_ORIENT) - sysfs_notify(&indio_dev->dev.kobj, NULL, - "event_orientation"); - /* report orientation information */ if (source & INT_SRC_DISPLAY_ORIENT) sysfs_notify(&indio_dev->dev.kobj, NULL, - "event_display_orientation"); + "event_display_orientation"); } /*divider and counter is used to decrease the speed of read in high frequency sample rate*/ if (conf->compass_fifo_enable) { - c[0] = c[1] = 
c[2] = 0; - if (st->compass_divider == st->compass_counter) { + c[0] = 0; + c[1] = 0; + c[2] = 0; + if (conf->dmp_on) + compass_divider = st->compass_dmp_divider; + else + compass_divider = st->compass_divider; + if (compass_divider <= st->compass_counter) { /*read from external sensor data register */ - result = inv_i2c_read(st, REG_EXT_SENS_DATA_00, 8, d); + result = inv_i2c_read(st, REG_EXT_SENS_DATA_00, + NUM_BYTES_COMPASS_SLAVE, d); /* d[7] is status 2 register */ /*for AKM8975, bit 2 and 3 should be all be zero*/ /* for AMK8963, bit 3 should be zero*/ - if ((DATA_AKM_DRDY == d[0]) - && (0 == (d[7] & DATA_AKM_STAT_MASK)) - && (!result)) { - unsigned char *sens; + if ((DATA_AKM_DRDY == d[0]) && + (0 == (d[7] & DATA_AKM_STAT_MASK)) && + (!result)) { + u8 *sens; sens = st->chip_info.compass_sens; c[0] = (short)((d[2] << 8) | d[1]); c[1] = (short)((d[4] << 8) | d[3]); c[2] = (short)((d[6] << 8) | d[5]); - c[0] = ((c[0] * (sens[0] + 128)) >> 8); - c[1] = ((c[1] * (sens[1] + 128)) >> 8); - c[2] = ((c[2] * (sens[2] + 128)) >> 8); + c[0] = (short)(((int)c[0] * + (sens[0] + 128)) >> 8); + c[1] = (short)(((int)c[1] * + (sens[1] + 128)) >> 8); + c[2] = (short)(((int)c[2] * + (sens[2] + 128)) >> 8); st->raw_compass[0] = c[0]; st->raw_compass[1] = c[1]; st->raw_compass[2] = c[2]; } st->compass_counter = 0; - } else if (st->compass_divider != 0) + } else if (compass_divider != 0) { st->compass_counter++; + } + if (!conf->normal_compass_measure) { + c[0] = 0; + c[1] = 0; + c[2] = 0; + conf->normal_compass_measure = 1; + } + for (i = 0; i < 3; i++) + memcpy(&buf[ind + i * sizeof(c[i])], &c[i], + sizeof(c[i])); + ind += BYTES_PER_SENSOR; } + tmp = (u64 *)buf; + tmp[DIV_ROUND_UP(ind, 8)] = t; - tmp = (unsigned char *)buf; - d_ind = 0; - if (conf->quaternion_on & conf->dmp_on) - d_ind = put_scan_to_buf_q(indio_dev, tmp, q, - INV_MPU_SCAN_QUAT_R, d_ind); - if (conf->gyro_fifo_enable) - d_ind = put_scan_to_buf(indio_dev, tmp, g, - INV_MPU_SCAN_GYRO_X, d_ind); - if (conf->accl_fifo_enable) - d_ind = put_scan_to_buf(indio_dev, tmp, a, - INV_MPU_SCAN_ACCL_X, d_ind); - if (conf->compass_fifo_enable) - d_ind = put_scan_to_buf(indio_dev, tmp, c, - INV_MPU_SCAN_MAGN_X, d_ind); - if (ring->scan_timestamp) - buf[(d_ind + 7)/8] = t; - ring->access->store_to(indio_dev->buffer, (u8 *) buf, t); + if (ind > 0) + iio_push_to_buffer(indio_dev->buffer, buf, t); return 0; } +static void inv_process_motion(struct inv_mpu_iio_s *st) +{ + struct iio_dev *indio_dev = iio_priv_to_dev(st); + s32 diff, true_motion; + s64 timestamp; + int result; + u8 data[1]; + + /* motion interrupt */ + result = inv_i2c_read(st, REG_INT_STATUS, 1, data); + if (result) + return; + + if (data[0] & BIT_MOT_INT) { + timestamp = get_time_ns(); + diff = (int)(((timestamp - st->mpu6500_last_motion_time) >> + NS_PER_MS_SHIFT)); + if (diff > st->mot_int.mot_dur) { + st->mpu6500_last_motion_time = timestamp; + true_motion = 1; + } else { + true_motion = 0; + } + if (true_motion) + sysfs_notify(&indio_dev->dev.kobj, NULL, + "event_accel_motion"); + } +} + +static int get_bytes_per_datum(struct inv_mpu_iio_s *st) +{ + int bytes_per_datum; + + bytes_per_datum = 0; + if (st->chip_config.dmp_on) { + if (st->chip_config.quaternion_on) + bytes_per_datum += QUATERNION_BYTES; + if (st->chip_config.tap_on || + st->chip_config.display_orient_on) + bytes_per_datum += BYTES_FOR_EVENTS; + } + if (st->chip_config.accl_fifo_enable) + bytes_per_datum += BYTES_PER_SENSOR; + if (st->chip_config.gyro_fifo_enable) + bytes_per_datum += BYTES_PER_SENSOR; + + return 
bytes_per_datum; +} + /** * inv_read_fifo() - Transfer data from FIFO to ring buffer. */ irqreturn_t inv_read_fifo(int irq, void *dev_id) { - struct inv_gyro_state_s *st = (struct inv_gyro_state_s *)dev_id; + struct inv_mpu_iio_s *st = (struct inv_mpu_iio_s *)dev_id; struct iio_dev *indio_dev = iio_priv_to_dev(st); size_t bytes_per_datum; int result; - unsigned char data[BYTES_FOR_DMP + QUATERNION_BYTES]; - unsigned short fifo_count; - unsigned int copied; + u8 data[BYTES_FOR_DMP + QUATERNION_BYTES]; + u16 fifo_count; + u32 copied; s64 timestamp; struct inv_reg_map_s *reg; s64 buf[8]; - unsigned char *tmp; + s8 *tmp; + + mutex_lock(&indio_dev->mlock); + if (!(iio_buffer_enabled(indio_dev))) + goto end_session; + reg = &st->reg; if (!(st->chip_config.accl_fifo_enable | st->chip_config.gyro_fifo_enable | st->chip_config.dmp_on | - st->chip_config.compass_fifo_enable)) + st->chip_config.compass_fifo_enable | + st->mot_int.mot_on)) goto end_session; - if (st->chip_config.dmp_on && st->chip_config.flick_int_on) { - /*dmp interrupt status */ - inv_i2c_read(st, REG_DMP_INT_STATUS, 1, data); - if (data[0] & 8) - sysfs_notify(&indio_dev->dev.kobj, NULL, "event_flick"); + if (st->mot_int.mot_on) + inv_process_motion(st); + if (st->chip_config.dmp_on && st->chip_config.smd_enable) { + /* dmp interrupt status */ + result = inv_i2c_read(st, REG_DMP_INT_STATUS, 1, data); + if (!result) + if (data[0] & SMD_INT_ON) { + sysfs_notify(&indio_dev->dev.kobj, NULL, + "event_smd"); + st->chip_config.smd_enable = 0; + } } if (st->chip_config.lpa_mode) { - result = inv_i2c_read(st, reg->raw_accl, 6, data); + result = inv_i2c_read(st, reg->raw_accl, + BYTES_PER_SENSOR, data); if (result) goto end_session; inv_report_gyro_accl_compass(indio_dev, data, - iio_get_time_ns()); + get_time_ns()); goto end_session; } - - if (st->chip_config.dmp_on) - if (st->chip_config.quaternion_on) - bytes_per_datum = BYTES_FOR_DMP + QUATERNION_BYTES; - else - bytes_per_datum = BYTES_FOR_DMP; - else - bytes_per_datum = (st->chip_config.accl_fifo_enable + - st->chip_config.gyro_fifo_enable)*BYTES_PER_SENSOR; + bytes_per_datum = get_bytes_per_datum(st); fifo_count = 0; if (bytes_per_datum != 0) { result = inv_i2c_read(st, reg->fifo_count_h, FIFO_COUNT_BYTE, data); if (result) goto end_session; - fifo_count = (data[0] << 8) + data[1]; + fifo_count = be16_to_cpup((__be16 *)(&data[0])); if (fifo_count < bytes_per_datum) goto end_session; - if (fifo_count%2) + /* fifo count can't be odd number */ + if (fifo_count & 1) goto flush_fifo; if (fifo_count > FIFO_THRESHOLD) goto flush_fifo; - /* Timestamp mismatch. */ + /* timestamp mismatch. 
*/ if (kfifo_len(&st->timestamps) < fifo_count / bytes_per_datum) goto flush_fifo; @@ -685,17 +919,17 @@ irqreturn_t inv_read_fifo(int irq, void *dev_id) ×tamp, sizeof(timestamp), &copied); if (result) goto flush_fifo; - } else + } else { goto flush_fifo; + } } - } - if (bytes_per_datum == 0) { + } else { result = kfifo_to_user(&st->timestamps, ×tamp, sizeof(timestamp), &copied); if (result) goto flush_fifo; } - tmp = (char *)buf; + tmp = (s8 *)buf; while ((bytes_per_datum != 0) && (fifo_count >= bytes_per_datum)) { result = inv_i2c_read(st, reg->fifo_r_w, bytes_per_datum, data); @@ -709,51 +943,178 @@ irqreturn_t inv_read_fifo(int irq, void *dev_id) inv_report_gyro_accl_compass(indio_dev, data, timestamp); fifo_count -= bytes_per_datum; } - if (bytes_per_datum == 0) + if (bytes_per_datum == 0 && st->chip_config.compass_fifo_enable) inv_report_gyro_accl_compass(indio_dev, data, timestamp); + end_session: + mutex_unlock(&indio_dev->mlock); + return IRQ_HANDLED; + flush_fifo: /* Flush HW and SW FIFOs. */ inv_reset_fifo(indio_dev); inv_clear_kfifo(st); + mutex_unlock(&indio_dev->mlock); + return IRQ_HANDLED; } void inv_mpu_unconfigure_ring(struct iio_dev *indio_dev) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); - free_irq(st->i2c->irq, st); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + free_irq(st->client->irq, st); iio_kfifo_free(indio_dev->buffer); }; -int inv_postenable(struct iio_dev *indio_dev) +static int inv_postenable(struct iio_dev *indio_dev) { - return set_inv_enable(indio_dev, 1); + return set_inv_enable(indio_dev, true); +} +static int inv_predisable(struct iio_dev *indio_dev) +{ + return set_inv_enable(indio_dev, false); } -int inv_predisable(struct iio_dev *indio_dev) + +static void inv_scan_query(struct iio_dev *indio_dev) { - return set_inv_enable(indio_dev, 0); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + int result; + + if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_X) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Y) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Z)) + st->chip_config.gyro_fifo_enable = 1; + else + st->chip_config.gyro_fifo_enable = 0; + + if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_X) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Y) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Z)) + st->chip_config.accl_fifo_enable = 1; + else + st->chip_config.accl_fifo_enable = 0; + + if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_X) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_Y) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_Z)) + st->chip_config.compass_fifo_enable = 1; + else + st->chip_config.compass_fifo_enable = 0; + + /* check to make sure engine is turned on if fifo is turned on */ + if (st->chip_config.gyro_fifo_enable && + (!st->chip_config.gyro_enable)) { + result = st->switch_gyro_engine(st, true); + if (result) + return; + st->chip_config.gyro_enable = true; + } + if (st->chip_config.accl_fifo_enable && + (!st->chip_config.accl_enable)) { + result = st->switch_accl_engine(st, true); + if (result) + return; + st->chip_config.accl_enable = true; + } +} + +static int inv_check_quaternion(struct iio_dev *indio_dev) +{ + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + int result; + + if (st->chip_config.dmp_on) { + if ( + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_QUAT_R) || + iio_scan_mask_query(indio_dev, ring, 
INV_MPU_SCAN_QUAT_X) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_QUAT_Y) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_QUAT_Z)) + st->chip_config.quaternion_on = 1; + else + st->chip_config.quaternion_on = 0; + + result = inv_send_quaternion(st, + st->chip_config.quaternion_on); + if (result) + return result; + } else { + st->chip_config.quaternion_on = 0; + clear_bit(INV_MPU_SCAN_QUAT_R, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_X, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_Y, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_Z, ring->scan_mask); + } + + return 0; +} + +static int inv_check_conflict_sysfs(struct iio_dev *indio_dev) +{ + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + int result; + + if (st->chip_config.lpa_mode) { + /* dmp cannot run with low power mode on */ + st->chip_config.dmp_on = 0; + result = st->gyro_en(st, ring, false); + if (result) + return result; + result = st->compass_en(st, ring, false); + if (result) + return result; + result = st->quaternion_en(st, ring, false); + if (result) + return result; + + result = st->accl_en(st, ring, true); + if (result) + return result; + } + result = inv_check_quaternion(indio_dev); + if (result) + return result; + + return result; +} + +static int inv_preenable(struct iio_dev *indio_dev) +{ + int result; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + + result = st->set_power_state(st, true); + if (result) + return result; + + result = inv_check_conflict_sysfs(indio_dev); + if (result) + return result; + inv_scan_query(indio_dev); + result = iio_sw_buffer_preenable(indio_dev); + + return result; } static const struct iio_buffer_setup_ops inv_mpu_ring_setup_ops = { - .preenable = &iio_sw_buffer_preenable, + .preenable = &inv_preenable, .postenable = &inv_postenable, .predisable = &inv_predisable, }; int inv_mpu_configure_ring(struct iio_dev *indio_dev) { - int ret = 0; - struct inv_gyro_state_s *st = iio_priv(indio_dev); + int ret; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct iio_buffer *ring; ring = iio_kfifo_allocate(indio_dev); - if (!ring) { - ret = -ENOMEM; - return ret; - } + if (!ring) + return -ENOMEM; indio_dev->buffer = ring; /* setup ring buffer */ ring->scan_timestamp = true; @@ -761,11 +1122,11 @@ int inv_mpu_configure_ring(struct iio_dev *indio_dev) /*scan count double count timestamp. should subtract 1. but number of channels still includes timestamp*/ if (INV_MPU3050 == st->chip_type) - ret = request_threaded_irq(st->i2c->irq, inv_irq_handler, + ret = request_threaded_irq(st->client->irq, inv_irq_handler, inv_read_fifo_mpu3050, IRQF_TRIGGER_RISING | IRQF_SHARED, "inv_irq", st); else - ret = request_threaded_irq(st->i2c->irq, inv_irq_handler, + ret = request_threaded_irq(st->client->irq, inv_irq_handler, inv_read_fifo, IRQF_TRIGGER_RISING | IRQF_SHARED, "inv_irq", st); if (ret) @@ -777,6 +1138,7 @@ int inv_mpu_configure_ring(struct iio_dev *indio_dev) iio_kfifo_free(indio_dev->buffer); return ret; } + /** * @} */ diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c index a36c7947583..45cd33932e6 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c @@ -17,9 +17,9 @@ * @brief Hardware drivers. 
* * @{ - * @file inv_mpu3050.c + * @file inv_mpu_trigger.c * @brief A sysfs device driver for Invensense devices - * @details This file is part of inv_gyro driver code + * @details This file is part of inv mpu iio driver code */ #include @@ -40,10 +40,11 @@ #include "../../iio.h" #include "../../sysfs.h" #include "../../trigger.h" + #include "inv_mpu_iio.h" /** - * inv_mpu_data_rdy_trigger_set_state() set datardy interrupt state + * inv_mpu_data_rdy_trigger_set_state() set data ready interrupt state **/ static int inv_mpu_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) @@ -51,7 +52,8 @@ static int inv_mpu_data_rdy_trigger_set_state(struct iio_trigger *trig, struct iio_dev *indio_dev = trig->private_data; dev_dbg(&indio_dev->dev, "%s (%d)\n", __func__, state); - return set_inv_enable(indio_dev, state); + + return 0; } static const struct iio_trigger_ops inv_mpu_trigger_ops = { @@ -62,36 +64,30 @@ static const struct iio_trigger_ops inv_mpu_trigger_ops = { int inv_mpu_probe_trigger(struct iio_dev *indio_dev) { int ret; - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); st->trig = iio_allocate_trigger("%s-dev%d", indio_dev->name, indio_dev->id); - if (st->trig == NULL) { - ret = -ENOMEM; - goto error_ret; - } - - /* select default trigger */ - st->trig->dev.parent = &st->i2c->dev; + if (st->trig == NULL) + return -ENOMEM; + st->trig->dev.parent = &st->client->dev; st->trig->private_data = indio_dev; st->trig->ops = &inv_mpu_trigger_ops; ret = iio_trigger_register(st->trig); - /* select default trigger */ + if (ret) { + iio_free_trigger(st->trig); + return -EPERM; + } indio_dev->trig = st->trig; - if (ret) - goto error_ret; return 0; - -error_ret: - return ret; } void inv_mpu_remove_trigger(struct iio_dev *indio_dev) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); iio_trigger_unregister(st->trig); iio_free_trigger(st->trig); diff --git a/drivers/staging/iio/imu/mpu/inv_slave_bma250.c b/drivers/staging/iio/imu/mpu/inv_slave_bma250.c index 85c6cc9f2c4..bd84f637af5 100644 --- a/drivers/staging/iio/imu/mpu/inv_slave_bma250.c +++ b/drivers/staging/iio/imu/mpu/inv_slave_bma250.c @@ -19,7 +19,7 @@ * @{ * @file inv_slave_bma250.c * @brief A sysfs device driver for Invensense devices - * @details This file is part of inv_gyro driver code + * @details This file is part of invensense mpu driver code * */ @@ -39,44 +39,43 @@ #include #include "inv_mpu_iio.h" -#define BMA250_CHIP_ID (3) -#define BMA250_RANGE_SET (0) -#define BMA250_BW_SET (4) +#define BMA250_CHIP_ID 3 +#define BMA250_RANGE_SET 0 +#define BMA250_BW_SET 4 /* range and bandwidth */ +#define BMA250_RANGE_2G 3 +#define BMA250_RANGE_4G 5 +#define BMA250_RANGE_8G 8 +#define BMA250_RANGE_16G 12 +#define BMA250_RANGE_MAX 4 +#define BMA250_RANGE_MASK 0xF0 -#define BMA250_RANGE_2G (3) -#define BMA250_RANGE_4G (5) -#define BMA250_RANGE_8G (8) -#define BMA250_RANGE_16G (12) -#define BMA250_RANGE_MAX (4) -#define BMA250_RANGE_MASK (0xF0) - -#define BMA250_BW_7_81HZ (0x08) -#define BMA250_BW_15_63HZ (0x09) -#define BMA250_BW_31_25HZ (0x0A) -#define BMA250_BW_62_50HZ (0x0B) -#define BMA250_BW_125HZ (0x0C) -#define BMA250_BW_250HZ (0x0D) -#define BMA250_BW_500HZ (0x0E) -#define BMA250_BW_1000HZ (0x0F) -#define BMA250_MAX_BW_SIZE (8) -#define BMA250_BW_REG_MASK (0xE0) +#define BMA250_BW_7_81HZ 0x08 +#define BMA250_BW_15_63HZ 0x09 +#define BMA250_BW_31_25HZ 0x0A +#define BMA250_BW_62_50HZ 0x0B +#define BMA250_BW_125HZ 0x0C +#define 
BMA250_BW_250HZ 0x0D +#define BMA250_BW_500HZ 0x0E +#define BMA250_BW_1000HZ 0x0F +#define BMA250_MAX_BW_SIZE 8 +#define BMA250_BW_REG_MASK 0xE0 /* register definitions */ -#define BMA250_X_AXIS_LSB_REG (0x02) -#define BMA250_RANGE_SEL_REG (0x0F) -#define BMA250_BW_SEL_REG (0x10) -#define BMA250_MODE_CTRL_REG (0x11) +#define BMA250_X_AXIS_LSB_REG 0x02 +#define BMA250_RANGE_SEL_REG 0x0F +#define BMA250_BW_SEL_REG 0x10 +#define BMA250_MODE_CTRL_REG 0x11 /* mode settings */ -#define BMA250_MODE_NORMAL (0) -#define BMA250_MODE_LOWPOWER (1) -#define BMA250_MODE_SUSPEND (2) -#define BMA250_MODE_MAX (3) -#define BMA250_MODE_MASK (0x3F) -#define BMA250_BIT_SUSPEND (0x80) -#define BMA250_BIT_LP (0x40) +#define BMA250_MODE_NORMAL 0 +#define BMA250_MODE_LOWPOWER 1 +#define BMA250_MODE_SUSPEND 2 +#define BMA250_MODE_MAX 3 +#define BMA250_MODE_MASK 0x3F +#define BMA250_BIT_SUSPEND 0x80 +#define BMA250_BIT_LP 0x40 struct bma_property { int range; @@ -90,57 +89,53 @@ static struct bma_property bma_static_property = { .mode = BMA250_MODE_SUSPEND }; -static int bma250_set_bandwidth(struct inv_gyro_state_s *st, unsigned char BW) +static int bma250_set_bandwidth(struct inv_mpu_iio_s *st, u8 bw) { - int res = 0; - unsigned char data; - int Bandwidth = 0; - if (BW >= BMA250_MAX_BW_SIZE) - return -1; - switch (BW) { + int res; + u8 data; + int bandwidth; + switch (bw) { case 0: - Bandwidth = BMA250_BW_7_81HZ; + bandwidth = BMA250_BW_7_81HZ; break; case 1: - Bandwidth = BMA250_BW_15_63HZ; + bandwidth = BMA250_BW_15_63HZ; break; case 2: - Bandwidth = BMA250_BW_31_25HZ; + bandwidth = BMA250_BW_31_25HZ; break; case 3: - Bandwidth = BMA250_BW_62_50HZ; + bandwidth = BMA250_BW_62_50HZ; break; case 4: - Bandwidth = BMA250_BW_125HZ; + bandwidth = BMA250_BW_125HZ; break; case 5: - Bandwidth = BMA250_BW_250HZ; + bandwidth = BMA250_BW_250HZ; break; case 6: - Bandwidth = BMA250_BW_500HZ; + bandwidth = BMA250_BW_500HZ; break; case 7: - Bandwidth = BMA250_BW_1000HZ; + bandwidth = BMA250_BW_1000HZ; break; default: - break; + return -EINVAL; } res = inv_secondary_read(BMA250_BW_SEL_REG, 1, &data); if (res) return res; data &= BMA250_BW_REG_MASK; - data |= Bandwidth; + data |= bandwidth; res = inv_secondary_write(BMA250_BW_SEL_REG, data); return res; } -static int bma250_set_range(struct inv_gyro_state_s *st, unsigned char Range) +static int bma250_set_range(struct inv_mpu_iio_s *st, u8 range) { - int res = 0; - unsigned char orig, data = 0; - if (Range >= BMA250_RANGE_MAX) - return -1; - switch (Range) { + int res; + u8 orig, data; + switch (range) { case 0: data = BMA250_RANGE_2G; break; @@ -154,7 +149,7 @@ static int bma250_set_range(struct inv_gyro_state_s *st, unsigned char Range) data = BMA250_RANGE_16G; break; default: - break; + return -EINVAL; } res = inv_secondary_read(BMA250_RANGE_SEL_REG, 1, &orig); if (res) @@ -162,15 +157,18 @@ static int bma250_set_range(struct inv_gyro_state_s *st, unsigned char Range) orig &= BMA250_RANGE_MASK; data |= orig; res = inv_secondary_write(BMA250_RANGE_SEL_REG, data); - bma_static_property.range = Range; - return res; + if (res) + return res; + bma_static_property.range = range; + + return 0; } -static int setup_slave_bma250(struct inv_gyro_state_s *st) +static int setup_slave_bma250(struct inv_mpu_iio_s *st) { int result; - unsigned char data[2]; - result = set_3050_bypass(st, 1); + u8 data[2]; + result = set_3050_bypass(st, true); if (result) return result; /*read secondary i2c ID register */ @@ -178,29 +176,28 @@ static int setup_slave_bma250(struct inv_gyro_state_s *st) if 
(result) return result; if (BMA250_CHIP_ID != data[0]) - return result; - result = set_3050_bypass(st, 0); + return -EINVAL; + result = set_3050_bypass(st, false); if (result) return result; /*AUX(accel), slave address is set inside set_3050_bypass*/ /* bma250 x axis LSB register address is 2 */ result = inv_i2c_single_write(st, REG_3050_AUX_BST_ADDR, BMA250_X_AXIS_LSB_REG); + return result; } -static int bma250_set_mode(struct inv_gyro_state_s *st, unsigned char Mode) +static int bma250_set_mode(struct inv_mpu_iio_s *st, u8 mode) { - int res = 0; - unsigned char data = 0; + int res; + u8 data; - if (Mode >= BMA250_RANGE_MASK) - return -1; res = inv_secondary_read(BMA250_MODE_CTRL_REG, 1, &data); if (res) return res; data &= BMA250_MODE_MASK; - switch (Mode) { + switch (mode) { case BMA250_MODE_NORMAL: break; case BMA250_MODE_LOWPOWER: @@ -210,94 +207,105 @@ static int bma250_set_mode(struct inv_gyro_state_s *st, unsigned char Mode) data |= BMA250_BIT_SUSPEND; break; default: - break; + return -EINVAL; } res = inv_secondary_write(BMA250_MODE_CTRL_REG, data); - bma_static_property.mode = Mode; - return res; + if (res) + return res; + bma_static_property.mode = mode; + + return 0; } -static int suspend_slave_bma250(struct inv_gyro_state_s *st) + +static int suspend_slave_bma250(struct inv_mpu_iio_s *st) { int result; if (bma_static_property.mode == BMA250_MODE_SUSPEND) return 0; /*set to bypass mode */ - result = set_3050_bypass(st, 1); + result = set_3050_bypass(st, true); if (result) return result; bma250_set_mode(st, BMA250_MODE_SUSPEND); /* no need to recover to non-bypass mode because we need it now */ - return result; + + return 0; } -static int resume_slave_bma250(struct inv_gyro_state_s *st) + +static int resume_slave_bma250(struct inv_mpu_iio_s *st) { int result; if (bma_static_property.mode == BMA250_MODE_NORMAL) return 0; /*set to bypass mode */ - result = set_3050_bypass(st, 1); + result = set_3050_bypass(st, true); if (result) return result; - bma250_set_mode(st, BMA250_MODE_NORMAL); + result = bma250_set_mode(st, BMA250_MODE_NORMAL); /* recover bypass mode */ - result = set_3050_bypass(st, 0); - return result; + result |= set_3050_bypass(st, false); + + return result ? 
(-EINVAL) : 0; } -static int combine_data_slave_bma250(unsigned char *in, short *out) + +static int combine_data_slave_bma250(u8 *in, short *out) { out[0] = le16_to_cpup((__le16 *)(&in[0])); out[1] = le16_to_cpup((__le16 *)(&in[2])); out[2] = le16_to_cpup((__le16 *)(&in[4])); + return 0; } -static int get_mode_slave_bma250(struct inv_gyro_state_s *st) + +static int get_mode_slave_bma250(void) { - if (bma_static_property.mode == BMA250_MODE_SUSPEND) - return 0; - else if (bma_static_property.mode == BMA250_MODE_NORMAL) - return 1; - return -1; -}; + switch (bma_static_property.mode) { + case BMA250_MODE_SUSPEND: + return INV_MODE_SUSPEND; + case BMA250_MODE_NORMAL: + return INV_MODE_NORMAL; + default: + return -EINVAL; + } +} + /** * set_lpf_bma250() - set lpf value */ -static int set_lpf_bma250(struct inv_gyro_state_s *st, int rate) +static int set_lpf_bma250(struct inv_mpu_iio_s *st, int rate) { const short hz[] = {1000, 500, 250, 125, 62, 31, 15, 7}; const int d[] = {7, 6, 5, 4, 3, 2, 1, 0}; int i, h, data, result; h = (rate >> 1); i = 0; - while ((h < hz[i]) && (i < ARRAY_SIZE(hz))) + while ((h < hz[i]) && (i < ARRAY_SIZE(hz) - 1)) i++; - if (i == ARRAY_SIZE(hz)) - i -= 1; data = d[i]; - result = set_3050_bypass(st, 1); + result = set_3050_bypass(st, true); if (result) return result; - result = bma250_set_bandwidth(st, (unsigned char) data); - result |= set_3050_bypass(st, 0); + result = bma250_set_bandwidth(st, (u8) data); + result |= set_3050_bypass(st, false); - return result; + return result ? (-EINVAL) : 0; } /** * set_fs_bma250() - set range value */ -static int set_fs_bma250(struct inv_gyro_state_s *st, int fs) +static int set_fs_bma250(struct inv_mpu_iio_s *st, int fs) { int result; - result = set_3050_bypass(st, 1); + result = set_3050_bypass(st, true); if (result) return result; - result = bma250_set_range(st, (unsigned char) fs); - result |= set_3050_bypass(st, 0); - if (result) - return -EINVAL; - return result; + result = bma250_set_range(st, (u8) fs); + result |= set_3050_bypass(st, false); + + return result ? (-EINVAL) : 0; } static struct inv_mpu_slave slave_bma250 = { @@ -310,9 +318,10 @@ static struct inv_mpu_slave slave_bma250 = { .set_fs = set_fs_bma250 }; -int inv_register_bma250_slave(struct inv_gyro_state_s *st) +int inv_register_mpu3050_slave(struct inv_mpu_iio_s *st) { st->mpu_slave = &slave_bma250; + return 0; } /** diff --git a/drivers/staging/iio/inv_test/Kconfig b/drivers/staging/iio/inv_test/Kconfig new file mode 100644 index 00000000000..e96a514b28a --- /dev/null +++ b/drivers/staging/iio/inv_test/Kconfig @@ -0,0 +1,11 @@ +# +# Kconfig for Invensense IIO testing hooks +# + +config INV_TESTING + boolean "Invensense IIO testing hooks" + depends on INV_MPU_IIO || INV_AMI306_IIO || INV_YAS530 || INV_HUB_IIO + default n + help + This flag enables display of additional testing information from the + Invensense IIO drivers diff --git a/drivers/staging/iio/inv_test/Makefile b/drivers/staging/iio/inv_test/Makefile new file mode 100644 index 00000000000..4f0edd3de90 --- /dev/null +++ b/drivers/staging/iio/inv_test/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for Invensense IIO testing hooks. 
+# + +obj-$(CONFIG_INV_TESTING) += inv_counters.o + diff --git a/drivers/staging/iio/inv_test/inv_counters.c b/drivers/staging/iio/inv_test/inv_counters.c new file mode 100644 index 00000000000..3b26ca97284 --- /dev/null +++ b/drivers/staging/iio/inv_test/inv_counters.c @@ -0,0 +1,154 @@ +/* + * @file inv_counters.c + * @brief Exports i2c read write counts through sysfs + * + * @version 0.1 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "inv_counters.h" + +static int mpu_irq; +static int accel_irq; +static int compass_irq; + +struct inv_counters { + uint32_t i2c_tempreads; + uint32_t i2c_mpureads; + uint32_t i2c_mpuwrites; + uint32_t i2c_accelreads; + uint32_t i2c_accelwrites; + uint32_t i2c_compassreads; + uint32_t i2c_compasswrites; + uint32_t i2c_compassirq; + uint32_t i2c_accelirq; +}; + +static struct inv_counters Counters; + +static ssize_t i2c_counters_show(struct class *cls, + struct class_attribute *attr, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, + "%ld.%03ld %u %u %u %u %u %u %u %u %u %u\n", + jiffies / HZ, ((jiffies % HZ) * (1024 / HZ)), + mpu_irq ? kstat_irqs(mpu_irq) : 0, + Counters.i2c_tempreads, + Counters.i2c_mpureads, Counters.i2c_mpuwrites, + accel_irq ? kstat_irqs(accel_irq) : Counters.i2c_accelirq, + Counters.i2c_accelreads, Counters.i2c_accelwrites, + compass_irq ? kstat_irqs(compass_irq) : Counters.i2c_compassirq, + Counters.i2c_compassreads, Counters.i2c_compasswrites); +} + +void inv_iio_counters_set_i2cirq(enum irqtype type, int irq) +{ + switch (type) { + case IRQ_MPU: + mpu_irq = irq; + break; + case IRQ_ACCEL: + accel_irq = irq; + break; + case IRQ_COMPASS: + compass_irq = irq; + break; + } +} +EXPORT_SYMBOL_GPL(inv_iio_counters_set_i2cirq); + +void inv_iio_counters_tempread(int count) +{ + Counters.i2c_tempreads += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_tempread); + +void inv_iio_counters_mpuread(int count) +{ + Counters.i2c_mpureads += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_mpuread); + +void inv_iio_counters_mpuwrite(int count) +{ + Counters.i2c_mpuwrites += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_mpuwrite); + +void inv_iio_counters_accelread(int count) +{ + Counters.i2c_accelreads += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_accelread); + +void inv_iio_counters_accelwrite(int count) +{ + Counters.i2c_accelwrites += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_accelwrite); + +void inv_iio_counters_compassread(int count) +{ + Counters.i2c_compassreads += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_compassread); + +void inv_iio_counters_compasswrite(int count) +{ + Counters.i2c_compasswrites += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_compasswrite); + +void inv_iio_counters_compassirq(void) +{ + Counters.i2c_compassirq++; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_compassirq); + +void inv_iio_counters_accelirq(void) +{ + Counters.i2c_accelirq++; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_accelirq); + +static struct class_attribute inv_class_attr[] = { + __ATTR(i2c_counter, S_IRUGO, i2c_counters_show, NULL), + __ATTR_NULL +}; + +static struct class inv_counters_class = { + .name = "inv_counters", + .owner = THIS_MODULE, + .class_attrs = (struct class_attribute *) &inv_class_attr +}; + +static int __init inv_counters_init(void) +{ + memset(&Counters, 0, sizeof(Counters)); + + return class_register(&inv_counters_class); +} + +static void __exit inv_counters_exit(void) +{ + class_unregister(&inv_counters_class); +} + +module_init(inv_counters_init); 
+module_exit(inv_counters_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("GESL"); +MODULE_DESCRIPTION("inv_counters debug support"); + diff --git a/drivers/staging/iio/inv_test/inv_counters.h b/drivers/staging/iio/inv_test/inv_counters.h new file mode 100644 index 00000000000..d60dac9d97b --- /dev/null +++ b/drivers/staging/iio/inv_test/inv_counters.h @@ -0,0 +1,72 @@ +/* + * @file inv_counters.h + * @brief Debug file to keep track of various counters for the InvenSense + * sensor drivers. + * + * @version 0.1 + */ + +#ifndef _INV_COUNTERS_H_ +#define _INV_COUNTERS_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_INV_TESTING + +enum irqtype { + IRQ_MPU, + IRQ_ACCEL, + IRQ_COMPASS +}; + +#define INV_I2C_INC_MPUREAD(x) inv_iio_counters_mpuread(x) +#define INV_I2C_INC_MPUWRITE(x) inv_iio_counters_mpuwrite(x) +#define INV_I2C_INC_ACCELREAD(x) inv_iio_counters_accelread(x) +#define INV_I2C_INC_ACCELWRITE(x) inv_iio_counters_accelwrite(x) +#define INV_I2C_INC_COMPASSREAD(x) inv_iio_counters_compassread(x) +#define INV_I2C_INC_COMPASSWRITE(x) inv_iio_counters_compasswrite(x) + +#define INV_I2C_INC_TEMPREAD(x) inv_iio_counters_tempread(x) + +#define INV_I2C_SETIRQ(type, irq) inv_iio_counters_set_i2cirq(type, irq) +#define INV_I2C_INC_COMPASSIRQ() inv_iio_counters_compassirq() +#define INV_I2C_INC_ACCELIRQ() inv_iio_counters_accelirq() + +void inv_iio_counters_mpuread(int count); +void inv_iio_counters_mpuwrite(int count); +void inv_iio_counters_accelread(int count); +void inv_iio_counters_accelwrite(int count); +void inv_iio_counters_compassread(int count); +void inv_iio_counters_compasswrite(int count); + +void inv_iio_counters_tempread(int count); + +void inv_iio_counters_set_i2cirq(enum irqtype type, int irq); +void inv_iio_counters_compassirq(void); +void inv_iio_counters_accelirq(void); + +#else + +#define INV_I2C_INC_MPUREAD(x) +#define INV_I2C_INC_MPUWRITE(x) +#define INV_I2C_INC_ACCELREAD(x) +#define INV_I2C_INC_ACCELWRITE(x) +#define INV_I2C_INC_COMPASSREAD(x) +#define INV_I2C_INC_COMPASSWRITE(x) + +#define INV_I2C_INC_TEMPREAD(x) + +#define INV_I2C_SETIRQ(type, irq) +#define INV_I2C_INC_COMPASSIRQ() +#define INV_I2C_INC_ACCELIRQ() + +#endif /* CONFIG_INV_TESTING */ + +#endif /* _INV_COUNTERS_H_ */ + diff --git a/drivers/staging/iio/magnetometer/Kconfig b/drivers/staging/iio/magnetometer/Kconfig index fe9ef564dd5..3d96ca31e9f 100644 --- a/drivers/staging/iio/magnetometer/Kconfig +++ b/drivers/staging/iio/magnetometer/Kconfig @@ -24,14 +24,5 @@ config SENSORS_HMC5843 To compile this driver as a module, choose M here: the module will be called hmc5843 -config AMI306 - tristate "invensense implementation of ami306" - depends on I2C && IIO_KFIFO_BUF && SYSFS && IIO && IIO_TRIGGER - default n - help - This driver supports the ami306. It is Invensense implementation - of ami306 compass device. - This driver can be built as a module. The module will be called - inv-ami306-iio. 
- +source "drivers/staging/iio/magnetometer/inv_compass/Kconfig" endmenu diff --git a/drivers/staging/iio/magnetometer/Makefile b/drivers/staging/iio/magnetometer/Makefile index 7330159a0da..71c4bd500c3 100644 --- a/drivers/staging/iio/magnetometer/Makefile +++ b/drivers/staging/iio/magnetometer/Makefile @@ -4,9 +4,5 @@ obj-$(CONFIG_SENSORS_AK8975) += ak8975.o obj-$(CONFIG_SENSORS_HMC5843) += hmc5843.o -obj-$(CONFIG_AMI306) += inv-ami306.o - -inv-ami306-objs := inv_ami306_core.o -inv-ami306-objs += inv_ami306_ring.o -inv-ami306-objs += inv_ami306_trigger.o +obj-$(CONFIG_INV_AMI306_IIO) += inv_compass/ diff --git a/drivers/staging/iio/magnetometer/inv_compass/Kconfig b/drivers/staging/iio/magnetometer/inv_compass/Kconfig new file mode 100644 index 00000000000..34e001ef829 --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/Kconfig @@ -0,0 +1,25 @@ +# +# Kconfig for Invensense IIO compass drivers of 3rd party compass devices. +# + +# Yamaha YAS530/YAS532/YAS533 +config INV_YAS53X_IIO + tristate "Invensense IIO driver for Yamaha YAS530/YAS532/YAS533 compass" + depends on I2C && SYSFS && IIO && IIO_KFIFO_BUF + default n + help + This driver supports the Yamaha YAS530/YAS532/YAS533. It is the Invensense + implementation of YAS53x series compass devices. + This driver can be built as a module. The module will be called + inv_yas53x_iio. + +# Aichi AMI306 +config INV_AMI306_IIO + tristate "Invensense IIO driver for Aichi AMI306 compass" + depends on I2C && SYSFS && IIO && IIO_KFIFO_BUF + default n + help + This driver supports the Aichi AMI306 compass. It is the Invensense + IIO implementation for the AMI306 compass device. + This driver can be built as a module. The module will be called + inv-ami306-iio. diff --git a/drivers/staging/iio/magnetometer/inv_compass/Makefile b/drivers/staging/iio/magnetometer/inv_compass/Makefile new file mode 100644 index 00000000000..adc7dd93e1d --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/Makefile @@ -0,0 +1,25 @@ +# +# Makefile for Invensense IIO compass drivers of 3rd party compass devices. 
+# + +# Yamaha YAS530/YAS532/YAS533 +obj-$(CONFIG_INV_YAS53X_IIO) += inv_yas53x.o + +inv_yas53x-objs := inv_yas53x_core.o +inv_yas53x-objs += inv_yas53x_ring.o +inv_yas53x-objs += inv_yas53x_trigger.o + +CFLAGS_inv_yas53x_core.o += -Idrivers/staging/iio +CFLAGS_inv_yas53x_ring.o += -Idrivers/staging/iio +CFLAGS_inv_yas53x_trigger.o += -Idrivers/staging/iio + +# Aichi AMI306 +obj-$(CONFIG_INV_AMI306_IIO) += inv-ami306-iio.o + +inv-ami306-iio-objs := inv_ami306_core.o +inv-ami306-iio-objs += inv_ami306_ring.o +inv-ami306-iio-objs += inv_ami306_trigger.o + +CFLAGS_inv_ami306_core.o += -Idrivers/staging/iio +CFLAGS_inv_ami306_ring.o += -Idrivers/staging/iio +CFLAGS_inv_ami306_trigger.o += -Idrivers/staging/iio diff --git a/drivers/staging/iio/magnetometer/inv_compass/README b/drivers/staging/iio/magnetometer/inv_compass/README new file mode 100644 index 00000000000..54f2bb8ded2 --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/README @@ -0,0 +1,176 @@ +Kernel driver +Author: Invensense + +Table of Contents: +================== +- Description +- Integrating the Driver in the Linux Kernel +- Board and Platform Data + > Platform Data +- Board File Modifications for compass + > AMI306 + > YAS530/532/533 +- IIO Subsystem + > Communicating with the Driver in Userspace +- Streaming Data to a Userspace Application +- Test Applications + > Running Test Applications with AMI306 or YAS53x + +Description +=========== +This document describes how to install the Invensense device driver for AMI306 +and YAS53x series compass chips into a Linux kernel. The Invensense driver +currently supports the following sensors: +- AMI306 +- YAS530 +- YAS532 +- YAS533 + +Please refer to the appropriate product specification +document for further information regarding the slave address. + +The following files are included in this package: +- Kconfig +- Makefile +- inv_ami306_core.c +- inv_ami306_ring.c +- inv_ami306_trigger.c +- inv_ami306_iio.h +- inv_yas53x_core.c +- inv_yas53x_ring.c +- inv_yas53x_trigger.c +- inv_yas53x_iio.h + +Integrating the Driver in the Linux Kernel +========================================== +Please add the files as follows: +- Add all above files to drivers/staging/iio/magnetometer/inv_compass +(another directory is acceptable, but this is the recommended destination) + +In order to see the driver in menuconfig when building the kernel, please +make modifications as shown below: + + modify "drivers/staging/iio/magnetometer/Kconfig" with: + >> source "drivers/staging/iio/magnetometer/inv_compass/Kconfig" + + modify "drivers/staging/iio/magnetometer/Makefile" with: + >> obj-y += inv_compass/ + + +Board and Platform Data +======================= +In order to recognize the Invensense device on the I2C bus, the board file must +be modified. +The i2c_board_info instance must be defined as shown below. + +Platform Data +------------- +The platform data (orientation matrix and secondary bus configurations) must be +modified as shown below, according to your particular platform configuration. + +Board File Modifications for Secondary I2C Configuration +======================================================== +For the Panda Board, the board file can be found at +arch/arm/mach-omap2/board-omap4panda.c.
+Please modify the pertinent board file in your system according to the examples +shown below: + +AMI306 +------------------------------------------------- +static struct mpu_platform_data compass_data = { + .orientation = { 0, 0, 1, + 0, 1, 0, + 1, 0, 0 }, +}; + +static struct i2c_board_info __initdata chip_board_info[] = { + { + I2C_BOARD_INFO("ami306", 0x0E), + .platform_data = &compass_data, + }, +}; + +YAS53x (use YAS532 as an example) +------------------------------------------------- +static struct mpu_platform_data compass_data = { + .orientation = { 0, -1, 0, + 1, 0, 0, + 0, 0, 1 }, +}; + +static struct i2c_board_info __initdata compass_board_info[] = { + { + I2C_BOARD_INFO("yas532", 0x2E), + .platform_data = &compass_data, + }, +}; + +IIO subsystem +============= +A successful installation will create the following two new directories under +/sys/bus/iio/devices: + - iio:device0 + - trigger0 + +Also, a new file, "iio:device0", will be created in the /dev/ directory. +(if you have more than one IIO device, the file will be named "iio:deviceX", +where X is a number) + + +Communicating with the Driver in Userspace +------------------------------------------ +The driver generates several files in sysfs upon installation. +These files are used to communicate with the driver. The files can be found +at /sys/bus/iio/devices/iio:device0 (or ../iio:deviceX as shown above). + +A brief description of the pertinent files for each Invensense device is shown +below: + +AMI306 +-------- +compass_matrix (read-only) +--show the orientation matrix obtained from the board file. + +sampling_frequency (read and write) +--show and change the sampling rate of the sensor. + +YAS53x +--------------------- +YAS53x has all the attributes AMI306 has. It has one more additional attribute: + +overunderflow (read-only) +--value 1 shows that an overflow or underflow has happened. Write into it to +make it zero again. + +Streaming Data to a Userspace Application +========================================== +When streaming data to a userspace application, we recommend that you access +compass data via /dev/iio:device0. + +Please follow the steps below to read data at a constant rate from the driver: + +1. Write the desired output rate to sampling_frequency. +2. Write 1 to the enable attribute to turn on the event. +3. Read /dev/iio:device0 to get a string of gyro/accel/compass data. +4. Parse this string to obtain each compass element. + +Test Applications +================= +A test application is located under software/simple_apps/mpu_iio. +This application is stand-alone in that it cannot be run concurrently with other +entities trying to access the device node(s) or sysfs entries; in particular, +the + +Running Test Applications +--------------------------------------------------------- +To run test applications with AMI306 or YAS53x devices, +please use the following commands: + +1. for ami306: + mpu_iio -n ami306 -c 10 -l 3 + +2. for yas532: + mpu_iio -n yas532 -c 10 -l 3 + +Please use mpu_iio.c and iio_utils.h as example code for your development +purposes.
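As a rough illustration of the four streaming steps described in the README above, the short userspace sketch below writes the rate, enables the buffer and reads the character device. It is not part of the driver package; the iio:device0 index, the buffer/enable attribute path and the 50 Hz rate are assumptions for the example and should be adjusted to the actual IIO device present on your platform.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_sysfs(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd;

	/* step 1: set the desired output rate (50 Hz is only an example) */
	write_sysfs("/sys/bus/iio/devices/iio:device0/sampling_frequency", "50");
	/* step 2: write 1 to the enable attribute to start streaming
	 * (assumed to live under buffer/enable for this device) */
	write_sysfs("/sys/bus/iio/devices/iio:device0/buffer/enable", "1");

	/* steps 3 and 4: read raw samples from the device node, then parse them */
	fd = open("/dev/iio:device0", O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf));
	if (n > 0)
		printf("read %zd bytes of compass data\n", n);
	close(fd);
	return 0;
}

The layout of the bytes returned by the read depends on which scan elements are enabled; the mpu_iio.c and iio_utils.h sources mentioned above show how to work out the sample layout when parsing.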
diff --git a/drivers/staging/iio/magnetometer/inv_ami306_core.c b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_core.c similarity index 85% rename from drivers/staging/iio/magnetometer/inv_ami306_core.c rename to drivers/staging/iio/magnetometer/inv_compass/inv_ami306_core.c index c14a9033983..c003a0ea9e5 100644 --- a/drivers/staging/iio/magnetometer/inv_ami306_core.c +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_core.c @@ -36,19 +36,36 @@ #include #include #include + #include "inv_ami306_iio.h" -#include "../sysfs.h" +#include "../../sysfs.h" +#include "../../inv_test/inv_counters.h" static unsigned char late_initialize = true; +s32 i2c_write(const struct i2c_client *client, + u8 command, u8 length, const u8 *values) +{ + INV_I2C_INC_COMPASSWRITE(3); + return i2c_smbus_write_i2c_block_data(client, command, length, values); +} + +s32 i2c_read(const struct i2c_client *client, + u8 command, u8 length, u8 *values) +{ + INV_I2C_INC_COMPASSWRITE(3); + INV_I2C_INC_COMPASSREAD(length); + return i2c_smbus_read_i2c_block_data(client, command, length, values); +} + static int ami306_read_param(struct inv_ami306_state_s *st) { int result = 0; unsigned char regs[AMI_PARAM_LEN]; struct ami_sensor_parametor *param = &st->param; - result = i2c_smbus_read_i2c_block_data(st->i2c, REG_AMI_SENX, - AMI_PARAM_LEN, regs); + result = i2c_read(st->i2c, REG_AMI_SENX, + AMI_PARAM_LEN, regs); if (result < 0) return result; @@ -82,13 +99,13 @@ static int ami306_write_offset(const struct i2c_client *client, unsigned char dat[3]; dat[0] = (0x7f & fine[0]); dat[1] = 0; - result = i2c_smbus_write_i2c_block_data(client, REG_AMI_OFFX, 2, dat); + result = i2c_write(client, REG_AMI_OFFX, 2, dat); dat[0] = (0x7f & fine[1]); dat[1] = 0; - result = i2c_smbus_write_i2c_block_data(client, REG_AMI_OFFY, 2, dat); + result = i2c_write(client, REG_AMI_OFFY, 2, dat); dat[0] = (0x7f & fine[2]); dat[1] = 0; - result = i2c_smbus_write_i2c_block_data(client, REG_AMI_OFFZ, 2, dat); + result = i2c_write(client, REG_AMI_OFFZ, 2, dat); return result; } @@ -101,8 +118,7 @@ static int ami306_wait_data_ready(struct inv_ami306_state_s *st, for (; 0 < times; --times) { udelay(usecs); - result = i2c_smbus_read_i2c_block_data(st->i2c, - REG_AMI_STA1, 1, &buf); + result = i2c_read(st->i2c, REG_AMI_STA1, 1, &buf); if (result < 0) return INV_ERROR_COMPASS_DATA_NOT_READY; if (buf & AMI_STA1_DRDY_BIT) @@ -110,6 +126,7 @@ static int ami306_wait_data_ready(struct inv_ami306_state_s *st, else if (buf & AMI_STA1_DOR_BIT) return INV_ERROR_COMPASS_DATA_OVERFLOW; } + return INV_ERROR_COMPASS_DATA_NOT_READY; } int ami306_read_raw_data(struct inv_ami306_state_s *st, @@ -117,13 +134,13 @@ int ami306_read_raw_data(struct inv_ami306_state_s *st, { int result; unsigned char buf[6]; - result = i2c_smbus_read_i2c_block_data(st->i2c, REG_AMI_DATAX, - sizeof(buf), buf); + result = i2c_read(st->i2c, REG_AMI_DATAX, sizeof(buf), buf); if (result < 0) return result; dat[0] = le16_to_cpup((__le16 *)(&buf[0])); dat[1] = le16_to_cpup((__le16 *)(&buf[2])); dat[2] = le16_to_cpup((__le16 *)(&buf[4])); + return 0; } @@ -136,19 +153,19 @@ static int ami306_force_measurement(struct inv_ami306_state_s *st, int status; char buf; buf = AMI_CTRL3_FORCE_BIT; - result = i2c_smbus_write_i2c_block_data(st->i2c, - REG_AMI_CTRL3, 1, &buf); + result = i2c_write(st->i2c, REG_AMI_CTRL3, 1, &buf); if (result < 0) return result; - result = ami306_wait_data_ready(st, - AMI_DRDYWAIT, AMI_WAIT_DATAREADY_RETRY); + result = ami306_wait_data_ready(st, + AMI_DRDYWAIT, 
AMI_WAIT_DATAREADY_RETRY); if (result && result != INV_ERROR_COMPASS_DATA_OVERFLOW) return result; /* READ DATA X,Y,Z */ status = ami306_read_raw_data(st, ver); if (status) return status; + return result; } @@ -163,15 +180,13 @@ static int ami306_initial_b0_adjust(struct inv_ami306_state_s *st) unsigned char buf[3]; buf[0] = AMI_CTRL2_DREN; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL2, - 1, buf); + result = i2c_write(st->i2c, REG_AMI_CTRL2, 1, buf); if (result) return result; buf[0] = AMI_CTRL4_HS & 0xFF; buf[1] = (AMI_CTRL4_HS >> 8) & 0xFF; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL4, - 2, buf); + result = i2c_write(st->i2c, REG_AMI_CTRL4, 2, buf); if (result < 0) return result; @@ -199,8 +214,7 @@ static int ami306_initial_b0_adjust(struct inv_ami306_state_s *st) /* Software Reset */ buf[0] = AMI_CTRL3_SRST_BIT; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL3, 1, - buf); + result = i2c_write(st->i2c, REG_AMI_CTRL3, 1, buf); if (result < 0) return result; else @@ -214,27 +228,25 @@ static int ami306_start_sensor(struct inv_ami306_state_s *st) /* Step 1 */ buf[0] = (AMI_CTRL1_PC1 | AMI_CTRL1_FS1_FORCE); - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL1, 1, - buf); + result = i2c_write(st->i2c, REG_AMI_CTRL1, 1, buf); if (result < 0) return result; /* Step 2 */ buf[0] = AMI_CTRL2_DREN; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL2, 1, - buf); + result = i2c_write(st->i2c, REG_AMI_CTRL2, 1, buf); if (result < 0) return result; /* Step 3 */ buf[0] = (AMI_CTRL4_HS & 0xFF); buf[1] = (AMI_CTRL4_HS >> 8) & 0xFF; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL4, 2, - buf); + result = i2c_write(st->i2c, REG_AMI_CTRL4, 2, buf); if (result < 0) return result; /* Step 4 */ result = ami306_write_offset(st->i2c, st->fine); + return result; } @@ -243,32 +255,30 @@ int set_ami306_enable(struct iio_dev *indio_dev, int state) struct inv_ami306_state_s *st = iio_priv(indio_dev); int result; char buf; - if (state) { - buf = (AMI_CTRL1_PC1 | AMI_CTRL1_FS1_FORCE); - result = i2c_smbus_write_i2c_block_data(st->i2c, - REG_AMI_CTRL1, 1, &buf); - if (result < 0) - return result; - result = ami306_read_param(st); - if (result) - return result; - if (late_initialize) { - result = ami306_initial_b0_adjust(st); - if (result) - return result; - late_initialize = false; - } - result = ami306_start_sensor(st); - if (result) - return result; - buf = AMI_CTRL3_FORCE_BIT; - st->timestamp = iio_get_time_ns(); - result = i2c_smbus_write_i2c_block_data(st->i2c, - REG_AMI_CTRL3, 1, &buf); + buf = (AMI_CTRL1_PC1 | AMI_CTRL1_FS1_FORCE); + result = i2c_write(st->i2c, REG_AMI_CTRL1, 1, &buf); + if (result < 0) + return result; + + result = ami306_read_param(st); + if (result) + return result; + if (late_initialize) { + result = ami306_initial_b0_adjust(st); if (result) return result; + late_initialize = false; } + result = ami306_start_sensor(st); + if (result) + return result; + buf = AMI_CTRL3_FORCE_BIT; + st->timestamp = iio_get_time_ns(); + result = i2c_write(st->i2c, REG_AMI_CTRL3, 1, &buf); + if (result) + return result; + return 0; } @@ -281,8 +291,11 @@ static int ami306_read_raw(struct iio_dev *indio_dev, int *val2, long mask) { struct inv_ami306_state_s *st = iio_priv(indio_dev); + switch (mask) { case 0: + if (!(iio_buffer_enabled(indio_dev))) + return -EINVAL; if (chan->type == IIO_MAGN) { *val = st->compass_data[chan->channel2 - IIO_MOD_X]; return IIO_VAL_INT; @@ -300,25 +313,6 @@ static int ami306_read_raw(struct 
iio_dev *indio_dev, } } -/** - * ami306_write_raw() - write raw method. - */ -static int ami306_write_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int val, - int val2, - long mask) { - int result; - switch (mask) { - case IIO_CHAN_INFO_SCALE: - result = -EINVAL; - return result; - default: - return -EINVAL; - } - return 0; -} - /** * inv_compass_matrix_show() - show orientation matrix */ @@ -389,11 +383,8 @@ static ssize_t compass_cali_test(struct device *dev, /* Check if raw data match the gain from calibration file */ for (ii = 0; ii < 3; ii++) { - val = (short)(st->data_chk.ori[ii]); - - if (st->data_chk.gain[ii] > 0) - val = (short)(st->data_chk.ori[ii] * - 100 / st->data_chk.gain[ii]); + val = (short)(st->data_chk.ori[ii] * + st->data_chk.gain[ii] / 100); if (val == st->data_chk.post[ii]) bufcnt += sprintf(tmpbuf, @@ -401,7 +392,7 @@ static ssize_t compass_cali_test(struct device *dev, ii); else bufcnt += sprintf(tmpbuf, - "[axis-%d] Compensation FAIL. %d != %d\n", + "[axis-%d] Compensation FAIL. %d != %d", ii, val, st->data_chk.post[ii]); strncat(buf, tmpbuf, strlen(tmpbuf)); @@ -428,9 +419,17 @@ static void ami306_work_func(struct work_struct *work) struct iio_dev *indio_dev = iio_priv_to_dev(st); unsigned long delay = msecs_to_jiffies(st->delay); - inv_read_ami306_fifo(indio_dev); + mutex_lock(&indio_dev->mlock); + if (!(iio_buffer_enabled(indio_dev))) + goto error_ret; + st->timestamp = iio_get_time_ns(); schedule_delayed_work(&st->work, delay); + inv_read_ami306_fifo(indio_dev); + INV_I2C_INC_COMPASSIRQ(); + +error_ret: + mutex_unlock(&indio_dev->mlock); } static const struct iio_chan_spec compass_channels[] = { @@ -478,7 +477,6 @@ static const struct attribute_group inv_attribute_group = { static const struct iio_info ami306_info = { .driver_module = THIS_MODULE, .read_raw = &ami306_read_raw, - .write_raw = &ami306_write_raw, .attrs = &inv_attribute_group, }; @@ -506,7 +504,6 @@ static int inv_ami306_probe(struct i2c_client *client, } st = iio_priv(indio_dev); st->i2c = client; - st->sl_handle = client->adapter; st->plat_data = *(struct mpu_platform_data *)dev_get_platdata(&client->dev); st->delay = 10; @@ -519,7 +516,7 @@ static int inv_ami306_probe(struct i2c_client *client, /* Make state variables available to all _show and _store functions. */ i2c_set_clientdata(client, indio_dev); - result = i2c_smbus_read_i2c_block_data(st->i2c, REG_AMI_WIA, 1, &data); + result = i2c_read(st->i2c, REG_AMI_WIA, 1, &data); if (result < 0) goto out_free; if (data != DATA_WIA) diff --git a/drivers/staging/iio/magnetometer/inv_ami306_iio.h b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_iio.h similarity index 88% rename from drivers/staging/iio/magnetometer/inv_ami306_iio.h rename to drivers/staging/iio/magnetometer/inv_compass/inv_ami306_iio.h index 41c355eed97..b4e03b43d2a 100644 --- a/drivers/staging/iio/magnetometer/inv_ami306_iio.h +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_iio.h @@ -17,8 +17,9 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro.h - * @brief Struct definitions for the Invensense gyro driver. + * @file inv_ami306_iio.h + * @brief Struct definitions for the Invensense implementation + * of ami306 driver. 
*/ #ifndef _INV_GYRO_H_ @@ -30,9 +31,11 @@ #include #include #include -#include "../iio.h" -#include "../buffer.h" -#include "../trigger.h" + +#include "../../iio.h" +#include "../../buffer.h" +#include "../../trigger.h" + /** axis sensitivity(gain) calibration parameter information */ struct ami_vector3d { signed short x; /**< X-axis */ @@ -79,21 +82,24 @@ struct cali_data_check { /** * struct inv_ami306_state_s - Driver state variables. + * @plat_data: board file platform data. * @i2c: i2c client handle. - * @sl_handle: Handle to I2C port. + * @trig: not used. for compatibility. + * @param: ami specific sensor data. + * @work: work data structure. + * @delay: delay between each scheduled work. + * @fine: fine tunign parameters. + * @compass_data: compass data store. + * @timestamp: time stamp. */ struct inv_ami306_state_s { struct mpu_platform_data plat_data; struct i2c_client *i2c; - struct inv_chip_chan_info *chan_info; struct iio_trigger *trig; struct ami_sensor_parametor param; - unsigned char i2c_addr; - void *sl_handle; struct delayed_work work; int delay; - char enable; - char fine[3]; + s8 fine[3]; short compass_data[3]; s64 timestamp; struct cali_data_check data_chk; diff --git a/drivers/staging/iio/magnetometer/inv_ami306_ring.c b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_ring.c similarity index 82% rename from drivers/staging/iio/magnetometer/inv_ami306_ring.c rename to drivers/staging/iio/magnetometer/inv_compass/inv_ami306_ring.c index 9cff293f86a..d304c49c7ab 100644 --- a/drivers/staging/iio/magnetometer/inv_ami306_ring.c +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_ring.c @@ -17,9 +17,9 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro_misc.c - * @brief A sysfs device driver for Invensense gyroscopes. - * @details This file is part of inv_gyro driver code + * @file inv_ami306_ring.c + * @brief Invensense implementation for AMI306 + * @details This driver currently works for the AMI306 */ #include @@ -35,11 +35,13 @@ #include #include #include + +#include "../../iio.h" +#include "../../kfifo_buf.h" +#include "../../trigger_consumer.h" +#include "../../sysfs.h" + #include "inv_ami306_iio.h" -#include "../iio.h" -#include "../kfifo_buf.h" -#include "../trigger_consumer.h" -#include "../sysfs.h" #define AMI30X_CALIBRATION_PATH "/data/sensors/AMI304_Config.ini" #define AMI306_CALIBRATION_PATH "/data/sensors/AMI306_Config.ini" @@ -59,7 +61,7 @@ static int access_cali_file(int *gain, int target) int data[23]; int ii; - oldfs=get_fs(); + oldfs = get_fs(); set_fs(get_ds()); memset(buf, 0, sizeof(u8)*256); @@ -92,20 +94,19 @@ static int access_cali_file(int *gain, int target) if ((data[19] > 150) || (data[19] < 50) || (data[20] > 150) || (data[20] < 50) || (data[21] > 150) || (data[21] < 50)) { - for(ii = 0; ii < 3; ii++) + for (ii = 0; ii < 3; ii++) gain[ii] = 100; - }else{ - for(ii = 0; ii < 3; ii++) + } else { + for (ii = 0; ii < 3; ii++) gain[ii] = data[ii + 19]; } pr_info("gain: %d %d %d\n", gain[0], gain[1], gain[2]); return 0; - } - else - { - pr_info("Compass compensation: No target File. (%d)\n", target); + } else { + pr_info("Compass compensation: No target File. (%d)\n", + target); set_fs(oldfs); return -1; } @@ -114,18 +115,6 @@ static int access_cali_file(int *gain, int target) return -1; } -/** - * inv_irq_handler() - Cache a timestamp at each data ready interrupt. 
- */ -static irqreturn_t inv_ami_irq_handler(int irq, void *p) -{ - struct iio_poll_func *pf = p; - struct iio_dev *indio_dev = pf->indio_dev; - struct inv_ami306_state_s *st = iio_priv(indio_dev); - st->timestamp = iio_get_time_ns(); - - return IRQ_WAKE_THREAD; -} static int put_scan_to_buf(struct iio_dev *indio_dev, unsigned char *d, short *s, int scan_index) { struct iio_buffer *ring = indio_dev->buffer; @@ -167,7 +156,8 @@ int inv_read_ami306_fifo(struct iio_dev *indio_dev) if (!st->data_chk.load_cali) { for (ii = 0; ii < AMICaliMax; ii++) { - result = access_cali_file(st->data_chk.gain, ii); + result = + access_cali_file(st->data_chk.gain, ii); if (!result) { st->data_chk.fexist = 0; break; @@ -209,18 +199,34 @@ void inv_ami306_unconfigure_ring(struct iio_dev *indio_dev) static int inv_ami306_postenable(struct iio_dev *indio_dev) { struct inv_ami306_state_s *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; int result; + /* when all the outputs are disabled, even though buffer/enable is on, + do nothing */ + if (!(iio_scan_mask_query(indio_dev, ring, INV_AMI306_SCAN_MAGN_X) || + iio_scan_mask_query(indio_dev, ring, INV_AMI306_SCAN_MAGN_Y) || + iio_scan_mask_query(indio_dev, ring, INV_AMI306_SCAN_MAGN_Z))) + return 0; + result = set_ami306_enable(indio_dev, true); - schedule_delayed_work(&st->work, - msecs_to_jiffies(st->delay)); + if (result) + return result; + schedule_delayed_work(&st->work, msecs_to_jiffies(st->delay)); + return 0; } static int inv_ami306_predisable(struct iio_dev *indio_dev) { + struct iio_buffer *ring = indio_dev->buffer; struct inv_ami306_state_s *st = iio_priv(indio_dev); + cancel_delayed_work_sync(&st->work); + clear_bit(INV_AMI306_SCAN_MAGN_X, ring->scan_mask); + clear_bit(INV_AMI306_SCAN_MAGN_Y, ring->scan_mask); + clear_bit(INV_AMI306_SCAN_MAGN_Z, ring->scan_mask); + return 0; } diff --git a/drivers/staging/iio/magnetometer/inv_ami306_trigger.c b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_trigger.c similarity index 91% rename from drivers/staging/iio/magnetometer/inv_ami306_trigger.c rename to drivers/staging/iio/magnetometer/inv_compass/inv_ami306_trigger.c index 95f82ede2f8..2159edeaafc 100644 --- a/drivers/staging/iio/magnetometer/inv_ami306_trigger.c +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_trigger.c @@ -18,8 +18,8 @@ * * @{ * @file inv_ami306_trigger.c - * @brief A sysfs device driver for Invensense devices - * @details This file is part of inv_gyro driver code + * @brief Invensense implementation for AMI306 + * @details This driver currently works for the AMI306 */ #include @@ -37,9 +37,9 @@ #include #include -#include "../iio.h" -#include "../sysfs.h" -#include "../trigger.h" +#include "../../iio.h" +#include "../../sysfs.h" +#include "../../trigger.h" #include "inv_ami306_iio.h" static const struct iio_trigger_ops inv_ami306_trigger_ops = { diff --git a/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_core.c b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_core.c new file mode 100644 index 00000000000..6af420bb5cf --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_core.c @@ -0,0 +1,969 @@ +/* +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file inv_yas53x_core.c + * @brief Invensense implementation for yas530/yas532/yas533. + * @details This driver currently works for yas530/yas532/yas533. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "inv_yas53x_iio.h" +#include "sysfs.h" +#include "inv_test/inv_counters.h" + +/* -------------------------------------------------------------------------- */ +static int Cx, Cy1, Cy2; +static int /*a1, */ a2, a3, a4, a5, a6, a7, a8, a9; +static int k; + +static u8 dx, dy1, dy2; +static u8 d2, d3, d4, d5, d6, d7, d8, d9, d0; +static u8 dck, ver; + +/** + * inv_serial_read() - Read one or more bytes from the device registers. + * @st: Device driver instance. + * @reg: First device register to be read from. + * @length: Number of bytes to read. + * @data: Data read from device. + * NOTE: The slave register will not increment when reading from the FIFO. + */ +int inv_serial_read(struct inv_compass_state *st, u8 reg, u16 length, u8 *data) +{ + int result; + INV_I2C_INC_COMPASSWRITE(3); + INV_I2C_INC_COMPASSREAD(length); + result = i2c_smbus_read_i2c_block_data(st->client, reg, length, data); + if (result != length) { + if (result < 0) + return result; + else + return -EINVAL; + } else { + return 0; + } +} + +/** + * inv_serial_single_write() - Write a byte to a device register. + * @st: Device driver instance. + * @reg: Device register to be written to. + * @data: Byte to write to device. 
+ */ +int inv_serial_single_write(struct inv_compass_state *st, u8 reg, u8 data) +{ + u8 d[1]; + d[0] = data; + INV_I2C_INC_COMPASSWRITE(3); + + return i2c_smbus_write_i2c_block_data(st->client, reg, 1, d); +} + +static int set_hardware_offset(struct inv_compass_state *st, + char offset_x, char offset_y1, char offset_y2) +{ + char data; + int result = 0; + + data = offset_x & 0x3f; + result = inv_serial_single_write(st, YAS530_REGADDR_OFFSET_X, data); + if (result) + return result; + + data = offset_y1 & 0x3f; + result = inv_serial_single_write(st, YAS530_REGADDR_OFFSET_Y1, data); + if (result) + return result; + + data = offset_y2 & 0x3f; + result = inv_serial_single_write(st, YAS530_REGADDR_OFFSET_Y2, data); + return result; +} + +static int set_measure_command(struct inv_compass_state *st) +{ + int result = 0; + result = inv_serial_single_write(st, + YAS530_REGADDR_MEASURE_COMMAND, 0x01); + return result; +} + +static int measure_normal(struct inv_compass_state *st, + int *busy, unsigned short *t, + unsigned short *x, unsigned short *y1, + unsigned short *y2) +{ + int result; + ktime_t sleeptime; + result = set_measure_command(st); + sleeptime = ktime_set(0, 2 * NSEC_PER_MSEC); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_hrtimeout(&sleeptime, HRTIMER_MODE_REL); + + result = st->read_data(st, busy, t, x, y1, y2); + + return result; +} + +static int measure_int(struct inv_compass_state *st, + int *busy, unsigned short *t, + unsigned short *x, unsigned short *y1, + unsigned short *y2) +{ + int result; + if (st->first_read_after_reset) { + st->first_read_after_reset = 0; + result = 1; + } else { + result = st->read_data(st, busy, t, x, y1, y2); + } + result |= set_measure_command(st); + + return result; +} + +static int yas530_read_data(struct inv_compass_state *st, + int *busy, u16 *t, u16 *x, u16 *y1, u16 *y2) +{ + u8 data[8]; + u16 b, to, xo, y1o, y2o; + int result; + + result = inv_serial_read(st, + YAS530_REGADDR_MEASURE_DATA, 8, data); + if (result) + return result; + + b = (data[0] >> 7) & 0x01; + to = (s16)(((data[0] << 2) & 0x1fc) | ((data[1] >> 6) & 0x03)); + xo = (s16)(((data[2] << 5) & 0xfe0) | ((data[3] >> 3) & 0x1f)); + y1o = (s16)(((data[4] << 5) & 0xfe0) | ((data[5] >> 3) & 0x1f)); + y2o = (s16)(((data[6] << 5) & 0xfe0) | ((data[7] >> 3) & 0x1f)); + + *busy = b; + *t = to; + *x = xo; + *y1 = y1o; + *y2 = y2o; + + return 0; +} + +static int yas532_533_read_data(struct inv_compass_state *st, + int *busy, u16 *t, u16 *x, u16 *y1, u16 *y2) +{ + u8 data[8]; + u16 b, to, xo, y1o, y2o; + int result; + + result = inv_serial_read(st, + YAS530_REGADDR_MEASURE_DATA, 8, data); + if (result) + return result; + + b = (data[0] >> 7) & 0x01; + to = (s16)((((s32)data[0] << 3) & 0x3f8) | ((data[1] >> 5) & 0x07)); + xo = (s16)((((s32)data[2] << 6) & 0x1fc0) | ((data[3] >> 2) & 0x3f)); + y1o = (s16)((((s32)data[4] << 6) & 0x1fc0) | ((data[5] >> 2) & 0x3f)); + y2o = (s16)((((s32)data[6] << 6) & 0x1fc0) | ((data[7] >> 2) & 0x3f)); + + *busy = b; + *t = to; + *x = xo; + *y1 = y1o; + *y2 = y2o; + + return 0; +} + +static int check_offset(struct inv_compass_state *st, + char offset_x, char offset_y1, char offset_y2, + int *flag_x, int *flag_y1, int *flag_y2) +{ + int result; + int busy; + short t, x, y1, y2; + + result = set_hardware_offset(st, offset_x, offset_y1, offset_y2); + if (result) + return result; + result = measure_normal(st, &busy, &t, &x, &y1, &y2); + if (result) + return result; + *flag_x = 0; + *flag_y1 = 0; + *flag_y2 = 0; + + if (x > st->center) + *flag_x = 1; + if (y1 > 
st->center) + *flag_y1 = 1; + if (y2 > st->center) + *flag_y2 = 1; + if (x < st->center) + *flag_x = -1; + if (y1 < st->center) + *flag_y1 = -1; + if (y2 < st->center) + *flag_y2 = -1; + + return result; +} + +static int measure_and_set_offset(struct inv_compass_state *st, + char *offset) +{ + int i; + int result = 0; + char offset_x = 0, offset_y1 = 0, offset_y2 = 0; + int flag_x = 0, flag_y1 = 0, flag_y2 = 0; + static const int correct[5] = {16, 8, 4, 2, 1}; + + for (i = 0; i < 5; i++) { + result = check_offset(st, + offset_x, offset_y1, offset_y2, + &flag_x, &flag_y1, &flag_y2); + if (result) + return result; + if (flag_x) + offset_x += flag_x * correct[i]; + if (flag_y1) + offset_y1 += flag_y1 * correct[i]; + if (flag_y2) + offset_y2 += flag_y2 * correct[i]; + } + + result = set_hardware_offset(st, offset_x, offset_y1, offset_y2); + if (result) + return result; + offset[0] = offset_x; + offset[1] = offset_y1; + offset[2] = offset_y2; + + return result; +} + +static void coordinate_conversion(short x, short y1, short y2, short t, + int *xo, int *yo, int *zo) +{ + int sx, sy1, sy2, sy, sz; + int hx, hy, hz; + + sx = x - (Cx * t) / 100; + sy1 = y1 - (Cy1 * t) / 100; + sy2 = y2 - (Cy2 * t) / 100; + + sy = sy1 - sy2; + sz = -sy1 - sy2; + + hx = k * ((100 * sx + a2 * sy + a3 * sz) / 10); + hy = k * ((a4 * sx + a5 * sy + a6 * sz) / 10); + hz = k * ((a7 * sx + a8 * sy + a9 * sz) / 10); + + *xo = hx; + *yo = hy; + *zo = hz; +} + +static int get_cal_data_yas532_533(struct inv_compass_state *st) +{ + u8 data[YAS_YAS532_533_CAL_DATA_SIZE]; + int result; + + result = inv_serial_read(st, YAS530_REGADDR_CAL, + YAS_YAS532_533_CAL_DATA_SIZE, data); + if (result) + return result; + /* CAL data Second Read */ + result = inv_serial_read(st, YAS530_REGADDR_CAL, + YAS_YAS532_533_CAL_DATA_SIZE, data); + if (result) + return result; + + dx = data[0]; + dy1 = data[1]; + dy2 = data[2]; + d2 = (data[3] >> 2) & 0x03f; + d3 = (u8)(((data[3] << 2) & 0x0c) | ((data[4] >> 6) & 0x03)); + d4 = (u8)(data[4] & 0x3f); + d5 = (data[5] >> 2) & 0x3f; + d6 = (u8)(((data[5] << 4) & 0x30) | ((data[6] >> 4) & 0x0f)); + d7 = (u8)(((data[6] << 3) & 0x78) | ((data[7] >> 5) & 0x07)); + d8 = (u8)(((data[7] << 1) & 0x3e) | ((data[8] >> 7) & 0x01)); + d9 = (u8)(((data[8] << 1) & 0xfe) | ((data[9] >> 7) & 0x01)); + d0 = (u8)((data[9] >> 2) & 0x1f); + dck = (u8)(((data[9] << 1) & 0x06) | ((data[10] >> 7) & 0x01)); + ver = (u8)((data[13]) & 0x01); + + Cx = dx * 10 - 1280; + Cy1 = dy1 * 10 - 1280; + Cy2 = dy2 * 10 - 1280; + a2 = d2 - 32; + a3 = d3 - 8; + a4 = d4 - 32; + a5 = d5 + 38; + a6 = d6 - 32; + a7 = d7 - 64; + a8 = d8 - 32; + a9 = d9; + k = d0; + + return 0; +} + +static int get_cal_data_yas530(struct inv_compass_state *st) +{ + u8 data[YAS_YAS530_CAL_DATA_SIZE]; + int result; + /* CAL data read */ + result = inv_serial_read(st, YAS530_REGADDR_CAL, + YAS_YAS530_CAL_DATA_SIZE, data); + if (result) + return result; + /* CAL data Second Read */ + result = inv_serial_read(st, YAS530_REGADDR_CAL, + YAS_YAS530_CAL_DATA_SIZE, data); + if (result) + return result; + /*Cal data */ + dx = data[0]; + dy1 = data[1]; + dy2 = data[2]; + d2 = (data[3] >> 2) & 0x03f; + d3 = ((data[3] << 2) & 0x0c) | ((data[4] >> 6) & 0x03); + d4 = data[4] & 0x3f; + d5 = (data[5] >> 2) & 0x3f; + d6 = ((data[5] << 4) & 0x30) | ((data[6] >> 4) & 0x0f); + d7 = ((data[6] << 3) & 0x78) | ((data[7] >> 5) & 0x07); + d8 = ((data[7] << 1) & 0x3e) | ((data[8] >> 7) & 0x01); + d9 = ((data[8] << 1) & 0xfe) | ((data[9] >> 7) & 0x01); + d0 = (data[9] >> 2) & 0x1f; + dck = 
((data[9] << 1) & 0x06) | ((data[10] >> 7) & 0x01); + ver = (u8)((data[15]) & 0x03); + + /*Correction Data */ + Cx = (int)dx * 6 - 768; + Cy1 = (int)dy1 * 6 - 768; + Cy2 = (int)dy2 * 6 - 768; + a2 = (int)d2 - 32; + a3 = (int)d3 - 8; + a4 = (int)d4 - 32; + a5 = (int)d5 + 38; + a6 = (int)d6 - 32; + a7 = (int)d7 - 64; + a8 = (int)d8 - 32; + a9 = (int)d9; + k = (int)d0 + 10; + + return 0; +} + + +static void thresh_filter_init(struct yas_thresh_filter *thresh_filter, + int threshold) +{ + thresh_filter->threshold = threshold; + thresh_filter->last = 0; +} + +static void +adaptive_filter_init(struct yas_adaptive_filter *adap_filter, int len, + int noise) +{ + int i; + + adap_filter->num = 0; + adap_filter->index = 0; + adap_filter->filter_noise = noise; + adap_filter->filter_len = len; + + for (i = 0; i < adap_filter->filter_len; ++i) + adap_filter->sequence[i] = 0; +} + +static void yas_init_adap_filter(struct inv_compass_state *st) +{ + struct yas_filter *f; + int i; + int noise[] = {YAS_MAG_DEFAULT_FILTER_NOISE_X, + YAS_MAG_DEFAULT_FILTER_NOISE_Y, + YAS_MAG_DEFAULT_FILTER_NOISE_Z}; + + f = &st->filter; + f->filter_len = YAS_MAG_DEFAULT_FILTER_LEN; + for (i = 0; i < 3; i++) + f->filter_noise[i] = noise[i]; + + for (i = 0; i < 3; i++) { + adaptive_filter_init(&f->adap_filter[i], f->filter_len, + f->filter_noise[i]); + thresh_filter_init(&f->thresh_filter[i], f->filter_thresh); + } +} + +int yas53x_resume(struct inv_compass_state *st) +{ + int result = 0; + + unsigned char dummyData = 0x00; + unsigned char read_reg[1]; + + /* =============================================== */ + + /* Step 1 - Test register initialization */ + dummyData = 0x00; + result = inv_serial_single_write(st, + YAS530_REGADDR_TEST1, dummyData); + if (result) + return result; + result = + inv_serial_single_write(st, + YAS530_REGADDR_TEST2, dummyData); + if (result) + return result; + /* Device ID read */ + result = inv_serial_read(st, + YAS530_REGADDR_DEVICE_ID, 1, read_reg); + + /*Step 2 Read the CAL register */ + st->get_cal_data(st); + + /*Obtain the [49:47] bits */ + dck &= 0x07; + + /*Step 3 : Storing the CONFIG with the CLK value */ + dummyData = 0x00 | (dck << 2); + result = inv_serial_single_write(st, + YAS530_REGADDR_CONFIG, dummyData); + if (result) + return result; + /*Step 4 : Set Acquisition Interval Register */ + dummyData = 0x00; + result = inv_serial_single_write(st, + YAS530_REGADDR_MEASURE_INTERVAL, + dummyData); + if (result) + return result; + + /*Step 5 : Reset Coil */ + dummyData = 0x00; + result = inv_serial_single_write(st, + YAS530_REGADDR_ACTUATE_INIT_COIL, + dummyData); + if (result) + return result; + /* Offset Measurement and Set */ + result = measure_and_set_offset(st, st->offset); + if (result) + return result; + st->first_measure_after_reset = 1; + st->first_read_after_reset = 1; + st->reset_timer = 0; + + yas_init_adap_filter(st); + + return result; +} + +static int inv_check_range(struct inv_compass_state *st, s16 x, s16 y1, s16 y2) +{ + int result = 0; + + if (x == 0) + result |= 0x01; + if (x == st->overflow_bound) + result |= 0x02; + if (y1 == 0) + result |= 0x04; + if (y1 == st->overflow_bound) + result |= 0x08; + if (y2 == 0) + result |= 0x10; + if (y2 == st->overflow_bound) + result |= 0x20; + + return result; +} +static int square(int data) +{ + return data * data; +} + +static int +adaptive_filter_filter(struct yas_adaptive_filter *adap_filter, int in) +{ + int avg, sum; + int i; + + if (adap_filter->filter_len == 0) + return in; + if (adap_filter->num < adap_filter->filter_len) { + 
adap_filter->sequence[adap_filter->index++] = in / 100; + adap_filter->num++; + return in; + } + if (adap_filter->filter_len <= adap_filter->index) + adap_filter->index = 0; + adap_filter->sequence[adap_filter->index++] = in / 100; + + avg = 0; + for (i = 0; i < adap_filter->filter_len; i++) + avg += adap_filter->sequence[i]; + avg /= adap_filter->filter_len; + + sum = 0; + for (i = 0; i < adap_filter->filter_len; i++) + sum += square(avg - adap_filter->sequence[i]); + sum /= adap_filter->filter_len; + + if (sum <= adap_filter->filter_noise) + return avg * 100; + + return ((in/100 - avg) * (sum - adap_filter->filter_noise) / sum + avg) + * 100; +} + +static int +thresh_filter_filter(struct yas_thresh_filter *thresh_filter, int in) +{ + if (in < thresh_filter->last - thresh_filter->threshold + || thresh_filter->last + + thresh_filter->threshold < in) { + thresh_filter->last = in; + return in; + } else { + return thresh_filter->last; + } +} + +static void +filter_filter(struct yas_filter *d, int *orig, int *filtered) +{ + int i; + + for (i = 0; i < 3; i++) { + filtered[i] = adaptive_filter_filter(&d->adap_filter[i], + orig[i]); + filtered[i] = thresh_filter_filter(&d->thresh_filter[i], + filtered[i]); + } +} + +int yas53x_read(struct inv_compass_state *st, short rawfixed[3], + int *overunderflow) +{ + int result = 0; + + int busy, i, ov; + short t, x, y1, y2; + s32 xyz[3], disturb[3]; + + result = measure_int(st, &busy, &t, &x, &y1, &y2); + if (result) + return result; + if (busy) + return -1; + coordinate_conversion(x, y1, y2, t, &xyz[0], &xyz[1], &xyz[2]); + filter_filter(&st->filter, xyz, xyz); + for (i = 0; i < 3; i++) + rawfixed[i] = (short)(xyz[i] / 100); + + if (st->first_measure_after_reset) { + for (i = 0; i < 3; i++) + st->base_compass_data[i] = rawfixed[i]; + st->first_measure_after_reset = 0; + } + ov = 0; + for (i = 0; i < 3; i++) { + disturb[i] = abs(st->base_compass_data[i] - rawfixed[i]); + if (disturb[i] > YAS_MAG_DISTURBURNCE_THRESHOLD) + ov = 1; + } + if (ov) + st->reset_timer += st->delay; + else + st->reset_timer = 0; + + if (st->reset_timer > YAS_RESET_COIL_TIME_THRESHOLD) + *overunderflow = (1<<8); + else + *overunderflow = 0; + *overunderflow |= inv_check_range(st, x, y1, y2); + + return 0; +} + +/** + * yas53x_read_raw() - read raw method. 
+ */ +static int yas53x_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, + int *val2, + long mask) { + struct inv_compass_state *st = iio_priv(indio_dev); + + switch (mask) { + case 0: + if (!(iio_buffer_enabled(indio_dev))) + return -EINVAL; + if (chan->type == IIO_MAGN) { + *val = st->compass_data[chan->channel2 - IIO_MOD_X]; + return IIO_VAL_INT; + } + + return -EINVAL; + case IIO_CHAN_INFO_SCALE: + if (chan->type == IIO_MAGN) { + *val = YAS530_SCALE; + return IIO_VAL_INT; + } + return -EINVAL; + default: + return -EINVAL; + } +} + +/** + * inv_compass_matrix_show() - show orientation matrix + */ +static ssize_t inv_compass_matrix_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + signed char *m; + struct inv_compass_state *st = iio_priv(indio_dev); + m = st->plat_data.orientation; + return sprintf(buf, + "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); +} + +static ssize_t yas53x_rate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 data; + int error; + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct inv_compass_state *st = iio_priv(indio_dev); + + error = kstrtoint(buf, 10, &data); + if (error) + return error; + if (0 == data) + return -EINVAL; + /* transform rate to delay in ms */ + data = MSEC_PER_SEC / data; + + if (data > YAS530_MAX_DELAY) + data = YAS530_MAX_DELAY; + if (data < YAS530_MIN_DELAY) + data = YAS530_MIN_DELAY; + st->delay = data; + + return count; +} + +static ssize_t yas53x_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct inv_compass_state *st = iio_priv(indio_dev); + /* transform delay in ms to rate */ + return sprintf(buf, "%d\n", (int)MSEC_PER_SEC / st->delay); +} + +static ssize_t yas53x_overunderflow_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 data; + int error; + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct inv_compass_state *st = iio_priv(indio_dev); + + error = kstrtoint(buf, 10, &data); + if (error) + return error; + if (data) + return -EINVAL; + st->overunderflow = data; + + return count; +} + +static ssize_t yas53x_overunderflow_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct inv_compass_state *st = iio_priv(indio_dev); + + return sprintf(buf, "%d\n", st->overunderflow); +} + +void set_yas53x_enable(struct iio_dev *indio_dev, bool enable) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + + yas_init_adap_filter(st); + st->first_measure_after_reset = 1; + st->first_read_after_reset = 1; + schedule_delayed_work(&st->work, msecs_to_jiffies(st->delay)); +} + +static void yas53x_work_func(struct work_struct *work) +{ + struct inv_compass_state *st = + container_of((struct delayed_work *)work, + struct inv_compass_state, work); + struct iio_dev *indio_dev = iio_priv_to_dev(st); + u32 delay = msecs_to_jiffies(st->delay); + + mutex_lock(&indio_dev->mlock); + if (!(iio_buffer_enabled(indio_dev))) + goto error_ret; + + schedule_delayed_work(&st->work, delay); + inv_read_yas53x_fifo(indio_dev); + INV_I2C_INC_COMPASSIRQ(); + +error_ret: + mutex_unlock(&indio_dev->mlock); +} + +static const struct iio_chan_spec compass_channels[] = { + { + .type = IIO_MAGN, + .modified = 1, + .channel2 = IIO_MOD_X, + .info_mask 
= IIO_CHAN_INFO_SCALE_SHARED_BIT, + .scan_index = INV_YAS53X_SCAN_MAGN_X, + .scan_type = IIO_ST('s', 16, 16, 0) + }, { + .type = IIO_MAGN, + .modified = 1, + .channel2 = IIO_MOD_Y, + .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, + .scan_index = INV_YAS53X_SCAN_MAGN_Y, + .scan_type = IIO_ST('s', 16, 16, 0) + }, { + .type = IIO_MAGN, + .modified = 1, + .channel2 = IIO_MOD_Z, + .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, + .scan_index = INV_YAS53X_SCAN_MAGN_Z, + .scan_type = IIO_ST('s', 16, 16, 0) + }, + IIO_CHAN_SOFT_TIMESTAMP(INV_YAS53X_SCAN_TIMESTAMP) +}; + +static DEVICE_ATTR(compass_matrix, S_IRUGO, inv_compass_matrix_show, NULL); +static DEVICE_ATTR(sampling_frequency, S_IRUGO | S_IWUSR, yas53x_rate_show, + yas53x_rate_store); +static DEVICE_ATTR(overunderflow, S_IRUGO | S_IWUSR, + yas53x_overunderflow_show, yas53x_overunderflow_store); + +static struct attribute *inv_yas53x_attributes[] = { + &dev_attr_compass_matrix.attr, + &dev_attr_sampling_frequency.attr, + &dev_attr_overunderflow.attr, + NULL, +}; +static const struct attribute_group inv_attribute_group = { + .name = "yas53x", + .attrs = inv_yas53x_attributes +}; + +static const struct iio_info yas53x_info = { + .driver_module = THIS_MODULE, + .read_raw = &yas53x_read_raw, + .attrs = &inv_attribute_group, +}; + +/*constant IIO attribute */ +/** + * inv_yas53x_probe() - probe function. + */ +static int inv_yas53x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct inv_compass_state *st; + struct iio_dev *indio_dev; + int result; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + result = -ENODEV; + goto out_no_free; + } + indio_dev = iio_allocate_device(sizeof(*st)); + if (indio_dev == NULL) { + result = -ENOMEM; + goto out_no_free; + } + st = iio_priv(indio_dev); + st->client = client; + st->plat_data = + *(struct mpu_platform_data *)dev_get_platdata(&client->dev); + st->delay = 10; + + i2c_set_clientdata(client, indio_dev); + + if (!strcmp(id->name, "yas530")) { + st->read_data = yas530_read_data; + st->get_cal_data = get_cal_data_yas530; + st->overflow_bound = YAS_YAS530_DATA_OVERFLOW; + st->center = YAS_YAS530_DATA_CENTER; + st->filter.filter_thresh = YAS530_MAG_DEFAULT_FILTER_THRESH; + } else { + st->read_data = yas532_533_read_data; + st->get_cal_data = get_cal_data_yas532_533; + st->overflow_bound = YAS_YAS532_533_DATA_OVERFLOW; + st->center = YAS_YAS532_533_DATA_CENTER; + st->filter.filter_thresh = YAS532_MAG_DEFAULT_FILTER_THRESH; + } + st->upper_bound = st->center + (st->center >> 1); + st->lower_bound = (st->center >> 1); + + result = yas53x_resume(st); + if (result) + goto out_free; + + indio_dev->dev.parent = &client->dev; + indio_dev->name = id->name; + indio_dev->channels = compass_channels; + indio_dev->num_channels = ARRAY_SIZE(compass_channels); + indio_dev->info = &yas53x_info; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->currentmode = INDIO_DIRECT_MODE; + + result = inv_yas53x_configure_ring(indio_dev); + if (result) + goto out_free; + result = iio_buffer_register(indio_dev, indio_dev->channels, + indio_dev->num_channels); + if (result) + goto out_unreg_ring; + result = inv_yas53x_probe_trigger(indio_dev); + if (result) + goto out_remove_ring; + + result = iio_device_register(indio_dev); + if (result) + goto out_remove_trigger; + INIT_DELAYED_WORK(&st->work, yas53x_work_func); + pr_info("%s: Probe name %s\n", __func__, id->name); + + return 0; +out_remove_trigger: + if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) + inv_yas53x_remove_trigger(indio_dev); 
+out_remove_ring: + iio_buffer_unregister(indio_dev); +out_unreg_ring: + inv_yas53x_unconfigure_ring(indio_dev); +out_free: + iio_free_device(indio_dev); +out_no_free: + dev_err(&client->adapter->dev, "%s failed %d\n", __func__, result); + return -EIO; +} + +/** + * inv_yas53x_remove() - remove function. + */ +static int inv_yas53x_remove(struct i2c_client *client) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct inv_compass_state *st = iio_priv(indio_dev); + cancel_delayed_work_sync(&st->work); + iio_device_unregister(indio_dev); + inv_yas53x_remove_trigger(indio_dev); + iio_buffer_unregister(indio_dev); + inv_yas53x_unconfigure_ring(indio_dev); + iio_free_device(indio_dev); + + dev_info(&client->adapter->dev, "inv_yas53x_iio module removed.\n"); + return 0; +} +static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; +/* device id table is used to identify what device can be + * supported by this driver + */ +static const struct i2c_device_id inv_yas53x_id[] = { + {"yas530", 0}, + {"yas532", 0}, + {"yas533", 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, inv_yas53x_id); + +static struct i2c_driver inv_yas53x_driver = { + .class = I2C_CLASS_HWMON, + .probe = inv_yas53x_probe, + .remove = inv_yas53x_remove, + .id_table = inv_yas53x_id, + .driver = { + .owner = THIS_MODULE, + .name = "inv_yas53x_iio", + }, + .address_list = normal_i2c, +}; + +static int __init inv_yas53x_init(void) +{ + int result = i2c_add_driver(&inv_yas53x_driver); + if (result) { + pr_err("%s failed\n", __func__); + return result; + } + return 0; +} + +static void __exit inv_yas53x_exit(void) +{ + i2c_del_driver(&inv_yas53x_driver); +} + +module_init(inv_yas53x_init); +module_exit(inv_yas53x_exit); + +MODULE_AUTHOR("Invensense Corporation"); +MODULE_DESCRIPTION("Invensense device driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("inv_yas53x_iio"); +/** + * @} + */ + diff --git a/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_iio.h b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_iio.h new file mode 100644 index 00000000000..92bf0af7ec7 --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_iio.h @@ -0,0 +1,172 @@ +/* +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file inv_yas53x_iio.h + * @brief Struct definitions for the Invensense implementation + * of yas53x driver. + */ + +#ifndef _INV_GYRO_H_ +#define _INV_GYRO_H_ + +#include +#include +#include +#include +#include +#include + +#include "iio.h" +#include "buffer.h" +#include "trigger.h" + +#define YAS_MAG_MAX_FILTER_LEN 30 +struct yas_adaptive_filter { + int num; + int index; + int filter_len; + int filter_noise; + int sequence[YAS_MAG_MAX_FILTER_LEN]; +}; + +struct yas_thresh_filter { + int threshold; + int last; +}; + +struct yas_filter { + int filter_len; + int filter_thresh; + int filter_noise[3]; + struct yas_adaptive_filter adap_filter[3]; + struct yas_thresh_filter thresh_filter[3]; +}; + +/** + * struct inv_compass_state - Driver state variables. 
+ * @plat_data: mpu platform data from board file. + * @client: i2c client handle. + * @chan_info: channel information. + * @trig: IIO trigger. + * @work: work structure. + * @delay: delay to schedule the next work. + * @overflow_bound: bound to determine overflow. + * @center: center of the measurement. + * @compass_data[3]: compass data store. + * @offset[3]: yas530 specific data. + * @base_compass_data[3]: first measure data after reset. + * @first_measure_after_reset:1: flag for first measurement after reset. + * @first_read_after_reset:1: flag for first read after reset. + * @reset_timer: timer to accumulate overflow conditions. + * @overunderflow:1: overflow and underflow flag. + * @filter: filter data structure. + * @read_data: function pointer of reading data from device. + * @get_cal_data: function pointer of reading cal data. + */ +struct inv_compass_state { + struct mpu_platform_data plat_data; + struct i2c_client *client; + struct iio_trigger *trig; + struct delayed_work work; + s16 delay; + s16 overflow_bound; + s16 upper_bound; + s16 lower_bound; + s16 center; + s16 compass_data[3]; + s8 offset[3]; + s16 base_compass_data[3]; + u8 first_measure_after_reset:1; + u8 first_read_after_reset:1; + u8 overunderflow:1; + s32 reset_timer; + struct yas_filter filter; + int (*read_data)(struct inv_compass_state *st, + int *, u16 *, u16 *, u16 *, u16 *); + int (*get_cal_data)(struct inv_compass_state *); +}; + +/* scan element definition */ +enum inv_mpu_scan { + INV_YAS53X_SCAN_MAGN_X, + INV_YAS53X_SCAN_MAGN_Y, + INV_YAS53X_SCAN_MAGN_Z, + INV_YAS53X_SCAN_TIMESTAMP, +}; + +#define YAS530_REGADDR_DEVICE_ID 0x80 +#define YAS530_REGADDR_ACTUATE_INIT_COIL 0x81 +#define YAS530_REGADDR_MEASURE_COMMAND 0x82 +#define YAS530_REGADDR_CONFIG 0x83 +#define YAS530_REGADDR_MEASURE_INTERVAL 0x84 +#define YAS530_REGADDR_OFFSET_X 0x85 +#define YAS530_REGADDR_OFFSET_Y1 0x86 +#define YAS530_REGADDR_OFFSET_Y2 0x87 +#define YAS530_REGADDR_TEST1 0x88 +#define YAS530_REGADDR_TEST2 0x89 +#define YAS530_REGADDR_CAL 0x90 +#define YAS530_REGADDR_MEASURE_DATA 0xb0 + +#define YAS530_MAX_DELAY 200 +#define YAS530_MIN_DELAY 5 +#define YAS530_SCALE 107374182L + +#define YAS_YAS530_VERSION_A 0 /* YAS530 (MS-3E Aver) */ +#define YAS_YAS530_VERSION_B 1 /* YAS530B (MS-3E Bver) */ +#define YAS_YAS530_VERSION_A_COEF 380 +#define YAS_YAS530_VERSION_B_COEF 550 +#define YAS_YAS530_DATA_CENTER 2048 +#define YAS_YAS530_DATA_OVERFLOW 4095 +#define YAS_YAS530_CAL_DATA_SIZE 16 + +/*filter related defines */ +#define YAS_MAG_DEFAULT_FILTER_NOISE_X 144 /* sd: 1200 nT */ +#define YAS_MAG_DEFAULT_FILTER_NOISE_Y 144 /* sd: 1200 nT */ +#define YAS_MAG_DEFAULT_FILTER_NOISE_Z 144 /* sd: 1200 nT */ +#define YAS_MAG_DEFAULT_FILTER_LEN 20 + +#define YAS530_MAG_DEFAULT_FILTER_THRESH 100 +#define YAS532_MAG_DEFAULT_FILTER_THRESH 300 + +#define YAS_YAS532_533_VERSION_AB 0 /* YAS532_533AB (MS-3R/3F ABver) */ +#define YAS_YAS532_533_VERSION_AC 1 /* YAS532_533AC (MS-3R/3F ACver) */ +#define YAS_YAS532_533_VERSION_AB_COEF 1800 +#define YAS_YAS532_533_VERSION_AC_COEF 900 +#define YAS_YAS532_533_DATA_CENTER 4096 +#define YAS_YAS532_533_DATA_OVERFLOW 8190 +#define YAS_YAS532_533_CAL_DATA_SIZE 14 + +#define YAS_MAG_DISTURBURNCE_THRESHOLD 1600 +#define YAS_RESET_COIL_TIME_THRESHOLD 3000 + +#define INV_ERROR_COMPASS_DATA_OVERFLOW (-1) +#define INV_ERROR_COMPASS_DATA_NOT_READY (-2) + +int inv_yas53x_configure_ring(struct iio_dev *indio_dev); +void inv_yas53x_unconfigure_ring(struct iio_dev *indio_dev); +int inv_yas53x_probe_trigger(struct iio_dev 
*indio_dev); +void inv_yas53x_remove_trigger(struct iio_dev *indio_dev); +void set_yas53x_enable(struct iio_dev *indio_dev, bool enable); +void inv_read_yas53x_fifo(struct iio_dev *indio_dev); +int yas53x_read(struct inv_compass_state *st, short rawfixed[3], + s32 *overunderflow); +int yas53x_resume(struct inv_compass_state *st); + +#endif /* #ifndef _INV_GYRO_H_ */ + diff --git a/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_ring.c b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_ring.c new file mode 100644 index 00000000000..efcf49c6839 --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_ring.c @@ -0,0 +1,165 @@ +/* +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file inv_yas53x_ring.c + * @brief Invensense implementation for yas530/yas532/yas533. + * @details This driver currently works for the yas530/yas532/yas533. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iio.h" +#include "kfifo_buf.h" +#include "trigger_consumer.h" +#include "sysfs.h" + +#include "inv_yas53x_iio.h" + +static s64 get_time_ns(void) +{ + struct timespec ts; + ktime_get_ts(&ts); + + return timespec_to_ns(&ts); +} + +static int put_scan_to_buf(struct iio_dev *indio_dev, unsigned char *d, + short *s, int scan_index) +{ + struct iio_buffer *ring = indio_dev->buffer; + int st; + int i, d_ind; + + d_ind = 0; + for (i = 0; i < 3; i++) { + st = iio_scan_mask_query(indio_dev, ring, scan_index + i); + if (st) { + memcpy(&d[d_ind], &s[i], sizeof(s[i])); + d_ind += sizeof(s[i]); + } + } + + return d_ind; +} + +/** + * inv_read_yas53x_fifo() - Transfer data from FIFO to ring buffer. 
+ */ +void inv_read_yas53x_fifo(struct iio_dev *indio_dev) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + int d_ind; + s32 overunderflow; + s8 *tmp; + s64 tmp_buf[2]; + + if (!yas53x_read(st, st->compass_data, &overunderflow)) { + tmp = (u8 *)tmp_buf; + d_ind = put_scan_to_buf(indio_dev, tmp, st->compass_data, + INV_YAS53X_SCAN_MAGN_X); + if (ring->scan_timestamp) + tmp_buf[(d_ind + 7) / 8] = get_time_ns(); + ring->access->store_to(indio_dev->buffer, tmp, 0); + + if (overunderflow) { + yas53x_resume(st); + if (!st->overunderflow) + st->overunderflow = 1; + } + } +} + +void inv_yas53x_unconfigure_ring(struct iio_dev *indio_dev) +{ + iio_kfifo_free(indio_dev->buffer); +}; + +static int inv_yas53x_postenable(struct iio_dev *indio_dev) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + + /* when all the outputs are disabled, even though buffer/enable is on, + do nothing */ + if (!(iio_scan_mask_query(indio_dev, ring, INV_YAS53X_SCAN_MAGN_X) || + iio_scan_mask_query(indio_dev, ring, INV_YAS53X_SCAN_MAGN_Y) || + iio_scan_mask_query(indio_dev, ring, INV_YAS53X_SCAN_MAGN_Z))) + return 0; + + set_yas53x_enable(indio_dev, true); + schedule_delayed_work(&st->work, + msecs_to_jiffies(st->delay)); + + return 0; +} + +static int inv_yas53x_predisable(struct iio_dev *indio_dev) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + + cancel_delayed_work_sync(&st->work); + clear_bit(INV_YAS53X_SCAN_MAGN_X, ring->scan_mask); + clear_bit(INV_YAS53X_SCAN_MAGN_Y, ring->scan_mask); + clear_bit(INV_YAS53X_SCAN_MAGN_Z, ring->scan_mask); + + return 0; +} + +static const struct iio_buffer_setup_ops inv_yas53x_ring_setup_ops = { + .preenable = &iio_sw_buffer_preenable, + .postenable = &inv_yas53x_postenable, + .predisable = &inv_yas53x_predisable, +}; + +int inv_yas53x_configure_ring(struct iio_dev *indio_dev) +{ + int ret = 0; + struct iio_buffer *ring; + + ring = iio_kfifo_allocate(indio_dev); + if (!ring) { + ret = -ENOMEM; + return ret; + } + indio_dev->buffer = ring; + /* setup ring buffer */ + ring->scan_timestamp = true; + indio_dev->setup_ops = &inv_yas53x_ring_setup_ops; + + indio_dev->modes |= INDIO_BUFFER_TRIGGERED; + return 0; +} +/** + * @} + */ + diff --git a/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_trigger.c b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_trigger.c new file mode 100644 index 00000000000..a20ce2baa7e --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_trigger.c @@ -0,0 +1,91 @@ +/* +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. 
+ * + * @{ + * @file inv_yas53x_trigger.c + * @brief Invensense implementation for yas530/yas532/yas533 + * @details This driver currently works for the yas530/yas532/yas533 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iio.h" +#include "sysfs.h" +#include "trigger.h" + +#include "inv_yas53x_iio.h" + +static const struct iio_trigger_ops inv_yas53x_trigger_ops = { + .owner = THIS_MODULE, +}; + +int inv_yas53x_probe_trigger(struct iio_dev *indio_dev) +{ + int ret; + struct inv_compass_state *st = iio_priv(indio_dev); + + st->trig = iio_allocate_trigger("%s-dev%d", + indio_dev->name, + indio_dev->id); + if (st->trig == NULL) { + ret = -ENOMEM; + goto error_ret; + } + /* select default trigger */ + st->trig->dev.parent = &st->client->dev; + st->trig->private_data = indio_dev; + st->trig->ops = &inv_yas53x_trigger_ops; + ret = iio_trigger_register(st->trig); + + /* select default trigger */ + indio_dev->trig = st->trig; + if (ret) + goto error_free_trig; + + return 0; + +error_free_trig: + iio_free_trigger(st->trig); +error_ret: + return ret; +} + +void inv_yas53x_remove_trigger(struct iio_dev *indio_dev) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + + iio_trigger_unregister(st->trig); + iio_free_trigger(st->trig); +} +/** + * @} + */ + diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 4d087ac1106..426cd4bf6a9 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -504,7 +504,7 @@ static int iscsi_target_do_authentication( break; case 1: pr_debug("iSCSI security negotiation" - " completed sucessfully.\n"); + " completed successfully.\n"); login->auth_complete = 1; if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) && (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) { diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 162b736c734..49fd0a9b0a5 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -593,7 +593,7 @@ int core_tpg_set_initiator_node_queue_depth( if (init_sess) tpg->se_tpg_tfo->close_session(init_sess); - pr_debug("Successfuly changed queue depth to: %d for Initiator" + pr_debug("Successfully changed queue depth to: %d for Initiator" " Node: %s on %s Target Portal Group: %u\n", queue_depth, initiatorname, tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg)); diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 3f27ad217fa..89ffdc2d70c 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -261,8 +261,10 @@ static void acc_complete_in(struct usb_ep *ep, struct usb_request *req) { struct acc_dev *dev = _acc_dev; - if (req->status != 0) + if (req->status == -ESHUTDOWN) { + pr_debug("acc_complete_in set disconnected"); acc_set_disconnected(dev); + } req_put(dev, &dev->tx_idle, req); @@ -274,8 +276,10 @@ static void acc_complete_out(struct usb_ep *ep, struct usb_request *req) struct acc_dev *dev = _acc_dev; dev->rx_done = 1; - if (req->status != 0) + if (req->status == -ESHUTDOWN) { + pr_debug("acc_complete_out set disconnected"); acc_set_disconnected(dev); + } wake_up(&dev->read_wq); } @@ -557,8 +561,10 @@ static ssize_t acc_read(struct file *fp, char __user *buf, pr_debug("acc_read(%d)\n", count); - if (dev->disconnected) + if (dev->disconnected) { + pr_debug("acc_read disconnected"); return -ENODEV; + } if 
(count > BULK_BUFFER_SIZE) count = BULK_BUFFER_SIZE; @@ -571,6 +577,12 @@ static ssize_t acc_read(struct file *fp, char __user *buf, goto done; } + if (dev->rx_done) { + // last req cancelled. try to get it. + req = dev->rx_req[0]; + goto copy_data; + } + requeue_req: /* queue a request */ req = dev->rx_req[0]; @@ -588,9 +600,17 @@ static ssize_t acc_read(struct file *fp, char __user *buf, ret = wait_event_interruptible(dev->read_wq, dev->rx_done); if (ret < 0) { r = ret; - usb_ep_dequeue(dev->ep_out, req); + ret = usb_ep_dequeue(dev->ep_out, req); + if (ret != 0) { + // cancel failed. There can be a data already received. + // it will be retrieved in the next read. + pr_debug("acc_read: cancelling failed %d", ret); + } goto done; } + +copy_data: + dev->rx_done = 0; if (dev->online) { /* If we got a 0-len packet, throw it back and try again. */ if (req->actual == 0) @@ -619,8 +639,10 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, pr_debug("acc_write(%d)\n", count); - if (!dev->online || dev->disconnected) + if (!dev->online || dev->disconnected) { + pr_debug("acc_write disconnected or not online"); return -ENODEV; + } while (count > 0) { if (!dev->online) { diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 37647d11896..7a4d4271982 100755 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -136,6 +136,7 @@ static struct delayed_work smb347_hc_mode_work; extern int smb347_hc_mode_callback(bool enable, int cur); extern void fsl_wake_lock_timeout(void); +extern void usb_det_cable_callback(unsigned cable_type); /* Export the function "unsigned int get_usb_cable_status(void)" for others to query the USB cable status. */ unsigned int get_usb_cable_status(void) @@ -274,6 +275,7 @@ static void cable_detection_work_handler(struct work_struct *w) { mutex_lock(&s_cable_info.cable_info_mutex); s_cable_info.cable_status = 0x00; //0000 + u32 val; printk(KERN_INFO "%s(): vbus_active = %d and is_active = %d\n", __func__, s_cable_info.udc_vbus_active, s_cable_info.is_active); @@ -285,6 +287,8 @@ static void cable_detection_work_handler(struct work_struct *w) s_cable_info.ac_connected = 0; + usb_det_cable_callback(s_cable_info.cable_status); + if ((pcb_id_version <= 0x2) && (project_id == GROUPER_PROJECT_NAKASI)) { #if BATTERY_CALLBACK_ENABLED battery_callback(s_cable_info.cable_status); @@ -294,27 +298,37 @@ static void cable_detection_work_handler(struct work_struct *w) touch_callback(s_cable_info.cable_status); #endif } else if (!s_cable_info.udc_vbus_active && s_cable_info.is_active) { - switch (fsl_readl(&dr_regs->portsc1) & PORTSCX_LINE_STATUS_BITS) { - case PORTSCX_LINE_STATUS_SE0: - s_cable_info.ac_connected = 0; break; - case PORTSCX_LINE_STATUS_JSTATE: - s_cable_info.ac_connected = 0; break; - case PORTSCX_LINE_STATUS_KSTATE: - s_cable_info.ac_connected = 0; break; - case PORTSCX_LINE_STATUS_UNDEF: - s_cable_info.ac_connected = 1; break; - default: - s_cable_info.ac_connected = 0; break; + val = fsl_readl(&dr_regs->usbcmd); + if (val & USB_CMD_RUN_STOP) { + switch (fsl_readl(&dr_regs->portsc1) & PORTSCX_LINE_STATUS_BITS) { + case PORTSCX_LINE_STATUS_SE0: + s_cable_info.ac_connected = 0; break; + case PORTSCX_LINE_STATUS_JSTATE: + s_cable_info.ac_connected = 0; break; + case PORTSCX_LINE_STATUS_KSTATE: + s_cable_info.ac_connected = 0; break; + case PORTSCX_LINE_STATUS_UNDEF: + s_cable_info.ac_connected = 1; break; + default: + s_cable_info.ac_connected = 0; break; + } + } else { + printk(KERN_INFO "USB device 
controller was not ready\n"); + mutex_unlock(&s_cable_info.cable_info_mutex); + return; } if(!s_cable_info.ac_connected) { printk(KERN_INFO "The USB cable is connected\n"); s_cable_info.cable_status = 0x01; //0001 + smb347_hc_mode_callback(1,1); } else { printk(KERN_INFO "AC adapter connect\n"); s_cable_info.cable_status = 0x03; //0011 } + usb_det_cable_callback(s_cable_info.cable_status); + if ((pcb_id_version <= 0x2) && (project_id == GROUPER_PROJECT_NAKASI)) { fsl_smb347_hc_mode_callback_work(1,1); #if BATTERY_CALLBACK_ENABLED diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 76f40688f82..d5212ebc968 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -761,16 +761,13 @@ static int tegra_usb_resume(struct usb_hcd *hcd, bool is_dpd) * Must not be called with a lock on ehci->lock */ static void tegra_ehci_disable_phy_interrupt(struct usb_hcd *hcd) { - struct tegra_ehci_hcd *tegra; u32 val; if (hcd->irq >= 0) { - tegra = dev_get_drvdata(hcd->self.controller); - if (tegra->phy->hotplug) { - /* Disable PHY clock valid interrupts */ - val = readl(hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET); - val &= ~TEGRA_USB_PHY_CLK_VALID_INT_ENB; - writel(val , (hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET)); - } + /* Disable PHY clock valid interrupts */ + val = readl(hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET); + val &= ~TEGRA_USB_PHY_CLK_VALID_INT_ENB; + writel(val , (hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET)); + /* Wait for the interrupt handler to finish */ synchronize_irq(hcd->irq); } diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c old mode 100644 new mode 100755 index c1fe7f899f1..4a1bae6d1d2 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -42,6 +42,7 @@ #define USB_VBUS_INT_STATUS (1 << 9) #define USB_VBUS_STATUS (1 << 10) #define USB_INTS (USB_VBUS_INT_STATUS | USB_ID_INT_STATUS) +#define USB_INT_ENS (USB_VBUS_INT_EN | USB_ID_INT_EN | USB_VBUS_WAKEUP_EN | USB_ID_PIN_WAKEUP_EN) typedef void (*callback_t)(enum usb_otg_state to, enum usb_otg_state from, void *args); @@ -64,6 +65,9 @@ struct tegra_otg_data { }; static struct tegra_otg_data *tegra_clone; +static bool tegra_otg_on_charging = false; +module_param(tegra_otg_on_charging, bool, 0664); + static inline unsigned long otg_readl(struct tegra_otg_data *tegra, unsigned int offset) { @@ -230,8 +234,15 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - if (tegra->charger_cb) - tegra->charger_cb(to, from, tegra->charger_cb_data); + if (tegra->charger_cb) { +// tegra->charger_cb(to, from, tegra->charger_cb_data); + if (tegra_otg_on_charging) + /* enable v_bus detection for charging */ + tegra->detect_vbus = true; + else + /* enable OTG to supply internal power */ + tegra->charger_cb(to, from, tegra->charger_cb_data); + } if (to == OTG_STATE_A_SUSPEND) { if (from == OTG_STATE_A_HOST) @@ -264,17 +275,6 @@ static irqreturn_t tegra_otg_irq(int irq, void *data) if (val & (USB_VBUS_INT_EN | USB_ID_INT_EN)) { otg_writel(tegra, val, USB_PHY_WAKEUP); if ((val & USB_ID_INT_STATUS) || (val & USB_VBUS_INT_STATUS)) { - tegra->int_status = val; - tegra->detect_vbus = false; - schedule_work(&tegra->work); - } - } else { - if ((val & USB_ID_INT_STATUS) || (val & USB_VBUS_INT_STATUS)) { - printk(KERN_INFO "%s(): WRONG! 
val = %#X\n", __func__, val); - val |= (USB_VBUS_INT_EN | USB_VBUS_WAKEUP_EN); - val |= (USB_ID_INT_EN | USB_ID_PIN_WAKEUP_EN); - otg_writel(tegra, val, USB_PHY_WAKEUP); - tegra->int_status = val; tegra->detect_vbus = false; schedule_work(&tegra->work); @@ -428,6 +428,8 @@ static int tegra_otg_probe(struct platform_device *pdev) if (!ehci_pdata->default_enable) clk_disable(tegra->clk); + + tegra->intr_reg_data = tegra->intr_reg_data | USB_INT_ENS; dev_info(&pdev->dev, "otg transceiver registered\n"); return 0; @@ -475,7 +477,7 @@ static int tegra_otg_suspend(struct device *dev) val = tegra_otg->intr_reg_data & ~(USB_ID_INT_EN | USB_VBUS_INT_EN); writel(val, (tegra_otg->regs + USB_PHY_WAKEUP)); clk_disable(tegra_otg->clk); - printk(KERN_INFO "%s(): tegra_otg->intr_reg_data = %#X\n", __func__, tegra_otg->intr_reg_data); + if (from == OTG_STATE_B_PERIPHERAL && otg->gadget) { usb_gadget_vbus_disconnect(otg->gadget); otg->state = OTG_STATE_A_SUSPEND; @@ -500,13 +502,6 @@ static void tegra_otg_resume(struct device *dev) msleep(1); /* restore the interupt enable for cable ID and VBUS */ clk_enable(tegra_otg->clk); - if (!(tegra_otg->intr_reg_data & USB_VBUS_INT_EN) || !(tegra_otg->intr_reg_data & USB_VBUS_WAKEUP_EN) || - !(tegra_otg->intr_reg_data & USB_ID_INT_EN) || !(tegra_otg->intr_reg_data & USB_ID_PIN_WAKEUP_EN)) { - printk(KERN_INFO "%s(): WRONG! tegra_otg->intr_reg_data = %#X\n", __func__, tegra_otg->intr_reg_data); - tegra_otg->intr_reg_data |= (USB_VBUS_INT_EN | USB_VBUS_WAKEUP_EN); - tegra_otg->intr_reg_data |= (USB_ID_INT_EN | USB_ID_PIN_WAKEUP_EN); - } - printk(KERN_INFO "%s(): tegra_otg->intr_reg_data = %#X\n", __func__, tegra_otg->intr_reg_data); writel(tegra_otg->intr_reg_data, (tegra_otg->regs + USB_PHY_WAKEUP)); val = readl(tegra_otg->regs + USB_PHY_WAKEUP); clk_disable(tegra_otg->clk); diff --git a/drivers/video/tegra/Kconfig b/drivers/video/tegra/Kconfig index 7de26267155..01d99e78cba 100644 --- a/drivers/video/tegra/Kconfig +++ b/drivers/video/tegra/Kconfig @@ -33,6 +33,12 @@ config TEGRA_DC_EXTENSIONS This exposes support for extended capabilities of the Tegra display controller to userspace drivers. 
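Taken together, the tegra-otg hunks above replace the two defensive "WRONG!" recovery paths (one in the IRQ handler, one in resume) with a single probe-time guarantee: USB_INT_ENS is ORed into the cached tegra->intr_reg_data, so the image that suspend masks and resume writes back should always carry the VBUS/ID interrupt and wakeup enables. A minimal standalone sketch of that invariant follows; the struct and the bit positions are placeholders for illustration only, not the real USB_PHY_WAKEUP layout.

/*
 * Sketch: cache the full enable mask once at probe, then suspend/resume
 * only mask and restore the cached image. Placeholder bit values.
 */
#include <stdint.h>
#include <stdio.h>

#define USB_VBUS_INT_EN      (1u << 0)   /* placeholder values for the sketch */
#define USB_ID_INT_EN        (1u << 1)
#define USB_VBUS_WAKEUP_EN   (1u << 2)
#define USB_ID_PIN_WAKEUP_EN (1u << 3)
#define USB_INT_ENS (USB_VBUS_INT_EN | USB_ID_INT_EN | \
                     USB_VBUS_WAKEUP_EN | USB_ID_PIN_WAKEUP_EN)

struct otg_sketch {
	uint32_t intr_reg_data;   /* cached copy of the wakeup/enable register */
};

static void probe(struct otg_sketch *o)
{
	/* guarantee the enable bits are always present in the cached image */
	o->intr_reg_data |= USB_INT_ENS;
}

static uint32_t suspend_image(const struct otg_sketch *o)
{
	/* suspend writes back the cached image with the interrupt enables masked off */
	return o->intr_reg_data & ~(USB_ID_INT_EN | USB_VBUS_INT_EN);
}

static uint32_t resume_image(const struct otg_sketch *o)
{
	/* no per-bit sanity checks needed: probe() already set them all */
	return o->intr_reg_data;
}

int main(void)
{
	struct otg_sketch o = { .intr_reg_data = 0 };

	probe(&o);
	printf("suspend image: %#x\n", suspend_image(&o));
	printf("resume image:  %#x\n", resume_image(&o));
	return 0;
}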
+config TEGRA_SD_GEN2 + bool "Tegra SD Gen2 support" + default n + help + backported from Tegra4 (tripndroid) + config TEGRA_NVMAP bool "Tegra GPU memory management driver (nvmap)" default y diff --git a/drivers/video/tegra/dc/dc_reg.h b/drivers/video/tegra/dc/dc_reg.h index 2b8f8becb15..c1165e6eec3 100644 --- a/drivers/video/tegra/dc/dc_reg.h +++ b/drivers/video/tegra/dc/dc_reg.h @@ -491,6 +491,12 @@ #define SD_ONESHOT_ENABLE (1 << 10) #define SD_CORRECTION_MODE_AUTO (0 << 11) #define SD_CORRECTION_MODE_MAN (1 << 11) +#define SD_K_LIMIT_ENABLE (1 << 12) +#define SD_WINDOW_ENABLE (1 << 13) +#define SD_SOFT_CLIPPING_ENABLE (1 << 14) +#define SD_SMOOTH_K_ENABLE (1 << 15) +#define SD_VSYNC (0 << 28) +#define SD_VPULSE2 (1 << 28) #define NUM_BIN_WIDTHS 4 #define STEPS_PER_AGG_LVL 64 @@ -549,6 +555,24 @@ #define SD_MAN_K_G(x) (((x) & 0x3ff) << 10) #define SD_MAN_K_B(x) (((x) & 0x3ff) << 20) +#define DC_DISP_SD_K_LIMIT 0x4df +#define SD_K_LIMIT(x) (((x) & 0x3ff) << 0) + +#define DC_DISP_SD_WINDOW_POSITION 0x4e0 +#define SD_WIN_H_POSITION(x) (((x) & 0x1fff) << 0) +#define SD_WIN_V_POSITION(x) (((x) & 0x1fff) << 16) + +#define DC_DISP_SD_WINDOW_SIZE 0x4e1 +#define SD_WIN_H_SIZE(x) (((x) & 0x1fff) << 0) +#define SD_WIN_V_SIZE(x) (((x) & 0x1fff) << 16) + +#define DC_DISP_SD_SOFT_CLIPPING 0x4e2 +#define SD_SOFT_CLIPPING_THRESHOLD(x) (((x) & 0xff) << 0) +#define SD_SOFT_CLIPPING_RECIP(x) (((x) & 0xffff) << 16) + +#define DC_DISP_SD_SMOOTH_K 0x4e3 +#define SD_SMOOTH_K_INCR(x) (((x) & 0x3fff) << 0) + #define NUM_AGG_PRI_LVLS 4 #define SD_AGG_PRI_LVL(x) ((x) >> 3) #define SD_GET_AGG(x) ((x) & 0x7) diff --git a/drivers/video/tegra/dc/nvsd.c b/drivers/video/tegra/dc/nvsd.c index f320a1cfb3e..8eed135eb74 100644 --- a/drivers/video/tegra/dc/nvsd.c +++ b/drivers/video/tegra/dc/nvsd.c @@ -55,6 +55,17 @@ NVSD_ATTR(fc_time_limit); NVSD_ATTR(fc_threshold); NVSD_ATTR(lut); NVSD_ATTR(bltf); +#ifdef CONFIG_TEGRA_SD_GEN2 +NVSD_ATTR(k_limit_enable); +NVSD_ATTR(k_limit); +NVSD_ATTR(sd_window_enable); +NVSD_ATTR(sd_window); +NVSD_ATTR(soft_clipping_enable); +NVSD_ATTR(soft_clipping_threshold); +NVSD_ATTR(smooth_k_enable); +NVSD_ATTR(smooth_k_incr); +NVSD_ATTR(use_vpulse2); +#endif static struct kobj_attribute nvsd_attr_registers = __ATTR(registers, S_IRUGO, nvsd_registers_show, NULL); @@ -74,6 +85,17 @@ static struct attribute *nvsd_attrs[] = { NVSD_ATTRS_ENTRY(lut), NVSD_ATTRS_ENTRY(bltf), NVSD_ATTRS_ENTRY(registers), +#ifdef CONFIG_TEGRA_SD_GEN2 + NVSD_ATTRS_ENTRY(k_limit_enable), + NVSD_ATTRS_ENTRY(k_limit), + NVSD_ATTRS_ENTRY(sd_window_enable), + NVSD_ATTRS_ENTRY(sd_window), + NVSD_ATTRS_ENTRY(soft_clipping_enable), + NVSD_ATTRS_ENTRY(soft_clipping_threshold), + NVSD_ATTRS_ENTRY(smooth_k_enable), + NVSD_ATTRS_ENTRY(smooth_k_incr), + NVSD_ATTRS_ENTRY(use_vpulse2), +#endif NULL, }; @@ -429,6 +451,14 @@ void nvsd_init(struct tegra_dc *dc, struct tegra_dc_sd_settings *settings) bw_idx = nvsd_get_bw_idx(settings); + /* Values of SD LUT & BL TF are different according to bin_width on T30 + * due to HW bug. Therefore we use bin_width to select the correct table + * on T30. 
*/ + +#ifdef CONFIG_TEGRA_SD_GEN2 + bw_idx = 0; +#endif + /* Write LUT */ if (!settings->cmd) { dev_dbg(&dc->ndev->dev, " LUT:\n"); @@ -504,6 +534,49 @@ void nvsd_init(struct tegra_dc *dc, struct tegra_dc_sd_settings *settings) tegra_dc_writel(dc, val, DC_DISP_SD_FLICKER_CONTROL); dev_dbg(&dc->ndev->dev, " FLICKER_CONTROL: 0x%08x\n", val); +#ifdef CONFIG_TEGRA_SD_GEN2 + /* Write K limit */ + if (settings->k_limit_enable) { + val = settings->k_limit; + if (val < 128) + val = 128; + else if (val > 255) + val = 255; + val = SD_K_LIMIT(val); + tegra_dc_writel(dc, val, DC_DISP_SD_K_LIMIT); + dev_dbg(&dc->ndev->dev, " K_LIMIT: 0x%08x\n", val); + } + + if (settings->sd_window_enable) { + /* Write sd window */ + val = SD_WIN_H_POSITION(settings->sd_window.h_position) | + SD_WIN_V_POSITION(settings->sd_window.v_position); + tegra_dc_writel(dc, val, DC_DISP_SD_WINDOW_POSITION); + dev_dbg(&dc->ndev->dev, " SD_WINDOW_POSITION: 0x%08x\n", val); + + val = SD_WIN_H_POSITION(settings->sd_window.h_size) | + SD_WIN_V_POSITION(settings->sd_window.v_size); + tegra_dc_writel(dc, val, DC_DISP_SD_WINDOW_SIZE); + dev_dbg(&dc->ndev->dev, " SD_WINDOW_SIZE: 0x%08x\n", val); + } + + if (settings->soft_clipping_enable) { + /* Write soft clipping */ + val = (64 * 1024) / (256 - settings->soft_clipping_threshold); + val = SD_SOFT_CLIPPING_RECIP(val) | + SD_SOFT_CLIPPING_THRESHOLD(settings->soft_clipping_threshold); + tegra_dc_writel(dc, val, DC_DISP_SD_SOFT_CLIPPING); + dev_dbg(&dc->ndev->dev, " SOFT_CLIPPING: 0x%08x\n", val); + } + + if (settings->smooth_k_enable) { + /* Write K incr value */ + val = SD_SMOOTH_K_INCR(settings->smooth_k_incr); + tegra_dc_writel(dc, val, DC_DISP_SD_SMOOTH_K); + dev_dbg(&dc->ndev->dev, " SMOOTH_K: 0x%08x\n", val); + } +#endif + /* Manage SD Control */ val = 0; /* Stay in manual correction mode until the next flip. */ @@ -520,6 +593,18 @@ void nvsd_init(struct tegra_dc *dc, struct tegra_dc_sd_settings *settings) val |= SD_AGGRESSIVENESS(settings->aggressiveness); /* Bin Width (value derived from bw_idx) */ val |= bw_idx << 3; +#ifdef CONFIG_TEGRA_SD_GEN2 + /* K limit enable */ + val |= (settings->k_limit_enable) ? SD_K_LIMIT_ENABLE : 0; + /* Programmable sd window enable */ + val |= (settings->sd_window_enable) ? SD_WINDOW_ENABLE : 0; + /* Soft clipping enable */ + val |= (settings->soft_clipping_enable) ? SD_SOFT_CLIPPING_ENABLE : 0; + /* Smooth K enable */ + val |= (settings->smooth_k_enable) ? SD_SMOOTH_K_ENABLE : 0; + /* SD proc control */ + val |= (settings->use_vpulse2) ? 
SD_VPULSE2 : SD_VSYNC; +#endif /* Finally, Write SD Control */ tegra_dc_writel(dc, val, DC_DISP_SD_CONTROL); dev_dbg(&dc->ndev->dev, " SD_CONTROL: 0x%08x\n", val); @@ -670,6 +755,39 @@ static ssize_t nvsd_settings_show(struct kobject *kobj, else if (IS_NVSD_ATTR(fc_threshold)) res = snprintf(buf, PAGE_SIZE, "%d\n", sd_settings->fc.threshold); +#ifdef CONFIG_TEGRA_SD_GEN2 + else if (IS_NVSD_ATTR(k_limit_enable)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->k_limit_enable); + else if (IS_NVSD_ATTR(k_limit)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->k_limit); + else if (IS_NVSD_ATTR(sd_window_enable)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->sd_window_enable); + else if (IS_NVSD_ATTR(sd_window)) + res = snprintf(buf, PAGE_SIZE, + "x: %d, y: %d, w: %d, h: %d\n", + sd_settings->sd_window.h_position, + sd_settings->sd_window.v_position, + sd_settings->sd_window.h_size, + sd_settings->sd_window.v_size); + else if (IS_NVSD_ATTR(soft_clipping_enable)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->soft_clipping_enable); + else if (IS_NVSD_ATTR(soft_clipping_threshold)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->soft_clipping_threshold); + else if (IS_NVSD_ATTR(smooth_k_enable)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->smooth_k_enable); + else if (IS_NVSD_ATTR(smooth_k_incr)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->smooth_k_incr); + else if (IS_NVSD_ATTR(use_vpulse2)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->use_vpulse2); +#endif else if (IS_NVSD_ATTR(lut)) res = nvsd_lut_show(sd_settings, buf, res); else if (IS_NVSD_ATTR(bltf)) @@ -824,6 +942,37 @@ static ssize_t nvsd_settings_store(struct kobject *kobj, nvsd_check_and_update(0, 255, fc.time_limit); } else if (IS_NVSD_ATTR(fc_threshold)) { nvsd_check_and_update(0, 255, fc.threshold); +#ifdef CONFIG_TEGRA_SD_GEN2 + } else if (IS_NVSD_ATTR(k_limit_enable)) { + nvsd_check_and_update(0, 1, k_limit_enable); + } else if (IS_NVSD_ATTR(k_limit)) { + nvsd_check_and_update(128, 255, k_limit); + } else if (IS_NVSD_ATTR(sd_window_enable)) { + nvsd_check_and_update(0, 1, sd_window_enable); + } else if (IS_NVSD_ATTR(sd_window)) { + int ele[4], i = 0, num = 4; + nvsd_get_multi(ele, num, i, 0, LONG_MAX); + + if (i == num) { + sd_settings->sd_window.h_position = ele[0]; + sd_settings->sd_window.v_position = ele[1]; + sd_settings->sd_window.h_size = ele[2]; + sd_settings->sd_window.v_size = ele[3]; + settings_updated = true; + } else { + res = -EINVAL; + } + } else if (IS_NVSD_ATTR(soft_clipping_enable)) { + nvsd_check_and_update(0, 1, soft_clipping_enable); + } else if (IS_NVSD_ATTR(soft_clipping_threshold)) { + nvsd_check_and_update(0, 255, soft_clipping_threshold); + } else if (IS_NVSD_ATTR(smooth_k_enable)) { + nvsd_check_and_update(0, 1, smooth_k_enable); + } else if (IS_NVSD_ATTR(smooth_k_incr)) { + nvsd_check_and_update(0, 16320, smooth_k_incr); + } else if (IS_NVSD_ATTR(use_vpulse2)) { + nvsd_check_and_update(0, 1, use_vpulse2); +#endif } else if (IS_NVSD_ATTR(lut)) { if (nvsd_lut_store(sd_settings, buf)) res = -EINVAL; @@ -912,6 +1061,13 @@ static ssize_t nvsd_registers_show(struct kobject *kobj, NVSD_PRINT_REG(DC_DISP_SD_BL_CONTROL); NVSD_PRINT_REG(DC_DISP_SD_HW_K_VALUES); NVSD_PRINT_REG(DC_DISP_SD_MAN_K_VALUES); +#ifdef CONFIG_TEGRA_SD_GEN2 + NVSD_PRINT_REG(DC_DISP_SD_K_LIMIT); + NVSD_PRINT_REG(DC_DISP_SD_WINDOW_POSITION); + NVSD_PRINT_REG(DC_DISP_SD_WINDOW_SIZE); + NVSD_PRINT_REG(DC_DISP_SD_SOFT_CLIPPING); + NVSD_PRINT_REG(DC_DISP_SD_SMOOTH_K); 
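/* Quick reference for the new CONFIG_TEGRA_SD_GEN2 sysfs attributes handled
 * in nvsd_settings_store() above: the *_enable knobs and use_vpulse2 accept
 * 0 or 1, k_limit accepts 128-255, soft_clipping_threshold 0-255,
 * smooth_k_incr 0-16320, and sd_window takes four integers in the order
 * h_position v_position h_size v_size. */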
+#endif return res; } diff --git a/drivers/video/tegra/fb.c b/drivers/video/tegra/fb.c index 0272b7f68f0..43c85c985b2 100644 --- a/drivers/video/tegra/fb.c +++ b/drivers/video/tegra/fb.c @@ -411,7 +411,11 @@ static int tegra_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long return 0; } -int tegra_fb_get_mode(struct tegra_dc *dc) { +int tegra_fb_get_mode(struct tegra_dc *dc) +{ + if (!dc->fb->info->mode) + return -1; + return dc->fb->info->mode->refresh; } diff --git a/drivers/video/tegra/host/bus_client.c b/drivers/video/tegra/host/bus_client.c index e56adecedb2..0d253b46265 100644 --- a/drivers/video/tegra/host/bus_client.c +++ b/drivers/video/tegra/host/bus_client.c @@ -192,7 +192,8 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp) } filp->private_data = priv; priv->ch = ch; - nvhost_module_add_client(ch->dev, priv); + if(nvhost_module_add_client(ch->dev, priv)) + goto fail; if (ch->ctxhandler && ch->ctxhandler->alloc) { priv->hwctx = ch->ctxhandler->alloc(ch->ctxhandler, ch); @@ -252,6 +253,11 @@ static void reset_submit(struct nvhost_channel_userctx *ctx) ctx->hdr.num_relocs = 0; ctx->num_relocshifts = 0; ctx->hdr.num_waitchks = 0; + + if (ctx->job) { + nvhost_job_put(ctx->job); + ctx->job = NULL; + } } static ssize_t nvhost_channelwrite(struct file *filp, const char __user *buf, diff --git a/drivers/video/tegra/host/gr3d/gr3d_t30.c b/drivers/video/tegra/host/gr3d/gr3d_t30.c index 8ca6b7b44b9..174bbde1124 100644 --- a/drivers/video/tegra/host/gr3d/gr3d_t30.c +++ b/drivers/video/tegra/host/gr3d/gr3d_t30.c @@ -425,6 +425,8 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t30_ctxhandler_init( setup_save(p, save_ptr); + nvmap_munmap(p->save_buf, save_ptr); + p->h.alloc = ctx3d_alloc_v1; p->h.save_push = save_push_v1; p->h.save_service = NULL; diff --git a/drivers/video/tegra/host/nvhost_acm.c b/drivers/video/tegra/host/nvhost_acm.c index 7865583b0fa..2f4fa060018 100644 --- a/drivers/video/tegra/host/nvhost_acm.c +++ b/drivers/video/tegra/host/nvhost_acm.c @@ -331,15 +331,17 @@ void nvhost_module_remove_client(struct nvhost_device *dev, void *priv) { int i; struct nvhost_module_client *m; + int found = 0; mutex_lock(&client_list_lock); list_for_each_entry(m, &dev->client_list, node) { if (priv == m->priv) { list_del(&m->node); + found = 1; break; } } - if (m) { + if (found) { kfree(m); for (i = 0; i < dev->num_clks; i++) nvhost_module_update_rate(dev, i); diff --git a/drivers/video/tegra/host/nvhost_job.c b/drivers/video/tegra/host/nvhost_job.c index df7a62d689b..576be43ad2e 100644 --- a/drivers/video/tegra/host/nvhost_job.c +++ b/drivers/video/tegra/host/nvhost_job.c @@ -74,6 +74,8 @@ static int alloc_gathers(struct nvhost_job *job, 32, NVMAP_HANDLE_CACHEABLE, 0); if (IS_ERR_OR_NULL(job->gather_mem)) { err = PTR_ERR(job->gather_mem); + if (!job->gather_mem) + err = -ENOMEM; job->gather_mem = NULL; goto error; } @@ -83,6 +85,8 @@ static int alloc_gathers(struct nvhost_job *job, job->gathers = nvmap_mmap(job->gather_mem); if (IS_ERR_OR_NULL(job->gathers)) { err = PTR_ERR(job->gathers); + if (!job->gathers) + err = -ENOMEM; job->gathers = NULL; goto error; } diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c index 7474f31534f..5ddebf6b99c 100644 --- a/drivers/video/tegra/nvmap/nvmap_heap.c +++ b/drivers/video/tegra/nvmap/nvmap_heap.c @@ -3,7 +3,7 @@ * * GPU heap allocator. * - * Copyright (c) 2011, NVIDIA Corporation. + * Copyright (c) 2012, NVIDIA Corporation. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -420,6 +420,9 @@ static struct nvmap_heap_block *do_heap_alloc(struct nvmap_heap *heap, list_for_each_entry(i, &heap->free_list, free_list) { size_t fix_size; fix_base = ALIGN(i->block.base, align); + if(!fix_base || fix_base >= i->block.base + i->size) + continue; + fix_size = i->size - (fix_base - i->block.base); /* needed for compaction. relocated chunk diff --git a/fs/Kconfig b/fs/Kconfig index 99453badf45..aebcee21e5d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -219,6 +219,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" +source "fs/f2fs/Kconfig" endif # MISC_FILESYSTEMS @@ -281,4 +282,10 @@ endif source "fs/nls/Kconfig" source "fs/dlm/Kconfig" +config DYNAMIC_FSYNC + bool "dynamic file sync control" + default n + help + An experimental file sync control using Android's early suspend / late resume drivers + endmenu diff --git a/fs/Makefile b/fs/Makefile index a8bbb322701..63e532972b3 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -120,9 +120,12 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ +obj-$(CONFIG_F2FS_FS) += f2fs/ obj-$(CONFIG_EXOFS_FS) += exofs/ obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ # Patched by YAFFS obj-$(CONFIG_YAFFS_FS) += yaffs2/ + +obj-$(CONFIG_DYNAMIC_FSYNC) += dyn_sync_cntrl.o diff --git a/fs/aio.c b/fs/aio.c index e29ec485af2..75e05c91605 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1622,7 +1622,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, struct kioctx *ctx; long ret = 0; int i; - struct blk_plug plug; if (unlikely(nr < 0)) return -EINVAL; @@ -1639,8 +1638,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, return -EINVAL; } - blk_start_plug(&plug); - /* * AKPM: should this return a partial result if some of the IOs were * successfully submitted? @@ -1663,7 +1660,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, if (ret) break; } - blk_finish_plug(&plug); put_ioctx(ctx); return i ? i : ret; diff --git a/fs/dcache.c b/fs/dcache.c index 8b732a205d5..239f5e664aa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1438,7 +1438,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) return alias; } -static struct dentry * d_find_any_alias(struct inode *inode) +struct dentry * d_find_any_alias(struct inode *inode) { struct dentry *de; @@ -1447,7 +1447,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) spin_unlock(&inode->i_lock); return de; } - +EXPORT_SYMBOL(d_find_any_alias); /** * d_obtain_alias - find or allocate a dentry for a given inode diff --git a/fs/dyn_sync_cntrl.c b/fs/dyn_sync_cntrl.c new file mode 100644 index 00000000000..59b4ffbd669 --- /dev/null +++ b/fs/dyn_sync_cntrl.c @@ -0,0 +1,206 @@ +/* + * Author: Paul Reioux aka Faux123 + * + * Copyright 2013 Paul Reioux + * Copyright 2012 Paul Reioux + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DYN_FSYNC_VERSION_MAJOR 1 +#define DYN_FSYNC_VERSION_MINOR 2 + +/* + * fsync_mutex protects dyn_fsync_active during early suspend / late resume + * transitions + */ +static DEFINE_MUTEX(fsync_mutex); + +bool early_suspend_active __read_mostly = false; +bool dyn_fsync_active __read_mostly = true; + +static ssize_t dyn_fsync_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", (dyn_fsync_active ? 1 : 0)); +} + +static ssize_t dyn_fsync_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int data; + + if(sscanf(buf, "%u\n", &data) == 1) { + if (data == 1) { + pr_info("%s: dynamic fsync enabled\n", __FUNCTION__); + dyn_fsync_active = true; + } + else if (data == 0) { + pr_info("%s: dyanamic fsync disabled\n", __FUNCTION__); + dyn_fsync_active = false; + } + else + pr_info("%s: bad value: %u\n", __FUNCTION__, data); + } else + pr_info("%s: unknown input!\n", __FUNCTION__); + + return count; +} + +static ssize_t dyn_fsync_version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "version: %u.%u by faux123\n", + DYN_FSYNC_VERSION_MAJOR, + DYN_FSYNC_VERSION_MINOR); +} + +static ssize_t dyn_fsync_earlysuspend_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "early suspend active: %u\n", early_suspend_active); +} + +static struct kobj_attribute dyn_fsync_active_attribute = + __ATTR(Dyn_fsync_active, 0666, + dyn_fsync_active_show, + dyn_fsync_active_store); + +static struct kobj_attribute dyn_fsync_version_attribute = + __ATTR(Dyn_fsync_version, 0444, dyn_fsync_version_show, NULL); + +static struct kobj_attribute dyn_fsync_earlysuspend_attribute = + __ATTR(Dyn_fsync_earlysuspend, 0444, dyn_fsync_earlysuspend_show, NULL); + +static struct attribute *dyn_fsync_active_attrs[] = + { + &dyn_fsync_active_attribute.attr, + &dyn_fsync_version_attribute.attr, + &dyn_fsync_earlysuspend_attribute.attr, + NULL, + }; + +static struct attribute_group dyn_fsync_active_attr_group = + { + .attrs = dyn_fsync_active_attrs, + }; + +static struct kobject *dyn_fsync_kobj; + +static void dyn_fsync_force_flush(void) +{ + /* flush all outstanding buffers */ + wakeup_flusher_threads(0); + sync_filesystems(0); + sync_filesystems(1); +} + +static void dyn_fsync_early_suspend(struct early_suspend *h) +{ + mutex_lock(&fsync_mutex); + if (dyn_fsync_active) { + early_suspend_active = true; + dyn_fsync_force_flush(); + } + mutex_unlock(&fsync_mutex); +} + +static void dyn_fsync_late_resume(struct early_suspend *h) +{ + mutex_lock(&fsync_mutex); + early_suspend_active = false; + mutex_unlock(&fsync_mutex); +} + +static struct early_suspend dyn_fsync_early_suspend_handler = + { + .level = EARLY_SUSPEND_LEVEL_BLANK_SCREEN, + .suspend = dyn_fsync_early_suspend, + .resume = dyn_fsync_late_resume, + }; + +static int dyn_fsync_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + early_suspend_active = true; + dyn_fsync_force_flush(); + //pr_warn("dyn fsync: panic: force flush!\n"); + + return NOTIFY_DONE; +} + +static struct notifier_block dyn_fsync_panic_block = { + .notifier_call = dyn_fsync_panic_event, + .priority = INT_MAX, +}; + +static int dyn_fsync_notify_sys(struct notifier_block *this, unsigned long code, + void *unused) +{ + if (code == SYS_DOWN || code == SYS_HALT) { + early_suspend_active = true; + 
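/* On shutdown or halt, behave as if the screen were already off and push all
 * dirty data to storage before the system goes down. */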
dyn_fsync_force_flush(); + //pr_warn("dyn fsync: reboot: force flush!\n"); + } + return NOTIFY_DONE; +} + +static struct notifier_block dyn_fsync_notifier = { + .notifier_call = dyn_fsync_notify_sys, +}; + +static int dyn_fsync_init(void) +{ + int sysfs_result; + + register_early_suspend(&dyn_fsync_early_suspend_handler); + register_reboot_notifier(&dyn_fsync_notifier); + atomic_notifier_chain_register(&panic_notifier_list, + &dyn_fsync_panic_block); + + dyn_fsync_kobj = kobject_create_and_add("dyn_fsync", kernel_kobj); + if (!dyn_fsync_kobj) { + pr_err("%s dyn_fsync kobject create failed!\n", __FUNCTION__); + return -ENOMEM; + } + + sysfs_result = sysfs_create_group(dyn_fsync_kobj, + &dyn_fsync_active_attr_group); + + if (sysfs_result) { + pr_info("%s dyn_fsync sysfs create failed!\n", __FUNCTION__); + kobject_put(dyn_fsync_kobj); + } + return sysfs_result; +} + +static void dyn_fsync_exit(void) +{ + unregister_early_suspend(&dyn_fsync_early_suspend_handler); + unregister_reboot_notifier(&dyn_fsync_notifier); + atomic_notifier_chain_unregister(&panic_notifier_list, + &dyn_fsync_panic_block); + + if (dyn_fsync_kobj != NULL) + kobject_put(dyn_fsync_kobj); +} + +module_init(dyn_fsync_init); +module_exit(dyn_fsync_exit); diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig new file mode 100644 index 00000000000..e5ad9500e6a --- /dev/null +++ b/fs/f2fs/Kconfig @@ -0,0 +1,66 @@ +config F2FS_FS + tristate "F2FS filesystem support (EXPERIMENTAL)" + depends on BLOCK + help + F2FS is based on Log-structured File System (LFS), which supports + versatile "flash-friendly" features. The design has been focused on + addressing the fundamental issues in LFS, which are snowball effect + of wandering tree and high cleaning overhead. + + Since flash-based storages show different characteristics according to + the internal geometry or flash memory management schemes aka FTL, F2FS + and tools support various parameters not only for configuring on-disk + layout, but also for selecting allocation and cleaning algorithms. + + If unsure, say N. + +config F2FS_STAT_FS + bool "F2FS Status Information" + depends on F2FS_FS && DEBUG_FS + default y + help + /sys/kernel/debug/f2fs/ contains information about all the partitions + mounted as f2fs. Each file shows the whole f2fs information. + + /sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +config F2FS_FS_XATTR + bool "F2FS extended attributes" + depends on F2FS_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + for details). + + If unsure, say N. + +config F2FS_FS_POSIX_ACL + bool "F2FS Access Control Lists" + depends on F2FS_FS_XATTR + select FS_POSIX_ACL + default y + help + Posix Access Control Lists (ACLs) support permissions for users and + gourps beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website . + + If you don't know what Access Control Lists are, say N + +config F2FS_FS_SECURITY + bool "F2FS Security Labels" + depends on F2FS_FS_XATTR + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. 
This option enables an extended attribute handler for file + security labels in the f2fs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N. + diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile new file mode 100644 index 00000000000..bcc0bc34559 --- /dev/null +++ b/fs/f2fs/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_F2FS_FS) += f2fs.o + +f2fs-y := dir.o file.o inode.o namei.o hash.o super.o +f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o +f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o +f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o +f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o + diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c new file mode 100644 index 00000000000..a989e40ad9b --- /dev/null +++ b/fs/f2fs/acl.c @@ -0,0 +1,425 @@ +/* + * fs/f2fs/acl.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * Portions of this code from linux/fs/ext2/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include "f2fs.h" +#include "xattr.h" +#include "acl.h" + +#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ + (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) + +static inline size_t f2fs_acl_size(int count) +{ + if (count <= 4) { + return sizeof(struct f2fs_acl_header) + + count * sizeof(struct f2fs_acl_entry_short); + } else { + return sizeof(struct f2fs_acl_header) + + 4 * sizeof(struct f2fs_acl_entry_short) + + (count - 4) * sizeof(struct f2fs_acl_entry); + } +} + +static inline int f2fs_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(struct f2fs_acl_header); + s = size - 4 * sizeof(struct f2fs_acl_entry_short); + if (s < 0) { + if (size % sizeof(struct f2fs_acl_entry_short)) + return -1; + return size / sizeof(struct f2fs_acl_entry_short); + } else { + if (s % sizeof(struct f2fs_acl_entry)) + return -1; + return s / sizeof(struct f2fs_acl_entry) + 4; + } +} + +static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) +{ + int i, count; + struct posix_acl *acl; + struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value; + struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); + const char *end = value + size; + + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) + return ERR_PTR(-EINVAL); + + count = f2fs_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + + if ((char *)entry > end) + goto fail; + + acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + + case ACL_USER: + case ACL_GROUP: + acl->a_entries[i].e_id = le32_to_cpu(entry->e_id); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + default: + goto fail; + } + } + if ((char *)entry != end) + goto fail; + return acl; +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +static void *f2fs_acl_to_disk(const struct posix_acl 
*acl, size_t *size) +{ + struct f2fs_acl_header *f2fs_acl; + struct f2fs_acl_entry *entry; + int i; + + f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * + sizeof(struct f2fs_acl_entry), GFP_KERNEL); + if (!f2fs_acl) + return ERR_PTR(-ENOMEM); + + f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION); + entry = (struct f2fs_acl_entry *)(f2fs_acl + 1); + + for (i = 0; i < acl->a_count; i++) { + + entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = cpu_to_le32(acl->a_entries[i].e_id); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + default: + goto fail; + } + } + *size = f2fs_acl_size(acl->a_count); + return (void *)f2fs_acl; + +fail: + kfree(f2fs_acl); + return ERR_PTR(-EINVAL); +} + +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + void *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(sbi, POSIX_ACL)) + return NULL; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + if (type == ACL_TYPE_ACCESS) + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + + retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = f2fs_getxattr(inode, name_index, "", value, retval); + } + + if (retval > 0) + acl = f2fs_acl_from_disk(value, retval); + else if (retval == -ENODATA) + acl = NULL; + else + acl = ERR_PTR(retval); + kfree(value); + + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + return acl; +} + +static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + int name_index; + void *value = NULL; + size_t size = 0; + int error; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + error = posix_acl_equiv_mode(acl, &inode->i_mode); + if (error < 0) + return error; + set_acl_inode(fi, inode->i_mode); + if (error == 0) + acl = NULL; + } + break; + + case ACL_TYPE_DEFAULT: + name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? 
-EACCES : 0; + break; + + default: + return -EINVAL; + } + + if (acl) { + value = f2fs_acl_to_disk(acl, &size); + if (IS_ERR(value)) { + cond_clear_inode_flag(fi, FI_ACL_MODE); + return (int)PTR_ERR(value); + } + } + + error = f2fs_setxattr(inode, name_index, "", value, size, NULL); + + kfree(value); + if (!error) + set_cached_acl(inode, type, acl); + + cond_clear_inode_flag(fi, FI_ACL_MODE); + return error; +} + +int f2fs_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(sbi, POSIX_ACL)) { + acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl && !(test_opt(sbi, ANDROID_EMU) && + F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU)) + inode->i_mode &= ~current_umask(); + } + + if (test_opt(sbi, POSIX_ACL) && acl) { + + if (S_ISDIR(inode->i_mode)) { + error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (error < 0) + return error; + if (error > 0) + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + } +cleanup: + posix_acl_release(acl); + return error; +} + +int f2fs_acl_chmod(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct posix_acl *acl; + int error; + mode_t mode = get_inode_mode(inode); + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(mode)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + error = posix_acl_chmod(&acl, GFP_KERNEL, mode); + if (error) + return error; + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + return error; +} + +int f2fs_android_emu(struct f2fs_sb_info *sbi, struct inode *inode, + u32 *uid, u32 *gid, umode_t *mode) +{ + F2FS_I(inode)->i_advise |= FADVISE_ANDROID_EMU; + + if (uid) + *uid = sbi->android_emu_uid; + if (gid) + *gid = sbi->android_emu_gid; + if (mode) { + *mode = (*mode & ~S_IRWXUGO) | sbi->android_emu_mode; + if (F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU_ROOT) + *mode &= ~S_IRWXO; + if (S_ISDIR(*mode)) { + if (*mode & S_IRUSR) + *mode |= S_IXUSR; + if (*mode & S_IRGRP) + *mode |= S_IXGRP; + if (*mode & S_IROTH) + *mode |= S_IXOTH; + } + } + + return 0; +} + +static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + const char *xname = POSIX_ACL_XATTR_DEFAULT; + size_t size; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + + if (type == ACL_TYPE_ACCESS) + xname = POSIX_ACL_XATTR_ACCESS; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct posix_acl *acl; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = 
F2FS_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else { + acl = NULL; + } + + error = f2fs_set_acl(inode, type, acl); + +release_and_out: + posix_acl_release(acl); + return error; +} + +const struct xattr_handler f2fs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; + +const struct xattr_handler f2fs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h new file mode 100644 index 00000000000..813f545475b --- /dev/null +++ b/fs/f2fs/acl.h @@ -0,0 +1,60 @@ +/* + * fs/f2fs/acl.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * + * Portions of this code from linux/fs/ext2/acl.h + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __F2FS_ACL_H__ +#define __F2FS_ACL_H__ + +#include + +#define F2FS_ACL_VERSION 0x0001 + +struct f2fs_acl_entry { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +}; + +struct f2fs_acl_entry_short { + __le16 e_tag; + __le16 e_perm; +}; + +struct f2fs_acl_header { + __le32 a_version; +}; + +#ifdef CONFIG_F2FS_FS_POSIX_ACL + +extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); +extern int f2fs_acl_chmod(struct inode *inode); +extern int f2fs_init_acl(struct inode *inode, struct inode *dir); +#else +#define f2fs_check_acl NULL +#define f2fs_get_acl NULL +#define f2fs_set_acl NULL + +static inline int f2fs_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif +#endif /* __F2FS_ACL_H__ */ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c new file mode 100644 index 00000000000..58ebfee6e59 --- /dev/null +++ b/fs/f2fs/checkpoint.c @@ -0,0 +1,862 @@ +/* + * fs/f2fs/checkpoint.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +static struct kmem_cache *orphan_entry_slab; +static struct kmem_cache *inode_entry_slab; + +/* + * We guarantee no failure on the returned page. 
+ */ +struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page = NULL; +repeat: + page = grab_cache_page(mapping, index); + if (!page) { + cond_resched(); + goto repeat; + } + + /* We wait writeback only inside grab_meta_page() */ + wait_on_page_writeback(page); + SetPageUptodate(page); + return page; +} + +/* + * We guarantee no failure on the returned page. + */ +struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page; +repeat: + page = grab_cache_page(mapping, index); + if (!page) { + cond_resched(); + goto repeat; + } + if (PageUptodate(page)) + goto out; + + if (f2fs_readpage(sbi, page, index, READ_SYNC)) + goto repeat; + + lock_page(page); + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +out: + mark_page_accessed(page); + return page; +} + +static int f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + /* Should not write any meta pages, if any IO error was occurred */ + if (wbc->for_reclaim || sbi->por_doing || + is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { + dec_page_count(sbi, F2FS_DIRTY_META); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; + } + + wait_on_page_writeback(page); + + write_meta_page(sbi, page); + dec_page_count(sbi, F2FS_DIRTY_META); + unlock_page(page); + return 0; +} + +static int f2fs_write_meta_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + struct block_device *bdev = sbi->sb->s_bdev; + long written; + + if (wbc->for_kupdate) + return 0; + + if (get_pages(sbi, F2FS_DIRTY_META) == 0) + return 0; + + /* if mounting is failed, skip writing node pages */ + mutex_lock(&sbi->cp_mutex); + written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); + mutex_unlock(&sbi->cp_mutex); + wbc->nr_to_write -= written; + return 0; +} + +long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, + long nr_to_write) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + pgoff_t index = 0, end = LONG_MAX; + struct pagevec pvec; + long nwritten = 0; + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + pagevec_init(&pvec, 0); + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + lock_page(page); + BUG_ON(page->mapping != mapping); + BUG_ON(!PageDirty(page)); + clear_page_dirty_for_io(page); + if (f2fs_write_meta_page(page, &wbc)) { + unlock_page(page); + break; + } + if (nwritten++ >= nr_to_write) + break; + } + pagevec_release(&pvec); + cond_resched(); + } + + if (nwritten) + f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); + + return nwritten; +} + +static int f2fs_set_meta_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + inc_page_count(sbi, F2FS_DIRTY_META); + return 1; + } + return 0; +} + +const struct address_space_operations f2fs_meta_aops = { + .writepage = 
f2fs_write_meta_page, + .writepages = f2fs_write_meta_pages, + .set_page_dirty = f2fs_set_meta_page_dirty, +}; + +int acquire_orphan_inode(struct f2fs_sb_info *sbi) +{ + unsigned int max_orphans; + int err = 0; + + /* + * considering 512 blocks in a segment 5 blocks are needed for cp + * and log segment summaries. Remaining blocks are used to keep + * orphan entries with the limitation one reserved segment + * for cp pack we can have max 1020*507 orphan entries + */ + max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; + mutex_lock(&sbi->orphan_inode_mutex); + if (sbi->n_orphans >= max_orphans) + err = -ENOSPC; + else + sbi->n_orphans++; + mutex_unlock(&sbi->orphan_inode_mutex); + return err; +} + +void release_orphan_inode(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->orphan_inode_mutex); + if (sbi->n_orphans == 0) { + f2fs_msg(sbi->sb, KERN_ERR, "releasing " + "unacquired orphan inode"); + f2fs_handle_error(sbi); + } else + sbi->n_orphans--; + mutex_unlock(&sbi->orphan_inode_mutex); +} + +void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head, *this; + struct orphan_inode_entry *new = NULL, *orphan = NULL; + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + list_for_each(this, head) { + orphan = list_entry(this, struct orphan_inode_entry, list); + if (orphan->ino == ino) + goto out; + if (orphan->ino > ino) + break; + orphan = NULL; + } +retry: + new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); + if (!new) { + cond_resched(); + goto retry; + } + new->ino = ino; + + /* add new_oentry into list which is sorted by inode number */ + if (orphan) + list_add(&new->list, this->prev); + else + list_add_tail(&new->list, head); +out: + mutex_unlock(&sbi->orphan_inode_mutex); +} + +void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head; + struct orphan_inode_entry *orphan; + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + list_for_each_entry(orphan, head, list) { + if (orphan->ino == ino) { + list_del(&orphan->list); + kmem_cache_free(orphan_entry_slab, orphan); + if (sbi->n_orphans == 0) { + f2fs_msg(sbi->sb, KERN_ERR, "removing " + "unacquired orphan inode %d", + ino); + f2fs_handle_error(sbi); + } else + sbi->n_orphans--; + break; + } + } + mutex_unlock(&sbi->orphan_inode_mutex); +} + +static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct inode *inode = f2fs_iget(sbi->sb, ino); + if (IS_ERR(inode)) { + f2fs_msg(sbi->sb, KERN_ERR, "unable to recover orphan inode %d", + ino); + f2fs_handle_error(sbi); + return; + } + clear_nlink(inode); + + /* truncate all the data during iput */ + iput(inode); +} + +int recover_orphan_inodes(struct f2fs_sb_info *sbi) +{ + block_t start_blk, orphan_blkaddr, i, j; + + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) + return 0; + + sbi->por_doing = 1; + start_blk = __start_cp_addr(sbi) + 1; + orphan_blkaddr = __start_sum_addr(sbi) - 1; + + for (i = 0; i < orphan_blkaddr; i++) { + struct page *page = get_meta_page(sbi, start_blk + i); + struct f2fs_orphan_block *orphan_blk; + + orphan_blk = (struct f2fs_orphan_block *)page_address(page); + for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { + nid_t ino = le32_to_cpu(orphan_blk->ino[j]); + recover_orphan_inode(sbi, ino); + } + f2fs_put_page(page, 1); + } + /* clear Orphan Flag */ + clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); + sbi->por_doing = 0; + return 0; +} + +static void write_orphan_inodes(struct 
f2fs_sb_info *sbi, block_t start_blk) +{ + struct list_head *head, *this, *next; + struct f2fs_orphan_block *orphan_blk = NULL; + struct page *page = NULL; + unsigned int nentries = 0; + unsigned short index = 1; + unsigned short orphan_blocks; + + orphan_blocks = (unsigned short)((sbi->n_orphans + + (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + + /* loop for each orphan inode entry and write them in Jornal block */ + list_for_each_safe(this, next, head) { + struct orphan_inode_entry *orphan; + + orphan = list_entry(this, struct orphan_inode_entry, list); + + if (nentries == F2FS_ORPHANS_PER_BLOCK) { + /* + * an orphan block is full of 1020 entries, + * then we need to flush current orphan blocks + * and bring another one in memory + */ + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); + index++; + start_blk++; + nentries = 0; + page = NULL; + } + if (page) + goto page_exist; + + page = grab_meta_page(sbi, start_blk); + orphan_blk = (struct f2fs_orphan_block *)page_address(page); + memset(orphan_blk, 0, sizeof(*orphan_blk)); +page_exist: + orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); + } + if (!page) + goto end; + + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); +end: + mutex_unlock(&sbi->orphan_inode_mutex); +} + +static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, + block_t cp_addr, unsigned long long *version) +{ + struct page *cp_page_1, *cp_page_2 = NULL; + unsigned long blk_size = sbi->blocksize; + struct f2fs_checkpoint *cp_block; + unsigned long long cur_version = 0, pre_version = 0; + size_t crc_offset; + __u32 crc = 0; + + /* Read the 1st cp block in this CP pack */ + cp_page_1 = get_meta_page(sbi, cp_addr); + + /* get the version number */ + cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1); + crc_offset = le32_to_cpu(cp_block->checksum_offset); + if (crc_offset >= blk_size) + goto invalid_cp1; + + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + if (!f2fs_crc_valid(crc, cp_block, crc_offset)) + goto invalid_cp1; + + pre_version = cur_cp_version(cp_block); + + /* Read the 2nd cp block in this CP pack */ + cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_page_2 = get_meta_page(sbi, cp_addr); + + cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2); + crc_offset = le32_to_cpu(cp_block->checksum_offset); + if (crc_offset >= blk_size) + goto invalid_cp2; + + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + if (!f2fs_crc_valid(crc, cp_block, crc_offset)) + goto invalid_cp2; + + cur_version = cur_cp_version(cp_block); + + if (cur_version == pre_version) { + *version = cur_version; + f2fs_put_page(cp_page_2, 1); + return cp_page_1; + } +invalid_cp2: + f2fs_put_page(cp_page_2, 1); +invalid_cp1: + f2fs_put_page(cp_page_1, 1); + return NULL; +} + +int get_valid_checkpoint(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *cp_block; + struct f2fs_super_block *fsb = sbi->raw_super; + struct page *cp1, *cp2, *cur_page; + unsigned long blk_size = sbi->blocksize; + unsigned long long cp1_version = 0, cp2_version = 0; + unsigned long long cp_start_blk_no; + + sbi->ckpt = kzalloc(blk_size, 
GFP_KERNEL); + if (!sbi->ckpt) + return -ENOMEM; + /* + * Finding out valid cp block involves read both + * sets( cp pack1 and cp pack 2) + */ + cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); + cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); + + /* The second checkpoint pack should start at the next segment */ + cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); + + if (cp1 && cp2) { + if (ver_after(cp2_version, cp1_version)) + cur_page = cp2; + else + cur_page = cp1; + } else if (cp1) { + cur_page = cp1; + } else if (cp2) { + cur_page = cp2; + } else { + goto fail_no_cp; + } + + cp_block = (struct f2fs_checkpoint *)page_address(cur_page); + memcpy(sbi->ckpt, cp_block, blk_size); + + f2fs_put_page(cp1, 1); + f2fs_put_page(cp2, 1); + return 0; + +fail_no_cp: + kfree(sbi->ckpt); + return -EINVAL; +} + +static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) + return -EEXIST; + } + list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs++; +#endif + return 0; +} + +void set_dirty_dir_page(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; + + if (!S_ISDIR(inode->i_mode)) + return; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); + + inc_page_count(sbi, F2FS_DIRTY_DENTS); + inode_inc_dirty_dents(inode); + SetPagePrivate(page); + spin_unlock(&sbi->dir_inode_lock); +} + +void add_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); + spin_unlock(&sbi->dir_inode_lock); +} + +void remove_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + + if (!S_ISDIR(inode->i_mode)) + return; + + spin_lock(&sbi->dir_inode_lock); + if (atomic_read(&F2FS_I(inode)->dirty_dents)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } + + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) { + list_del(&entry->list); + kmem_cache_free(inode_entry_slab, entry); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs--; +#endif + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + + /* Only from the recovery routine */ + if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { + clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); + iput(inode); + } +} + +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + struct inode *inode = NULL; + + spin_lock(&sbi->dir_inode_lock); + 
list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode->i_ino == ino) { + inode = entry->inode; + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + return inode; +} + +void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->dir_inode_list; + struct dir_inode_entry *entry; + struct inode *inode; +retry: + spin_lock(&sbi->dir_inode_lock); + if (list_empty(head)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } + entry = list_entry(head->next, struct dir_inode_entry, list); + inode = igrab(entry->inode); + spin_unlock(&sbi->dir_inode_lock); + if (inode) { + filemap_flush(inode->i_mapping); + iput(inode); + } else { + /* + * We should submit bio, since it exists several + * wribacking dentry pages in the freeing inode. + */ + f2fs_submit_bio(sbi, DATA, true); + } + goto retry; +} + +/* + * Freeze all the FS-operations for checkpoint. + */ +static void block_operations(struct f2fs_sb_info *sbi) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + struct blk_plug plug; + + blk_start_plug(&plug); + +retry_flush_dents: + mutex_lock_all(sbi); + + /* write all the dirty dentry pages */ + if (get_pages(sbi, F2FS_DIRTY_DENTS)) { + mutex_unlock_all(sbi); + sync_dirty_dir_inodes(sbi); + goto retry_flush_dents; + } + + /* + * POR: we should ensure that there is no dirty node pages + * until finishing nat/sit flush. + */ +retry_flush_nodes: + mutex_lock(&sbi->node_write); + + if (get_pages(sbi, F2FS_DIRTY_NODES)) { + mutex_unlock(&sbi->node_write); + sync_node_pages(sbi, 0, &wbc); + goto retry_flush_nodes; + } + blk_finish_plug(&plug); +} + +static void unblock_operations(struct f2fs_sb_info *sbi) +{ + mutex_unlock(&sbi->node_write); + mutex_unlock_all(sbi); +} + +static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + nid_t last_nid = 0; + block_t start_blk; + struct page *cp_page; + unsigned int data_sum_blocks, orphan_blocks; + __u32 crc32 = 0; + void *kaddr; + int i; + + /* Flush all the NAT/SIT pages */ + while (get_pages(sbi, F2FS_DIRTY_META)) + sync_meta_pages(sbi, META, LONG_MAX); + + next_free_nid(sbi, &last_nid); + + /* + * modify checkpoint + * version number is already updated + */ + ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); + ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); + ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); + for (i = 0; i < 3; i++) { + ckpt->cur_node_segno[i] = + cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); + ckpt->cur_node_blkoff[i] = + cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE)); + ckpt->alloc_type[i + CURSEG_HOT_NODE] = + curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); + } + for (i = 0; i < 3; i++) { + ckpt->cur_data_segno[i] = + cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); + ckpt->cur_data_blkoff[i] = + cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA)); + ckpt->alloc_type[i + CURSEG_HOT_DATA] = + curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); + } + + ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); + ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); + ckpt->next_free_nid = cpu_to_le32(last_nid); + + /* 2 cp + n data seg summary + orphan inode blocks */ + data_sum_blocks = npages_for_summary_flush(sbi); + if (data_sum_blocks < 3) + set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + else + clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + + 
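/* Layout of the checkpoint pack assembled below: block 0 holds a copy of
 * *ckpt, followed by the orphan inode blocks (if any), the data segment
 * summaries, the node segment summaries (umount only), and finally a second
 * copy of *ckpt. Hence cp_pack_start_sum = 1 + orphan_blocks and a total of
 * 2 + data_sum_blocks + orphan_blocks blocks, plus NR_CURSEG_NODE_TYPE more
 * on umount. */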
orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) + / F2FS_ORPHANS_PER_BLOCK; + ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); + + if (is_umount) { + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); + } else { + clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks); + } + + if (sbi->n_orphans) + set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + else + clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + + /* update SIT/NAT bitmap */ + get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); + get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); + + crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); + *((__le32 *)((unsigned char *)ckpt + + le32_to_cpu(ckpt->checksum_offset))) + = cpu_to_le32(crc32); + + start_blk = __start_cp_addr(sbi); + + /* write out checkpoint buffer at block 0 */ + cp_page = grab_meta_page(sbi, start_blk++); + kaddr = page_address(cp_page); + memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + + if (sbi->n_orphans) { + write_orphan_inodes(sbi, start_blk); + start_blk += orphan_blocks; + } + + write_data_summaries(sbi, start_blk); + start_blk += data_sum_blocks; + if (is_umount) { + write_node_summaries(sbi, start_blk); + start_blk += NR_CURSEG_NODE_TYPE; + } + + /* writeout checkpoint block */ + cp_page = grab_meta_page(sbi, start_blk); + kaddr = page_address(cp_page); + memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + + /* wait for previous submitted node/meta pages writeback */ + while (get_pages(sbi, F2FS_WRITEBACK)) + congestion_wait(BLK_RW_ASYNC, HZ / 50); + + filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); + filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); + + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->alloc_valid_block_count = 0; + + /* Here, we only have one bio having CP pack */ + sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + + if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + clear_prefree_segments(sbi); + F2FS_RESET_SB_DIRT(sbi); + } +} + +/* + * We guarantee that this checkpoint procedure should not fail. 
+ */ +void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_ver; + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); + + mutex_lock(&sbi->cp_mutex); + block_operations(sbi); + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); + + f2fs_submit_bio(sbi, DATA, true); + f2fs_submit_bio(sbi, NODE, true); + f2fs_submit_bio(sbi, META, true); + + /* + * update checkpoint pack index + * Increase the version number so that + * SIT entries and seg summaries are written at correct place + */ + ckpt_ver = cur_cp_version(ckpt); + ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); + + /* write cached NAT/SIT entries to NAT/SIT area */ + flush_nat_entries(sbi); + flush_sit_entries(sbi); + + /* unlock all the fs_lock[] in do_checkpoint() */ + do_checkpoint(sbi, is_umount); + + unblock_operations(sbi); + mutex_unlock(&sbi->cp_mutex); + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); +} + +void init_orphan_info(struct f2fs_sb_info *sbi) +{ + mutex_init(&sbi->orphan_inode_mutex); + INIT_LIST_HEAD(&sbi->orphan_inode_list); + sbi->n_orphans = 0; +} + +int __init create_checkpoint_caches(void) +{ + orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", + sizeof(struct orphan_inode_entry), NULL); + if (unlikely(!orphan_entry_slab)) + return -ENOMEM; + inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", + sizeof(struct dir_inode_entry), NULL); + if (unlikely(!inode_entry_slab)) { + kmem_cache_destroy(orphan_entry_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_checkpoint_caches(void) +{ + kmem_cache_destroy(orphan_entry_slab); + kmem_cache_destroy(inode_entry_slab); +} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c new file mode 100644 index 00000000000..f47a72cfa6e --- /dev/null +++ b/fs/f2fs/data.c @@ -0,0 +1,792 @@ +/* + * fs/f2fs/data.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +/* + * Lock ordering for the change of data block address: + * ->data_page + * ->node_page + * update block addresses in the node page + */ +static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +{ + struct f2fs_node *rn; + __le32 *addr_array; + struct page *node_page = dn->node_page; + unsigned int ofs_in_node = dn->ofs_in_node; + + f2fs_wait_on_page_writeback(node_page, NODE, false); + + rn = F2FS_NODE(node_page); + + /* Get physical address of data block */ + addr_array = blkaddr_in_node(rn); + addr_array[ofs_in_node] = cpu_to_le32(new_addr); + set_page_dirty(node_page); +} + +int reserve_new_block(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + + if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + return -EPERM; + if (!inc_valid_block_count(sbi, dn->inode, 1)) + return -ENOSPC; + + trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); + + __set_data_blkaddr(dn, NEW_ADDR); + dn->data_blkaddr = NEW_ADDR; + sync_inode_page(dn); + return 0; +} + +static int check_extent_cache(struct inode *inode, pgoff_t pgofs, + struct buffer_head *bh_result) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); +#ifdef CONFIG_F2FS_STAT_FS + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +#endif + pgoff_t start_fofs, end_fofs; + block_t start_blkaddr; + + read_lock(&fi->ext.ext_lock); + if (fi->ext.len == 0) { + read_unlock(&fi->ext.ext_lock); + return 0; + } + +#ifdef CONFIG_F2FS_STAT_FS + sbi->total_hit_ext++; +#endif + start_fofs = fi->ext.fofs; + end_fofs = fi->ext.fofs + fi->ext.len - 1; + start_blkaddr = fi->ext.blk_addr; + + if (pgofs >= start_fofs && pgofs <= end_fofs) { + unsigned int blkbits = inode->i_sb->s_blocksize_bits; + size_t count; + + clear_buffer_new(bh_result); + map_bh(bh_result, inode->i_sb, + start_blkaddr + pgofs - start_fofs); + count = end_fofs - pgofs + 1; + if (count < (UINT_MAX >> blkbits)) + bh_result->b_size = (count << blkbits); + else + bh_result->b_size = UINT_MAX; + +#ifdef CONFIG_F2FS_STAT_FS + sbi->read_hit_ext++; +#endif + read_unlock(&fi->ext.ext_lock); + return 1; + } + read_unlock(&fi->ext.ext_lock); + return 0; +} + +void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +{ + struct f2fs_inode_info *fi = F2FS_I(dn->inode); + pgoff_t fofs, start_fofs, end_fofs; + block_t start_blkaddr, end_blkaddr; + + BUG_ON(blk_addr == NEW_ADDR); + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + + /* Update the page address in the parent node */ + __set_data_blkaddr(dn, blk_addr); + + write_lock(&fi->ext.ext_lock); + + start_fofs = fi->ext.fofs; + end_fofs = fi->ext.fofs + fi->ext.len - 1; + start_blkaddr = fi->ext.blk_addr; + end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; + + /* Drop and initialize the matched extent */ + if (fi->ext.len == 1 && fofs == start_fofs) + fi->ext.len = 0; + + /* Initial extent */ + if (fi->ext.len == 0) { + if (blk_addr != NULL_ADDR) { + fi->ext.fofs = fofs; + fi->ext.blk_addr = blk_addr; + fi->ext.len = 1; + } + goto end_update; + } + + /* Front merge */ + if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + fi->ext.fofs--; + fi->ext.blk_addr--; + fi->ext.len++; + goto end_update; + } + + /* Back merge */ + if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + fi->ext.len++; + goto end_update; + } + + /* Split the existing extent */ + if 
(fi->ext.len > 1 && + fofs >= start_fofs && fofs <= end_fofs) { + if ((end_fofs - fofs) < (fi->ext.len >> 1)) { + fi->ext.len = fofs - start_fofs; + } else { + fi->ext.fofs = fofs + 1; + fi->ext.blk_addr = start_blkaddr + + fofs - start_fofs + 1; + fi->ext.len -= fofs - start_fofs + 1; + } + goto end_update; + } + write_unlock(&fi->ext.ext_lock); + return; + +end_update: + write_unlock(&fi->ext.ext_lock); + sync_inode_page(dn); +} + +struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + int err; + + page = find_get_page(mapping, index); + if (page && PageUptodate(page)) + return page; + f2fs_put_page(page, 0); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) + return ERR_PTR(err); + f2fs_put_dnode(&dn); + + if (dn.data_blkaddr == NULL_ADDR) + return ERR_PTR(-ENOENT); + + /* By fallocate(), there is no cached page, but with NEW_ADDR */ + if (dn.data_blkaddr == NEW_ADDR) + return ERR_PTR(-EINVAL); + + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) { + unlock_page(page); + return page; + } + + err = f2fs_readpage(sbi, page, dn.data_blkaddr, + sync ? READ_SYNC : READA); + if (sync) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); + } + } + return page; +} + +/* + * If it tries to access a hole, return an error. + * Because, the callers, functions in dir.c and GC, should be able to know + * whether this page exists or not. + */ +struct page *get_lock_data_page(struct inode *inode, pgoff_t index) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + int err; + +repeat: + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return ERR_PTR(-ENOMEM); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); + } + f2fs_put_dnode(&dn); + + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_page(page, 1); + return ERR_PTR(-ENOENT); + } + + if (PageUptodate(page)) + return page; + + /* + * A new dentry page is allocated but not able to be written, since its + * new inode page couldn't be allocated due to -ENOSPC. + * In such the case, its blkaddr can be remained as NEW_ADDR. + * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. + */ + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return page; + } + + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + return ERR_PTR(err); + + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + return page; +} + +/* + * Caller ensures that this data page is never allocated. + * A new zero-filled data page is allocated in the page cache. + * + * Also, caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + * Note that, npage is set only by make_empty_dir. 
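+ * When npage is passed in, the dnode page is owned by the caller and is not + * put here; otherwise the dnode looked up internally is released before + * returning.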
+ */ +struct page *get_new_data_page(struct inode *inode, + struct page *npage, pgoff_t index, bool new_i_size) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct page *page; + struct dnode_of_data dn; + int err; + + set_new_dnode(&dn, inode, npage, npage, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (err) + return ERR_PTR(err); + + if (dn.data_blkaddr == NULL_ADDR) { + if (reserve_new_block(&dn)) { + if (!npage) + f2fs_put_dnode(&dn); + return ERR_PTR(-ENOSPC); + } + } + if (!npage) + f2fs_put_dnode(&dn); +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) + return page; + + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } else { + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + return ERR_PTR(err); + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + } + + if (new_i_size && + i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { + i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); + /* Only the directory inode sets new_i_size */ + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + mark_inode_dirty_sync(inode); + } + return page; +} + +static void read_end_io(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); + } while (bvec >= bio->bi_io_vec); + bio_put(bio); +} + +/* + * Fill the locked page with data located in the block address. + * Return unlocked page. + */ +int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, int type) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio; + + trace_f2fs_readpage(page, blk_addr, type); + + down_read(&sbi->bio_sem); + + /* Allocate a new bio */ + bio = f2fs_bio_alloc(bdev, 1); + + /* Initialize the bio */ + bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + bio->bi_end_io = read_end_io; + + if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + bio_put(bio); + up_read(&sbi->bio_sem); + f2fs_put_page(page, 1); + return -EFAULT; + } + + submit_bio(type, bio); + up_read(&sbi->bio_sem); + return 0; +} + +/* + * This function should be used by the data read flow only where it + * does not check the "create" flag that indicates block allocation. + * The reason for this special functionality is to exploit VFS readahead + * mechanism. 
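+ * It maps as many consecutive blocks as fit into bh_result->b_size in one + * call, trying the in-memory extent cache before walking the node page.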
+ */ +static int get_data_block_ro(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + unsigned int blkbits = inode->i_sb->s_blocksize_bits; + unsigned maxblocks = bh_result->b_size >> blkbits; + struct dnode_of_data dn; + pgoff_t pgofs; + int err; + + /* Get the page offset from the block offset(iblock) */ + pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); + + if (check_extent_cache(inode, pgofs, bh_result)) { + trace_f2fs_get_data_block(inode, iblock, bh_result, 0); + return 0; + } + + /* When reading holes, we need its node page */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); + if (err) { + trace_f2fs_get_data_block(inode, iblock, bh_result, err); + return (err == -ENOENT) ? 0 : err; + } + + /* It does not support data allocation */ + BUG_ON(create); + + if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { + int i; + unsigned int end_offset; + + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE(F2FS_I(inode)) : + ADDRS_PER_BLOCK; + + clear_buffer_new(bh_result); + + /* Give more consecutive addresses for the read ahead */ + for (i = 0; i < end_offset - dn.ofs_in_node; i++) + if (((datablock_addr(dn.node_page, + dn.ofs_in_node + i)) + != (dn.data_blkaddr + i)) || maxblocks == i) + break; + map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + bh_result->b_size = (i << blkbits); + } + f2fs_put_dnode(&dn); + trace_f2fs_get_data_block(inode, iblock, bh_result, 0); + return 0; +} + +static int f2fs_read_data_page(struct file *file, struct page *page) +{ + return mpage_readpage(page, get_data_block_ro); +} + +static int f2fs_read_data_pages(struct file *file, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); +} + +int do_write_data_page(struct page *page) +{ + struct inode *inode = page->mapping->host; + block_t old_blk_addr, new_blk_addr; + struct dnode_of_data dn; + int err = 0; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + if (err) + return err; + + old_blk_addr = dn.data_blkaddr; + + /* This page is already truncated */ + if (old_blk_addr == NULL_ADDR) + goto out_writepage; + + set_page_writeback(page); + + /* + * If current allocation needs SSR, + * it had better in-place writes for updated data. + */ + if (unlikely(old_blk_addr != NEW_ADDR && + !is_cold_data(page) && + need_inplace_update(inode))) { + rewrite_data_page(F2FS_SB(inode->i_sb), page, + old_blk_addr); + } else { + write_data_page(inode, page, &dn, + old_blk_addr, &new_blk_addr); + update_extent_cache(new_blk_addr, &dn); + } +out_writepage: + f2fs_put_dnode(&dn); + return err; +} + +static int f2fs_write_data_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = ((unsigned long long) i_size) + >> PAGE_CACHE_SHIFT; + unsigned offset; + bool need_balance_fs = false; + int err = 0; + + if (page->index < end_index) + goto write; + + /* + * If the offset is out-of-range of file size, + * this page does not have to be written to disk. 
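+ * A page that only partially crosses i_size is zeroed from the EOF offset to + * the end of the page and then written out normally.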
+ */ + offset = i_size & (PAGE_CACHE_SIZE - 1); + if ((page->index >= end_index + 1) || !offset) { + if (S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + goto out; + } + + zero_user_segment(page, offset, PAGE_CACHE_SIZE); +write: + if (sbi->por_doing) { + err = AOP_WRITEPAGE_ACTIVATE; + goto redirty_out; + } + + /* Dentry blocks are controlled by checkpoint */ + if (S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + err = do_write_data_page(page); + } else { + int ilock = mutex_lock_op(sbi); + err = do_write_data_page(page); + mutex_unlock_op(sbi, ilock); + need_balance_fs = true; + } + if (err == -ENOENT) + goto out; + else if (err) + goto redirty_out; + + if (wbc->for_reclaim) + f2fs_submit_bio(sbi, DATA, true); + + clear_cold_data(page); +out: + unlock_page(page); + if (need_balance_fs) + f2fs_balance_fs(sbi); + return 0; + +redirty_out: + wbc->pages_skipped++; + set_page_dirty(page); + return err; +} + +#define MAX_DESIRED_PAGES_WP 4096 + +static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct address_space *mapping = data; + int ret = mapping->a_ops->writepage(page, wbc); + mapping_set_error(mapping, ret); + return ret; +} + +static int f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + bool locked = false; + int ret; + long excess_nrtw = 0, desired_nrtw; + + /* deal with chardevs and other special file */ + if (!mapping->a_ops->writepage) + return 0; + + if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { + desired_nrtw = MAX_DESIRED_PAGES_WP; + excess_nrtw = desired_nrtw - wbc->nr_to_write; + wbc->nr_to_write = desired_nrtw; + } + + if (!S_ISDIR(inode->i_mode)) { + mutex_lock(&sbi->writepages); + locked = true; + } + ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); + if (locked) + mutex_unlock(&sbi->writepages); + f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); + + remove_dirty_dir_inode(inode); + + wbc->nr_to_write -= excess_nrtw; + return ret; +} + +static int f2fs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; + struct dnode_of_data dn; + int err = 0; + int ilock; + + f2fs_balance_fs(sbi); +repeat: + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + ilock = mutex_lock_op(sbi); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (err) + goto err; + + if (dn.data_blkaddr == NULL_ADDR) + err = reserve_new_block(&dn); + + f2fs_put_dnode(&dn); + if (err) + goto err; + + mutex_unlock_op(sbi, ilock); + + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) + return 0; + + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned end = start + len; + + /* Reading beyond i_size is simple: memset to zero */ + zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); + goto out; + } + + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + } else { + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + 
return err; + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return -EIO; + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + } +out: + SetPageUptodate(page); + clear_cold_data(page); + return 0; + +err: + mutex_unlock_op(sbi, ilock); + f2fs_put_page(page, 1); + return err; +} + +static int f2fs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + SetPageUptodate(page); + set_page_dirty(page); + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + unlock_page(page); + page_cache_release(page); + return copied; +} + +static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) + return 0; + + /* Needs synchronization with the cleaner */ + return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + get_data_block_ro); +} + +static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode) && PageDirty(page)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + ClearPagePrivate(page); +} + +static int f2fs_release_data_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 1; +} + +static int f2fs_set_data_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + set_dirty_dir_page(inode, page); + return 1; + } + return 0; +} + +static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) +{ + return generic_block_bmap(mapping, block, get_data_block_ro); +} + +const struct address_space_operations f2fs_dblock_aops = { + .readpage = f2fs_read_data_page, + .readpages = f2fs_read_data_pages, + .writepage = f2fs_write_data_page, + .writepages = f2fs_write_data_pages, + .write_begin = f2fs_write_begin, + .write_end = f2fs_write_end, + .set_page_dirty = f2fs_set_data_page_dirty, + .invalidatepage = f2fs_invalidate_data_page, + .releasepage = f2fs_release_data_page, + .direct_IO = f2fs_direct_IO, + .bmap = f2fs_bmap, +}; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c new file mode 100644 index 00000000000..07d58ea6b65 --- /dev/null +++ b/fs/f2fs/debug.c @@ -0,0 +1,354 @@ +/* + * f2fs debugging statistics + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Copyright (c) 2012 Linux Foundation + * Copyright (c) 2012 Greg Kroah-Hartman + * Copyright (c) 2014 XPerience(R) Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "gc.h" + +static LIST_HEAD(f2fs_stat_list); +static struct dentry *debugfs_root; +static DEFINE_MUTEX(f2fs_stat_mutex); + +static void update_general_status(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + int i; + + /* valid check of the segment numbers */ + si->hit_ext = sbi->read_hit_ext; + si->total_ext = sbi->total_hit_ext; + si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); + si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); + si->ndirty_dirs = sbi->n_dirty_dirs; + si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; + si->rsvd_segs = reserved_segments(sbi); + si->overp_segs = overprovision_segments(sbi); + si->valid_count = valid_user_blocks(sbi); + si->valid_node_count = valid_node_count(sbi); + si->valid_inode_count = valid_inode_count(sbi); + si->utilization = utilization(sbi); + + si->free_segs = free_segments(sbi); + si->free_secs = free_sections(sbi); + si->prefree_count = prefree_segments(sbi); + si->dirty_count = dirty_segments(sbi); + si->node_pages = sbi->node_inode->i_mapping->nrpages; + si->meta_pages = sbi->meta_inode->i_mapping->nrpages; + si->nats = NM_I(sbi)->nat_cnt; + si->sits = SIT_I(sbi)->dirty_sentries; + si->fnids = NM_I(sbi)->fcnt; + si->bg_gc = sbi->bg_gc; + si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) + * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) + / 2; + si->util_valid = (int)(written_block_count(sbi) >> + sbi->log_blocks_per_seg) + * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) + / 2; + si->util_invalid = 50 - si->util_free - si->util_valid; + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + si->curseg[i] = curseg->segno; + si->cursec[i] = curseg->segno / sbi->segs_per_sec; + si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; + } + + for (i = 0; i < 2; i++) { + si->segment_count[i] = sbi->segment_count[i]; + si->block_count[i] = sbi->block_count[i]; + } +} + +/* + * This function calculates BDF of every segments + */ +static void update_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno, vblocks; + int ndirty = 0; + + bimodal = 0; + total_vblocks = 0; + blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); + hblks_per_sec = blks_per_sec / 2; + mutex_lock(&sit_i->sentry_lock); + for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + dist = abs(vblocks - hblks_per_sec); + bimodal += dist * dist; + + if (vblocks > 0 && vblocks < blks_per_sec) { + total_vblocks += vblocks; + ndirty++; + } + } + mutex_unlock(&sit_i->sentry_lock); + dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; + si->bimodal = bimodal / dist; + if (si->dirty_count) + si->avg_vblocks = total_vblocks / ndirty; + else + si->avg_vblocks = 0; +} + +/* + * This function calculates memory footprint. 
+ */ +static void update_mem_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned npages; + + if (si->base_mem) + goto get_cache; + + si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; + si->base_mem += 2 * sizeof(struct f2fs_inode_info); + si->base_mem += sizeof(*sbi->ckpt); + + /* build sm */ + si->base_mem += sizeof(struct f2fs_sm_info); + + /* build sit */ + si->base_mem += sizeof(struct sit_info); + si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); + si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); + if (sbi->segs_per_sec > 1) + si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); + si->base_mem += __bitmap_size(sbi, SIT_BITMAP); + + /* build free segmap */ + si->base_mem += sizeof(struct free_segmap_info); + si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); + + /* build curseg */ + si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; + si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE; + + /* build dirty segmap */ + si->base_mem += sizeof(struct dirty_seglist_info); + si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); + + /* buld nm */ + si->base_mem += sizeof(struct f2fs_nm_info); + si->base_mem += __bitmap_size(sbi, NAT_BITMAP); + + /* build gc */ + si->base_mem += sizeof(struct f2fs_gc_kthread); + +get_cache: + /* free nids */ + si->cache_mem = NM_I(sbi)->fcnt; + si->cache_mem += NM_I(sbi)->nat_cnt; + npages = sbi->node_inode->i_mapping->nrpages; + si->cache_mem += npages << PAGE_CACHE_SHIFT; + npages = sbi->meta_inode->i_mapping->nrpages; + si->cache_mem += npages << PAGE_CACHE_SHIFT; + si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); + si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); +} + +static int stat_show(struct seq_file *s, void *v) +{ + struct f2fs_stat_info *si; + int i = 0; + int j; + + mutex_lock(&f2fs_stat_mutex); + list_for_each_entry(si, &f2fs_stat_list, stat_list) { + char devname[BDEVNAME_SIZE]; + + update_general_status(si->sbi); + + seq_printf(s, "\n=====[ partition info(%s). 
#%d ]=====\n", + bdevname(si->sbi->sb->s_bdev, devname), i++); + seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", + si->sit_area_segs, si->nat_area_segs); + seq_printf(s, "[SSA: %d] [MAIN: %d", + si->ssa_area_segs, si->main_area_segs); + seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", + si->overp_segs, si->rsvd_segs); + seq_printf(s, "Utilization: %d%% (%d valid blocks)\n", + si->utilization, si->valid_count); + seq_printf(s, " - Node: %u (Inode: %u, ", + si->valid_node_count, si->valid_inode_count); + seq_printf(s, "Other: %u)\n - Data: %u\n", + si->valid_node_count - si->valid_inode_count, + si->valid_count - si->valid_node_count); + seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", + si->main_area_segs, si->main_area_sections, + si->main_area_zones); + seq_printf(s, " - COLD data: %d, %d, %d\n", + si->curseg[CURSEG_COLD_DATA], + si->cursec[CURSEG_COLD_DATA], + si->curzone[CURSEG_COLD_DATA]); + seq_printf(s, " - WARM data: %d, %d, %d\n", + si->curseg[CURSEG_WARM_DATA], + si->cursec[CURSEG_WARM_DATA], + si->curzone[CURSEG_WARM_DATA]); + seq_printf(s, " - HOT data: %d, %d, %d\n", + si->curseg[CURSEG_HOT_DATA], + si->cursec[CURSEG_HOT_DATA], + si->curzone[CURSEG_HOT_DATA]); + seq_printf(s, " - Dir dnode: %d, %d, %d\n", + si->curseg[CURSEG_HOT_NODE], + si->cursec[CURSEG_HOT_NODE], + si->curzone[CURSEG_HOT_NODE]); + seq_printf(s, " - File dnode: %d, %d, %d\n", + si->curseg[CURSEG_WARM_NODE], + si->cursec[CURSEG_WARM_NODE], + si->curzone[CURSEG_WARM_NODE]); + seq_printf(s, " - Indir nodes: %d, %d, %d\n", + si->curseg[CURSEG_COLD_NODE], + si->cursec[CURSEG_COLD_NODE], + si->curzone[CURSEG_COLD_NODE]); + seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n", + si->main_area_segs - si->dirty_count - + si->prefree_count - si->free_segs, + si->dirty_count); + seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", + si->prefree_count, si->free_segs, si->free_secs); + seq_printf(s, "GC calls: %d (BG: %d)\n", + si->call_count, si->bg_gc); + seq_printf(s, " - data segments : %d\n", si->data_segs); + seq_printf(s, " - node segments : %d\n", si->node_segs); + seq_printf(s, "Try to move %d blocks\n", si->tot_blks); + seq_printf(s, " - data blocks : %d\n", si->data_blks); + seq_printf(s, " - node blocks : %d\n", si->node_blks); + seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", + si->hit_ext, si->total_ext); + seq_printf(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - nodes %4d in %4d\n", + si->ndirty_node, si->node_pages); + seq_printf(s, " - dents %4d in dirs:%4d\n", + si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - meta %4d in %4d\n", + si->ndirty_meta, si->meta_pages); + seq_printf(s, " - NATs %5d > %lu\n", + si->nats, NM_WOUT_THRESHOLD); + seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", + si->sits, si->fnids); + seq_puts(s, "\nDistribution of User Blocks:"); + seq_puts(s, " [ valid | invalid | free ]\n"); + seq_puts(s, " ["); + + for (j = 0; j < si->util_valid; j++) + seq_putc(s, '-'); + seq_putc(s, '|'); + + for (j = 0; j < si->util_invalid; j++) + seq_putc(s, '-'); + seq_putc(s, '|'); + + for (j = 0; j < si->util_free; j++) + seq_putc(s, '-'); + seq_puts(s, "]\n\n"); + seq_printf(s, "SSR: %u blocks in %u segments\n", + si->block_count[SSR], si->segment_count[SSR]); + seq_printf(s, "LFS: %u blocks in %u segments\n", + si->block_count[LFS], si->segment_count[LFS]); + + /* segment usage info */ + update_sit_info(si->sbi); + seq_printf(s, "\nBDF: %u, avg. 
vblocks: %u\n", + si->bimodal, si->avg_vblocks); + + /* memory footprint */ + update_mem_info(si->sbi); + seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", + (si->base_mem + si->cache_mem) >> 10, + si->base_mem >> 10, si->cache_mem >> 10); + } + mutex_unlock(&f2fs_stat_mutex); + return 0; +} + +static int stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, stat_show, inode->i_private); +} + +static const struct file_operations stat_fops = { + .open = stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int f2fs_build_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_stat_info *si; + + si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); + if (!si) + return -ENOMEM; + + si->all_area_segs = le32_to_cpu(raw_super->segment_count); + si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); + si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); + si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa); + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + si->sbi = sbi; + sbi->stat_info = si; + + mutex_lock(&f2fs_stat_mutex); + list_add_tail(&si->stat_list, &f2fs_stat_list); + mutex_unlock(&f2fs_stat_mutex); + + return 0; +} + +void f2fs_destroy_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + mutex_lock(&f2fs_stat_mutex); + list_del(&si->stat_list); + mutex_unlock(&f2fs_stat_mutex); + + kfree(si); +} + +void __init f2fs_create_root_stats(void) +{ + debugfs_root = debugfs_create_dir("f2fs", NULL); + if (debugfs_root) + debugfs_create_file("status", S_IRUGO, debugfs_root, + NULL, &stat_fops); +} + +void f2fs_destroy_root_stats(void) +{ + debugfs_remove_recursive(debugfs_root); + debugfs_root = NULL; +} diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c new file mode 100644 index 00000000000..be6cd8a704b --- /dev/null +++ b/fs/f2fs/dir.c @@ -0,0 +1,716 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/dir.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include "f2fs.h" +#include "node.h" +#include "acl.h" +#include "xattr.h" + +static unsigned long dir_blocks(struct inode *inode) +{ + return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) + >> PAGE_CACHE_SHIFT; +} + +static unsigned int dir_buckets(unsigned int level) +{ + if (level < MAX_DIR_HASH_DEPTH / 2) + return 1 << level; + else + return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); +} + +static unsigned int bucket_blocks(unsigned int level) +{ + if (level < MAX_DIR_HASH_DEPTH / 2) + return 2; + else + return 4; +} + +static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { + [F2FS_FT_UNKNOWN] = DT_UNKNOWN, + [F2FS_FT_REG_FILE] = DT_REG, + [F2FS_FT_DIR] = DT_DIR, + [F2FS_FT_CHRDEV] = DT_CHR, + [F2FS_FT_BLKDEV] = DT_BLK, + [F2FS_FT_FIFO] = DT_FIFO, + [F2FS_FT_SOCK] = DT_SOCK, + [F2FS_FT_SYMLINK] = DT_LNK, +}; + +#define S_SHIFT 12 +static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, + [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, +}; + +static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +{ + mode_t mode = inode->i_mode; + de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; +} + +static unsigned long dir_block_index(unsigned int level, unsigned int idx) +{ + unsigned long i; + unsigned long bidx = 0; + + for (i = 0; i < level; i++) + bidx += dir_buckets(i) * bucket_blocks(i); + bidx += idx * bucket_blocks(level); + return bidx; +} + +static bool early_match_name(const char *name, size_t namelen, + f2fs_hash_t namehash, struct f2fs_dir_entry *de) +{ + if (le16_to_cpu(de->name_len) != namelen) + return false; + + if (de->hash_code != namehash) + return false; + + return true; +} + +static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, + const char *name, size_t namelen, int *max_slots, + f2fs_hash_t namehash, struct page **res_page, + bool nocase) +{ + struct f2fs_dir_entry *de; + unsigned long bit_pos, end_pos, next_pos; + struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); + int slots; + + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, 0); + while (bit_pos < NR_DENTRY_IN_BLOCK) { + de = &dentry_blk->dentry[bit_pos]; + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + + if (nocase) { + if ((le16_to_cpu(de->name_len) == namelen) && + !strncasecmp(dentry_blk->filename[bit_pos], + name, namelen)) { + *res_page = dentry_page; + goto found; + } + } else if (early_match_name(name, namelen, namehash, de)) { + if (!memcmp(dentry_blk->filename[bit_pos], + name, namelen)) { + *res_page = dentry_page; + goto found; + } + } + next_pos = bit_pos + slots; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, next_pos); + if (bit_pos >= NR_DENTRY_IN_BLOCK) + end_pos = NR_DENTRY_IN_BLOCK; + else + end_pos = bit_pos; + if (*max_slots < end_pos - next_pos) + *max_slots = end_pos - next_pos; + } + + de = NULL; + kunmap(dentry_page); +found: + return de; +} + +static struct f2fs_dir_entry *find_in_level(struct inode *dir, + unsigned int level, const char *name, size_t namelen, + f2fs_hash_t namehash, struct page **res_page) +{ + int s = GET_DENTRY_SLOTS(namelen); + unsigned int nbucket, nblock; + unsigned int bidx, end_block; + struct page *dentry_page; + struct f2fs_dir_entry *de = NULL; + struct f2fs_sb_info 
*sbi = F2FS_SB(dir->i_sb); + bool room = false; + int max_slots = 0; + + BUG_ON(level > MAX_DIR_HASH_DEPTH); + + nbucket = dir_buckets(level); + nblock = bucket_blocks(level); + + bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); + end_block = bidx + nblock; + + for (; bidx < end_block; bidx++) { + bool nocase = false; + + /* no need to allocate new dentry pages to all the indices */ + dentry_page = find_data_page(dir, bidx, true); + if (IS_ERR(dentry_page)) { + room = true; + continue; + } + + if (test_opt(sbi, ANDROID_EMU) && + (sbi->android_emu_flags & F2FS_ANDROID_EMU_NOCASE) && + F2FS_I(dir)->i_advise & FADVISE_ANDROID_EMU) + nocase = true; + + de = find_in_block(dentry_page, name, namelen, + &max_slots, namehash, res_page, + nocase); + if (de) + break; + + if (max_slots >= s) + room = true; + f2fs_put_page(dentry_page, 0); + } + + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; + } + + return de; +} + +/* + * Find an entry in the specified directory with the wanted name. + * It returns the page where the entry was found (as a parameter - res_page), + * and the entry itself. Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, + struct qstr *child, struct page **res_page) +{ + const char *name = child->name; + size_t namelen = child->len; + unsigned long npages = dir_blocks(dir); + struct f2fs_dir_entry *de = NULL; + f2fs_hash_t name_hash; + unsigned int max_depth; + unsigned int level; + + if (namelen > F2FS_NAME_LEN) + return NULL; + + if (npages == 0) + return NULL; + + *res_page = NULL; + + name_hash = f2fs_dentry_hash(name, namelen); + max_depth = F2FS_I(dir)->i_current_depth; + + for (level = 0; level < max_depth; level++) { + de = find_in_level(dir, level, name, + namelen, name_hash, res_page); + if (de) + break; + } + if (!de && F2FS_I(dir)->chash != name_hash) { + F2FS_I(dir)->chash = name_hash; + F2FS_I(dir)->clevel = level - 1; + } + return de; +} + +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) +{ + struct page *page; + struct f2fs_dir_entry *de; + struct f2fs_dentry_block *dentry_blk; + + page = get_lock_data_page(dir, 0); + if (IS_ERR(page)) + return NULL; + + dentry_blk = kmap(page); + de = &dentry_blk->dentry[1]; + *p = page; + unlock_page(page); + return de; +} + +ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) +{ + ino_t res = 0; + struct f2fs_dir_entry *de; + struct page *page; + + de = f2fs_find_entry(dir, qstr, &page); + if (de) { + res = le32_to_cpu(de->ino); + kunmap(page); + f2fs_put_page(page, 0); + } + + return res; +} + +void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, + struct page *page, struct inode *inode) +{ + lock_page(page); + wait_on_page_writeback(page); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + kunmap(page); + set_page_dirty(page); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + mark_inode_dirty(dir); + + /* update parent inode number before releasing dentry page */ + F2FS_I(inode)->i_pino = dir->i_ino; + + f2fs_put_page(page, 1); +} + +static void init_dent_inode(const struct qstr *name, struct page *ipage) +{ + struct f2fs_node *rn; + + /* copy name info. 
to this inode page */ + rn = F2FS_NODE(ipage); + rn->i.i_namelen = cpu_to_le32(name->len); + memcpy(rn->i.i_name, name->name, name->len); + set_page_dirty(ipage); +} + +int update_dent_inode(struct inode *inode, const struct qstr *name) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + init_dent_inode(name, page); + f2fs_put_page(page, 1); + + return 0; +} + +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) +{ + struct page *dentry_page; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + void *kaddr; + + dentry_page = get_new_data_page(inode, page, 0, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + + de = &dentry_blk->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(dentry_blk->filename[0], ".", 1); + set_de_type(de, inode); + + de = &dentry_blk->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(dentry_blk->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + kunmap_atomic(kaddr); + + set_page_dirty(dentry_page); + f2fs_put_page(dentry_page, 1); + return 0; +} + +static struct page *init_inode_metadata(struct inode *inode, + struct inode *dir, const struct qstr *name) +{ + struct page *page; + int err; + + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + page = new_inode_page(inode, name); + if (IS_ERR(page)) + return page; + + if (S_ISDIR(inode->i_mode)) { + err = make_empty_dir(inode, dir, page); + if (err) + goto error; + } + + err = f2fs_init_acl(inode, dir); + if (err) + goto error; + + err = f2fs_init_security(inode, dir, name, page); + if (err) + goto error; + + wait_on_page_writeback(page); + } else { + page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + if (IS_ERR(page)) + return page; + + wait_on_page_writeback(page); + set_cold_node(inode, page); + } + + init_dent_inode(name, page); + + /* + * This file should be checkpointed during fsync. + * We lost i_pino from now on. 
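+ * file_lost_pino() below records that the cached parent ino is no longer + * reliable, so a later fsync cannot take the fast path and must fall back + * to a checkpoint.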
+ */ + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + file_lost_pino(inode); + inc_nlink(inode); + } + return page; + +error: + f2fs_put_page(page, 1); + remove_inode_page(inode); + return ERR_PTR(err); +} + +static void update_parent_metadata(struct inode *dir, struct inode *inode, + unsigned int current_depth) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (S_ISDIR(inode->i_mode)) { + inc_nlink(dir); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); + } + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + if (F2FS_I(dir)->i_current_depth != current_depth) { + F2FS_I(dir)->i_current_depth = current_depth; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) + update_inode_page(dir); + else + mark_inode_dirty(dir); + + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); +} + +static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_start); + if (zero_start >= NR_DENTRY_IN_BLOCK) + return NR_DENTRY_IN_BLOCK; + + zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + + bit_start = zero_end + 1; + + if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) + return NR_DENTRY_IN_BLOCK; + goto next; +} + +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + */ +int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) +{ + unsigned int bit_pos; + unsigned int level; + unsigned int current_depth; + unsigned long bidx, block; + f2fs_hash_t dentry_hash; + struct f2fs_dir_entry *de; + unsigned int nbucket, nblock; + size_t namelen = name->len; + struct page *dentry_page = NULL; + struct f2fs_dentry_block *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; + int err = 0; + int i; + + dentry_hash = f2fs_dentry_hash(name->name, name->len); + level = 0; + current_depth = F2FS_I(dir)->i_current_depth; + if (F2FS_I(dir)->chash == dentry_hash) { + level = F2FS_I(dir)->clevel; + F2FS_I(dir)->chash = 0; + } + +start: + if (current_depth == MAX_DIR_HASH_DEPTH) + return -ENOSPC; + + /* Increase the depth, if required */ + if (level == current_depth) + ++current_depth; + + nbucket = dir_buckets(level); + nblock = bucket_blocks(level); + + bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); + + for (block = bidx; block <= (bidx + nblock - 1); block++) { + dentry_page = get_new_data_page(dir, NULL, block, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + dentry_blk = kmap(dentry_page); + bit_pos = room_for_filename(dentry_blk, slots); + if (bit_pos < NR_DENTRY_IN_BLOCK) + goto add_dentry; + + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + } + + /* Move to next level to find the empty slot for new dentry */ + ++level; + goto start; +add_dentry: + wait_on_page_writeback(dentry_page); + + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = dentry_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; 
i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(dentry_page); + + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, current_depth); +fail: + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + return err; +} + +/* + * It only removes the dentry from the dentry page,corresponding name + * entry in name page does not need to be touched during deletion. + */ +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *inode) +{ + struct f2fs_dentry_block *dentry_blk; + unsigned int bit_pos; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + void *kaddr = page_address(page); + int i; + + lock_page(page); + wait_on_page_writeback(page); + + dentry_blk = (struct f2fs_dentry_block *)kaddr; + bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + /* Let's check and deallocate this dentry page */ + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + 0); + kunmap(page); /* kunmap - pair of f2fs_find_entry */ + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode && S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + update_inode_page(dir); + } else { + mark_inode_dirty(dir); + } + + if (inode) { + inode->i_ctime = CURRENT_TIME; + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); + } + + if (bit_pos == NR_DENTRY_IN_BLOCK) { + truncate_hole(dir, page->index, page->index + 1); + clear_page_dirty_for_io(page); + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(dir); + } + f2fs_put_page(page, 1); +} + +bool f2fs_empty_dir(struct inode *dir) +{ + unsigned long bidx; + struct page *dentry_page; + unsigned int bit_pos; + struct f2fs_dentry_block *dentry_blk; + unsigned long nblock = dir_blocks(dir); + + for (bidx = 0; bidx < nblock; bidx++) { + void *kaddr; + dentry_page = get_lock_data_page(dir, bidx); + if (IS_ERR(dentry_page)) { + if (PTR_ERR(dentry_page) == -ENOENT) + continue; + else + return false; + } + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + if (bidx == 0) + bit_pos = 2; + else + bit_pos = 0; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_pos); + kunmap_atomic(kaddr); + + f2fs_put_page(dentry_page, 1); + + if (bit_pos < NR_DENTRY_IN_BLOCK) + return false; + } + return true; +} + +static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + unsigned long pos = file->f_pos; + struct inode *inode = file->f_dentry->d_inode; + unsigned long npages = dir_blocks(inode); + unsigned char *types = NULL; + unsigned int bit_pos = 0, start_bit_pos = 0; + int over = 0; + struct f2fs_dentry_block *dentry_blk = NULL; + struct f2fs_dir_entry *de = NULL; + struct page *dentry_page = NULL; + unsigned int n = 0; + unsigned char d_type = DT_UNKNOWN; + int slots; + + types = f2fs_filetype_table; + bit_pos = (pos % NR_DENTRY_IN_BLOCK); + n = (pos / 
NR_DENTRY_IN_BLOCK); + + for ( ; n < npages; n++) { + dentry_page = get_lock_data_page(inode, n); + if (IS_ERR(dentry_page)) + continue; + + start_bit_pos = bit_pos; + dentry_blk = kmap(dentry_page); + while (bit_pos < NR_DENTRY_IN_BLOCK) { + d_type = DT_UNKNOWN; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_pos); + if (bit_pos >= NR_DENTRY_IN_BLOCK) + break; + + de = &dentry_blk->dentry[bit_pos]; + if (types && de->file_type < F2FS_FT_MAX) + d_type = types[de->file_type]; + + over = filldir(dirent, + dentry_blk->filename[bit_pos], + le16_to_cpu(de->name_len), + (n * NR_DENTRY_IN_BLOCK) + bit_pos, + le32_to_cpu(de->ino), d_type); + if (over) { + file->f_pos += bit_pos - start_bit_pos; + goto success; + } + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + bit_pos += slots; + } + bit_pos = 0; + file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + dentry_page = NULL; + } +success: + if (dentry_page && !IS_ERR(dentry_page)) { + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + } + + return 0; +} + +const struct file_operations f2fs_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = f2fs_readdir, + .fsync = f2fs_sync_file, + .unlocked_ioctl = f2fs_ioctl, +}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h new file mode 100644 index 00000000000..e0558ca8250 --- /dev/null +++ b/fs/f2fs/f2fs.h @@ -0,0 +1,1292 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/f2fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_H +#define _LINUX_F2FS_H + +#include +#include +#include +#include +#include +#include +#include + +/* + * For mount options + */ +#define F2FS_MOUNT_BG_GC 0x00000001 +#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 +#define F2FS_MOUNT_DISCARD 0x00000004 +#define F2FS_MOUNT_NOHEAP 0x00000008 +#define F2FS_MOUNT_XATTR_USER 0x00000010 +#define F2FS_MOUNT_POSIX_ACL 0x00000020 +#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 +#define F2FS_MOUNT_INLINE_XATTR 0x00000080 +#define F2FS_MOUNT_ANDROID_EMU 0x00001000 +#define F2FS_MOUNT_ERRORS_PANIC 0x00002000 +#define F2FS_MOUNT_ERRORS_RECOVER 0x00004000 + +#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) + +#define ver_after(a, b) (typecheck(unsigned long long, a) && \ + typecheck(unsigned long long, b) && \ + ((long long)((a) - (b)) > 0)) + +typedef u32 block_t; /* + * should not change u32, since it is the on-disk block + * address format, __le32. + */ +typedef u32 nid_t; + +struct f2fs_mount_info { + unsigned int opt; +}; + +#define CRCPOLY_LE 0xedb88320 + +static inline __u32 f2fs_crc32(void *buf, size_t len) +{ + unsigned char *p = (unsigned char *)buf; + __u32 crc = F2FS_SUPER_MAGIC; + int i; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 
CRCPOLY_LE : 0); + } + return crc; +} + +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) +{ + return f2fs_crc32(buf, buf_size) == blk_crc; +} + +/* + * For checkpoint manager + */ +enum { + NAT_BITMAP, + SIT_BITMAP +}; + +/* for the list of orphan inodes */ +struct orphan_inode_entry { + struct list_head list; /* list head */ + nid_t ino; /* inode number */ +}; + +/* for the list of directory inodes */ +struct dir_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ +}; + +/* for the list of fsync inodes, used only during recovery */ +struct fsync_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ + block_t blkaddr; /* block address locating the last inode */ +}; + +#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) +#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) + +#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) +#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) +#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) +#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) + +static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = nats_in_cursum(rs); + rs->n_nats = cpu_to_le16(before + i); + return before; +} + +static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = sits_in_cursum(rs); + rs->n_sits = cpu_to_le16(before + i); + return before; +} + +/* + * ioctl commands + */ +#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS +#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS + +#if defined(__KERNEL__) && defined(CONFIG_COMPAT) +/* + * ioctl commands in 32 bit emulation + */ +#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#endif + +/* + * For INODE and NODE manager + */ +/* + * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 + * as its node offset to distinguish from index node blocks. + * But some bits are used to mark the node block. + */ +#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \ + >> OFFSET_BIT_SHIFT) +enum { + ALLOC_NODE, /* allocate a new node page if needed */ + LOOKUP_NODE, /* look up a node without readahead */ + LOOKUP_NODE_RA, /* + * look up a node with readahead called + * by get_datablock_ro. + */ +}; + +#define F2FS_LINK_MAX 32000 /* maximum link count per file */ + +/* for in-memory extent cache entry */ +struct extent_info { + rwlock_t ext_lock; /* rwlock for consistency */ + unsigned int fofs; /* start offset in a file */ + u32 blk_addr; /* start block address of the extent */ + unsigned int len; /* length of the extent */ +}; + +/* + * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 
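+ * COLD_BIT hints data that belongs in the cold log, LOST_PINO_BIT notes that + * the cached parent ino is stale, and the ANDROID_EMU bits tag files handled + * by the sdcard emulation path (see find_in_level()).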
+ */ +#define FADVISE_COLD_BIT 0x01 +#define FADVISE_LOST_PINO_BIT 0x02 +#define FADVISE_ANDROID_EMU 0x10 +#define FADVISE_ANDROID_EMU_ROOT 0x20 + +struct f2fs_inode_info { + struct inode vfs_inode; /* serve a vfs inode */ + unsigned long i_flags; /* keep an inode flags for ioctl */ + unsigned char i_advise; /* use to give file attribute hints */ + unsigned int i_current_depth; /* use only in directory structure */ + unsigned int i_pino; /* parent inode number */ + umode_t i_acl_mode; /* keep file acl mode temporarily */ + + /* Use below internally in f2fs*/ + unsigned long flags; /* use to pass per-file flags */ + atomic_t dirty_dents; /* # of dirty dentry pages */ + f2fs_hash_t chash; /* hash value of given file name */ + unsigned int clevel; /* maximum level of given file name */ + nid_t i_xattr_nid; /* node id that contains xattrs */ + unsigned long long xattr_ver; /* cp version of xattr modification */ + struct extent_info ext; /* in-memory extent cache entry */ +}; + +static inline void get_extent_info(struct extent_info *ext, + struct f2fs_extent i_ext) +{ + write_lock(&ext->ext_lock); + ext->fofs = le32_to_cpu(i_ext.fofs); + ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->len = le32_to_cpu(i_ext.len); + write_unlock(&ext->ext_lock); +} + +static inline void set_raw_extent(struct extent_info *ext, + struct f2fs_extent *i_ext) +{ + read_lock(&ext->ext_lock); + i_ext->fofs = cpu_to_le32(ext->fofs); + i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->len = cpu_to_le32(ext->len); + read_unlock(&ext->ext_lock); +} + +struct f2fs_nm_info { + block_t nat_blkaddr; /* base disk address of NAT */ + nid_t max_nid; /* maximum possible node ids */ + nid_t next_scan_nid; /* the next nid to be scanned */ + + /* NAT cache management */ + struct radix_tree_root nat_root;/* root of the nat entry cache */ + rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + unsigned int nat_cnt; /* the # of cached nat entries */ + struct list_head nat_entries; /* cached nat entry list (clean) */ + struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ + + /* free node ids management */ + struct list_head free_nid_list; /* a list for free nids */ + spinlock_t free_nid_list_lock; /* protect free nid list */ + unsigned int fcnt; /* the number of free node id */ + struct mutex build_lock; /* lock for build free nids */ + + /* for checkpoint */ + char *nat_bitmap; /* NAT bitmap pointer */ + int bitmap_size; /* bitmap size */ +}; + +/* + * this structure is used as one of function parameters. + * all the information are dedicated to a given direct node block determined + * by the data offset in a file. + */ +struct dnode_of_data { + struct inode *inode; /* vfs inode pointer */ + struct page *inode_page; /* its inode page, NULL is possible */ + struct page *node_page; /* cached direct node page */ + nid_t nid; /* node id of the direct node block */ + unsigned int ofs_in_node; /* data offset in the node page */ + bool inode_page_locked; /* inode page is locked or not */ + block_t data_blkaddr; /* block address of the node block */ +}; + +static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, + struct page *ipage, struct page *npage, nid_t nid) +{ + memset(dn, 0, sizeof(*dn)); + dn->inode = inode; + dn->inode_page = ipage; + dn->node_page = npage; + dn->nid = nid; +} + +/* + * For SIT manager + * + * By default, there are 6 active log areas across the whole main area. 
+ * When considering hot and cold data separation to reduce cleaning overhead, + * we split 3 for data logs and 3 for node logs as hot, warm, and cold types, + * respectively. + * In the current design, you should not change the numbers intentionally. + * Instead, as a mount option such as active_logs=x, you can use 2, 4, or 6 + * logs individually according to the underlying devices. (default: 6) + * Just in case, the on-disk layout covers a maximum of 16 logs that consist + * of 8 for data and 8 for node logs. + */ +#define NR_CURSEG_DATA_TYPE (3) +#define NR_CURSEG_NODE_TYPE (3) +#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) + +enum { + CURSEG_HOT_DATA = 0, /* directory entry blocks */ + CURSEG_WARM_DATA, /* data blocks */ + CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ + CURSEG_HOT_NODE, /* direct node blocks of directory files */ + CURSEG_WARM_NODE, /* direct node blocks of normal files */ + CURSEG_COLD_NODE, /* indirect node blocks */ + NO_CHECK_TYPE +}; + +struct f2fs_sm_info { + struct sit_info *sit_info; /* whole segment information */ + struct free_segmap_info *free_info; /* free segment information */ + struct dirty_seglist_info *dirty_info; /* dirty segment information */ + struct curseg_info *curseg_array; /* active segment information */ + + struct list_head wblist_head; /* list of under-writeback pages */ + spinlock_t wblist_lock; /* lock for checkpoint */ + + block_t seg0_blkaddr; /* block address of 0'th segment */ + block_t main_blkaddr; /* start block address of main area */ + block_t ssa_blkaddr; /* start block address of SSA area */ + + unsigned int segment_count; /* total # of segments */ + unsigned int main_segments; /* # of segments in main area */ + unsigned int reserved_segments; /* # of reserved segments */ + unsigned int ovp_segments; /* # of overprovision segments */ +}; + +/* + * For superblock + */ +/* + * COUNT_TYPE for monitoring + * + * f2fs monitors the number of several block types such as pages under + * writeback, dirty dentry blocks, dirty node blocks, and dirty meta blocks. + */ +enum count_type { + F2FS_WRITEBACK, + F2FS_DIRTY_DENTS, + F2FS_DIRTY_NODES, + F2FS_DIRTY_META, + NR_COUNT_TYPE, +}; + +/* + * Used as sbi->fs_lock[NR_GLOBAL_LOCKS]. + * The checkpoint procedure blocks all the locks in this fs_lock array. + * Other FS operations each grab a free lock; if there is no free lock, + * they wait to grab one in a round-robin manner. + */ +#define NR_GLOBAL_LOCKS 8 + +/* + * The below are the page types of bios used in submit_bio(). + * The available types are: + * DATA User data pages. It operates in async mode. + * NODE Node pages. It operates in async mode. + * META FS metadata pages such as SIT, NAT, CP. + * NR_PAGE_TYPE The number of page types. + * META_FLUSH Make sure the previous pages are written, + * waiting for the bio's completion. + * ... Can only be used with META.
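+ * META_FLUSH is placed after NR_PAGE_TYPE, so it does not take a bio slot + * of its own in the per-type arrays (sbi->bio[NR_PAGE_TYPE]) declared below.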
+ */ +enum page_type { + DATA, + NODE, + META, + NR_PAGE_TYPE, + META_FLUSH, +}; + +/* + * Android sdcard emulation flags + */ +#define F2FS_ANDROID_EMU_NOCASE 0x00000001 + +struct f2fs_sb_info { + struct super_block *sb; /* pointer to VFS super block */ + struct proc_dir_entry *s_proc; /* proc entry */ + struct buffer_head *raw_super_buf; /* buffer head of raw sb */ + struct f2fs_super_block *raw_super; /* raw super block pointer */ + int s_dirty; /* dirty flag for checkpoint */ + + /* for node-related operations */ + struct f2fs_nm_info *nm_info; /* node manager */ + struct inode *node_inode; /* cache node blocks */ + + /* for segment-related operations */ + struct f2fs_sm_info *sm_info; /* segment manager */ + struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ + sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ + struct rw_semaphore bio_sem; /* IO semaphore */ + + /* for checkpoint */ + struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + struct inode *meta_inode; /* cache meta blocks */ + struct mutex cp_mutex; /* checkpoint procedure lock */ + struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ + struct mutex node_write; /* locking node writes */ + struct mutex writepages; /* mutex for writepages() */ + unsigned char next_lock_num; /* round-robin global locks */ + int por_doing; /* recovery is doing or not */ + int on_build_free_nids; /* build_free_nids is doing */ + + /* for orphan inode management */ + struct list_head orphan_inode_list; /* orphan inode list */ + struct mutex orphan_inode_mutex; /* for orphan inode list */ + unsigned int n_orphans; /* # of orphan inodes */ + + /* for directory inode management */ + struct list_head dir_inode_list; /* dir inode list */ + spinlock_t dir_inode_lock; /* for dir inode list lock */ + + /* basic file system units */ + unsigned int log_sectors_per_block; /* log2 sectors per block */ + unsigned int log_blocksize; /* log2 block size */ + unsigned int blocksize; /* block size */ + unsigned int root_ino_num; /* root inode number*/ + unsigned int node_ino_num; /* node inode number*/ + unsigned int meta_ino_num; /* meta inode number*/ + unsigned int log_blocks_per_seg; /* log2 blocks per segment */ + unsigned int blocks_per_seg; /* blocks per segment */ + unsigned int segs_per_sec; /* segments per section */ + unsigned int secs_per_zone; /* sections per zone */ + unsigned int total_sections; /* total section count */ + unsigned int total_node_count; /* total node block count */ + unsigned int total_valid_node_count; /* valid node block count */ + unsigned int total_valid_inode_count; /* valid inode count */ + int active_logs; /* # of active logs */ + + block_t user_block_count; /* # of user blocks */ + block_t total_valid_block_count; /* # of valid blocks */ + block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t last_valid_block_count; /* for recovery */ + u32 s_next_generation; /* for NFS support */ + atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ + + struct f2fs_mount_info mount_opt; /* mount options */ + + /* for cleaning operations */ + struct mutex gc_mutex; /* mutex for GC */ + struct f2fs_gc_kthread *gc_thread; /* GC thread */ + unsigned int cur_victim_sec; /* current victim section num */ + + /* + * for stat information. + * one is for the LFS mode, and the other is for the SSR mode. 
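+ * (this is what the two slots of segment_count[] and block_count[] below + * are for: one per allocation mode)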
+ */ +#ifdef CONFIG_F2FS_STAT_FS + struct f2fs_stat_info *stat_info; /* FS status information */ + unsigned int segment_count[2]; /* # of allocated segments */ + unsigned int block_count[2]; /* # of allocated blocks */ + int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ + int bg_gc; /* background gc calls */ + unsigned int n_dirty_dirs; /* # of dir inodes */ +#endif + unsigned int last_victim[2]; /* last victim segment # */ + spinlock_t stat_lock; /* lock for stat operations */ + + /* For sysfs suppport */ + struct kobject s_kobj; + struct completion s_kobj_unregister; + + /* For Android sdcard emulation */ + u32 android_emu_uid; + u32 android_emu_gid; + umode_t android_emu_mode; + int android_emu_flags; +}; + +/* + * Inline functions + */ +static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) +{ + return container_of(inode, struct f2fs_inode_info, vfs_inode); +} + +static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_super_block *)(sbi->raw_super); +} + +static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_checkpoint *)(sbi->ckpt); +} + +static inline struct f2fs_node *F2FS_NODE(struct page *page) +{ + return (struct f2fs_node *)page_address(page); +} + +static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_nm_info *)(sbi->nm_info); +} + +static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_sm_info *)(sbi->sm_info); +} + +static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi) +{ + return (struct sit_info *)(SM_I(sbi)->sit_info); +} + +static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) +{ + return (struct free_segmap_info *)(SM_I(sbi)->free_info); +} + +static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) +{ + return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); +} + +static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 1; +} + +static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 0; +} + +static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) +{ + return le64_to_cpu(cp->checkpoint_ver); +} + +static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + return ckpt_flags & f; +} + +static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags |= f; + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags &= (~f); + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void mutex_lock_all(struct f2fs_sb_info *sbi) +{ + int i; + + for (i = 0; i < NR_GLOBAL_LOCKS; i++) { + /* + * This is the only time we take multiple fs_lock[] + * instances; the order is immaterial since we + * always hold cp_mutex, which serializes multiple + * such operations. 
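+ * mutex_lock_nest_lock() is the lockdep annotation for exactly this + * pattern. Ordinary operations take a single slot instead: + * + * ilock = mutex_lock_op(sbi); + * ... update metadata ... + * mutex_unlock_op(sbi, ilock); + * + * as done throughout file.c below.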
+ */ + mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); + } +} + +static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) +{ + int i = 0; + for (; i < NR_GLOBAL_LOCKS; i++) + mutex_unlock(&sbi->fs_lock[i]); +} + +static inline int mutex_lock_op(struct f2fs_sb_info *sbi) +{ + unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; + int i = 0; + + for (; i < NR_GLOBAL_LOCKS; i++) + if (mutex_trylock(&sbi->fs_lock[i])) + return i; + + mutex_lock(&sbi->fs_lock[next_lock]); + sbi->next_lock_num++; + return next_lock; +} + +static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) +{ + if (ilock < 0) + return; + BUG_ON(ilock >= NR_GLOBAL_LOCKS); + mutex_unlock(&sbi->fs_lock[ilock]); +} + +/* + * Check whether the given nid is within node id range. + */ +static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +{ + WARN_ON((nid >= NM_I(sbi)->max_nid)); + if (nid >= NM_I(sbi)->max_nid) + return -EINVAL; + return 0; +} + +#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 + +/* + * Check whether the inode has blocks or not + */ +static inline int F2FS_HAS_BLOCKS(struct inode *inode) +{ + if (F2FS_I(inode)->i_xattr_nid) + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); + else + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); +} + +static inline int f2fs_handle_error(struct f2fs_sb_info *sbi) +{ + if (test_opt(sbi, ERRORS_PANIC)) + BUG(); + if (test_opt(sbi, ERRORS_RECOVER)) + return 1; + return 0; +} + +static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, blkcnt_t count) +{ + block_t valid_block_count; + + spin_lock(&sbi->stat_lock); + valid_block_count = + sbi->total_valid_block_count + (block_t)count; + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + inode->i_blocks += count; + sbi->total_valid_block_count = valid_block_count; + sbi->alloc_valid_block_count += (block_t)count; + spin_unlock(&sbi->stat_lock); + return true; +} + +static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, + blkcnt_t count) +{ + spin_lock(&sbi->stat_lock); + + if (sbi->total_valid_block_count < (block_t)count) { + pr_crit("F2FS-fs (%s): block accounting error: %u < %llu\n", + sbi->sb->s_id, sbi->total_valid_block_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_block_count = count; + } + if (inode->i_blocks < count) { + pr_crit("F2FS-fs (%s): inode accounting error: %llu < %llu\n", + sbi->sb->s_id, inode->i_blocks, count); + f2fs_handle_error(sbi); + inode->i_blocks = count; + } + + inode->i_blocks -= count; + sbi->total_valid_block_count -= (block_t)count; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_inc(&sbi->nr_pages[count_type]); + F2FS_SET_SB_DIRT(sbi); +} + +static inline void inode_inc_dirty_dents(struct inode *inode) +{ + atomic_inc(&F2FS_I(inode)->dirty_dents); +} + +static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_dec(&sbi->nr_pages[count_type]); +} + +static inline void inode_dec_dirty_dents(struct inode *inode) +{ + atomic_dec(&F2FS_I(inode)->dirty_dents); +} + +static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) +{ + return atomic_read(&sbi->nr_pages[count_type]); +} + +static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) +{ + unsigned int pages_per_sec = sbi->segs_per_sec * + (1 << sbi->log_blocks_per_seg); + return ((get_pages(sbi, 
block_type) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; +} + +static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_block_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + /* return NAT or SIT bitmap */ + if (flag == NAT_BITMAP) + return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); + else if (flag == SIT_BITMAP) + return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + + return 0; +} + +static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + int offset = (flag == NAT_BITMAP) ? + le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; +} + +static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) +{ + block_t start_addr; + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_version = cur_cp_version(ckpt); + + start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + + /* + * an odd-numbered checkpoint should be at cp segment 0, + * and an even-numbered one must be at cp segment 1 + */ + if (!(ckpt_version & 1)) + start_addr += sbi->blocks_per_seg; + + return start_addr; +} + +static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + block_t valid_block_count; + unsigned int valid_node_count; + + spin_lock(&sbi->stat_lock); + + valid_block_count = sbi->total_valid_block_count + (block_t)count; + valid_node_count = sbi->total_valid_node_count + count; + + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (valid_node_count > sbi->total_node_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (inode) + inode->i_blocks += count; + sbi->alloc_valid_block_count += (block_t)count; + sbi->total_valid_node_count = valid_node_count; + sbi->total_valid_block_count = valid_block_count; + spin_unlock(&sbi->stat_lock); + + return true; +} + +static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + spin_lock(&sbi->stat_lock); + + if (sbi->total_valid_block_count < count) { + pr_crit("F2FS-fs (%s): block accounting error: %u < %u\n", + sbi->sb->s_id, sbi->total_valid_block_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_block_count = count; + } + if (sbi->total_valid_node_count < count) { + pr_crit("F2FS-fs (%s): node accounting error: %u < %u\n", + sbi->sb->s_id, sbi->total_valid_node_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_node_count = count; + } + if (inode->i_blocks < count) { + pr_crit("F2FS-fs (%s): inode accounting error: %llu < %u\n", + sbi->sb->s_id, inode->i_blocks, count); + f2fs_handle_error(sbi); + inode->i_blocks = count; + } + + inode->i_blocks -= count; + sbi->total_valid_node_count -= count; + sbi->total_valid_block_count -= (block_t)count; + + spin_unlock(&sbi->stat_lock); +} + +static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_node_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) +{ +
spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); + sbi->total_valid_inode_count++; + spin_unlock(&sbi->stat_lock); +} + +static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(!sbi->total_valid_inode_count); + sbi->total_valid_inode_count--; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_inode_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void f2fs_put_page(struct page *page, int unlock) +{ + if (!page || IS_ERR(page)) + return; + + if (unlock) { + BUG_ON(!PageLocked(page)); + unlock_page(page); + } + page_cache_release(page); +} + +static inline void f2fs_put_dnode(struct dnode_of_data *dn) +{ + if (dn->node_page) + f2fs_put_page(dn->node_page, 1); + if (dn->inode_page && dn->node_page != dn->inode_page) + f2fs_put_page(dn->inode_page, 0); + dn->node_page = NULL; + dn->inode_page = NULL; +} + +static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, + size_t size, void (*ctor)(void *)) +{ + return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); +} + +#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) + +static inline bool IS_INODE(struct page *page) +{ + struct f2fs_node *p = F2FS_NODE(page); + return RAW_IS_INODE(p); +} + +static inline __le32 *blkaddr_in_node(struct f2fs_node *node) +{ + return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; +} + +static inline block_t datablock_addr(struct page *node_page, + unsigned int offset) +{ + struct f2fs_node *raw_node; + __le32 *addr_array; + raw_node = F2FS_NODE(node_page); + addr_array = blkaddr_in_node(raw_node); + return le32_to_cpu(addr_array[offset]); +} + +static inline int f2fs_test_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + return mask & *addr; +} + +static inline int f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr |= mask; + return ret; +} + +static inline int f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr &= ~mask; + return ret; +} + +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ + FI_UPDATE_DIR, /* should update inode block for consistency */ + FI_DELAY_IPUT, /* used for the recovery */ + FI_INLINE_XATTR, /* used for inline xattr */ +}; + +static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + set_bit(flag, &fi->flags); +} + +static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) +{ + return test_bit(flag, &fi->flags); +} + +static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + clear_bit(flag, &fi->flags); +} + +static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) +{ + fi->i_acl_mode = mode; + set_inode_flag(fi, FI_ACL_MODE); +} + +static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + clear_inode_flag(fi, FI_ACL_MODE); + return 1; + } + return 
0; +} + +int f2fs_android_emu(struct f2fs_sb_info *, struct inode *, u32 *, u32 *, + umode_t *); + +#define IS_ANDROID_EMU(sbi, fi, pfi) \ + (test_opt((sbi), ANDROID_EMU) && \ + (((fi)->i_advise & FADVISE_ANDROID_EMU) || \ + ((pfi)->i_advise & FADVISE_ANDROID_EMU))) + +static inline void get_inline_info(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + if (ri->i_inline & F2FS_INLINE_XATTR) + set_inode_flag(fi, FI_INLINE_XATTR); +} + +static inline void set_raw_inline(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + ri->i_inline = 0; + + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + ri->i_inline |= F2FS_INLINE_XATTR; +} + +static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) +{ + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; + return DEF_ADDRS_PER_INODE; +} + +static inline void *inline_xattr_addr(struct page *page) +{ + struct f2fs_inode *ri; + ri = (struct f2fs_inode *)page_address(page); + return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - + F2FS_INLINE_XATTR_ADDRS]); +} + +static inline int inline_xattr_size(struct inode *inode) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) + return F2FS_INLINE_XATTR_ADDRS << 2; + else + return 0; +} + +static inline int f2fs_readonly(struct super_block *sb) +{ + return sb->s_flags & MS_RDONLY; +} + +/* + * file.c + */ +int f2fs_sync_file(struct file *, loff_t, loff_t, int); +void truncate_data_blocks(struct dnode_of_data *); +void f2fs_truncate(struct inode *); +int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +int f2fs_setattr(struct dentry *, struct iattr *); +int truncate_hole(struct inode *, pgoff_t, pgoff_t); +int truncate_data_blocks_range(struct dnode_of_data *, int); +long f2fs_ioctl(struct file *, unsigned int, unsigned long); +long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); + +/* + * inode.c + */ +void f2fs_set_inode_flags(struct inode *); +struct inode *f2fs_iget(struct super_block *, unsigned long); +void update_inode(struct inode *, struct page *); +int update_inode_page(struct inode *); +int f2fs_write_inode(struct inode *, struct writeback_control *); +void f2fs_evict_inode(struct inode *); + +/* + * namei.c + */ +struct dentry *f2fs_get_parent(struct dentry *child); + +/* + * dir.c + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); +ino_t f2fs_inode_by_name(struct inode *, struct qstr *); +void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, + struct page *, struct inode *); +int update_dent_inode(struct inode *, const struct qstr *); +int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int f2fs_make_empty(struct inode *, struct inode *); +bool f2fs_empty_dir(struct inode *); + +static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) +{ + return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, + inode); +} + +/* + * super.c + */ +int f2fs_sync_fs(struct super_block *, int); +extern __printf(3, 4) +void f2fs_msg(struct super_block *, const char *, const char *, ...); + +/* + * hash.c + */ +f2fs_hash_t f2fs_dentry_hash(const char *, size_t); + +/* + * node.c + */ +struct dnode_of_data; +struct node_info; + +int is_checkpointed_node(struct f2fs_sb_info *, nid_t); +void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); +int 
get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); +int truncate_inode_blocks(struct inode *, pgoff_t); +int truncate_xattr_node(struct inode *, struct page *); +int remove_inode_page(struct inode *); +struct page *new_inode_page(struct inode *, const struct qstr *); +struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); +void ra_node_page(struct f2fs_sb_info *, nid_t); +struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_node_page_ra(struct page *, int); +void sync_inode_page(struct dnode_of_data *); +int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +bool alloc_nid(struct f2fs_sb_info *, nid_t *); +void alloc_nid_done(struct f2fs_sb_info *, nid_t); +void alloc_nid_failed(struct f2fs_sb_info *, nid_t); +void recover_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, struct node_info *, block_t); +int recover_inode_page(struct f2fs_sb_info *, struct page *); +int restore_node_summary(struct f2fs_sb_info *, unsigned int, + struct f2fs_summary_block *); +void flush_nat_entries(struct f2fs_sb_info *); +int build_node_manager(struct f2fs_sb_info *); +void destroy_node_manager(struct f2fs_sb_info *); +int __init create_node_manager_caches(void); +void destroy_node_manager_caches(void); + +/* + * segment.c + */ +void f2fs_balance_fs(struct f2fs_sb_info *); +void invalidate_blocks(struct f2fs_sb_info *, block_t); +void clear_prefree_segments(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *); +void allocate_new_segments(struct f2fs_sb_info *); +struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +struct bio *f2fs_bio_alloc(struct block_device *, int); +void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); +void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); +void write_meta_page(struct f2fs_sb_info *, struct page *); +void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, + block_t, block_t *); +void write_data_page(struct inode *, struct page *, struct dnode_of_data*, + block_t, block_t *); +void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); +void recover_data_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void rewrite_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void write_data_summaries(struct f2fs_sb_info *, block_t); +void write_node_summaries(struct f2fs_sb_info *, block_t); +int lookup_journal_in_cursum(struct f2fs_summary_block *, + int, unsigned int, int); +void flush_sit_entries(struct f2fs_sb_info *); +int build_segment_manager(struct f2fs_sb_info *); +void destroy_segment_manager(struct f2fs_sb_info *); + +/* + * checkpoint.c + */ +struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +int acquire_orphan_inode(struct f2fs_sb_info *); +void release_orphan_inode(struct f2fs_sb_info *); +void add_orphan_inode(struct f2fs_sb_info *, nid_t); +void remove_orphan_inode(struct f2fs_sb_info *, nid_t); +int recover_orphan_inodes(struct f2fs_sb_info *); +int get_valid_checkpoint(struct f2fs_sb_info *); +void set_dirty_dir_page(struct inode *, struct page *); +void add_dirty_dir_inode(struct inode *); +void remove_dirty_dir_inode(struct inode *); +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); +void sync_dirty_dir_inodes(struct f2fs_sb_info 
*); +void write_checkpoint(struct f2fs_sb_info *, bool); +void init_orphan_info(struct f2fs_sb_info *); +int __init create_checkpoint_caches(void); +void destroy_checkpoint_caches(void); + +/* + * data.c + */ +int reserve_new_block(struct dnode_of_data *); +void update_extent_cache(block_t, struct dnode_of_data *); +struct page *find_data_page(struct inode *, pgoff_t, bool); +struct page *get_lock_data_page(struct inode *, pgoff_t); +struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); +int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); +int do_write_data_page(struct page *); + +/* + * gc.c + */ +int start_gc_thread(struct f2fs_sb_info *); +void stop_gc_thread(struct f2fs_sb_info *); +block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); +int f2fs_gc(struct f2fs_sb_info *); +void build_gc_manager(struct f2fs_sb_info *); +int __init create_gc_caches(void); +void destroy_gc_caches(void); + +/* + * recovery.c + */ +int recover_fsync_data(struct f2fs_sb_info *); +bool space_for_roll_forward(struct f2fs_sb_info *); + +/* + * debug.c + */ +#ifdef CONFIG_F2FS_STAT_FS +struct f2fs_stat_info { + struct list_head stat_list; + struct f2fs_sb_info *sbi; + struct mutex stat_lock; + int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; + int main_area_segs, main_area_sections, main_area_zones; + int hit_ext, total_ext; + int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; + int nats, sits, fnids; + int total_count, utilization; + int bg_gc; + unsigned int valid_count, valid_node_count, valid_inode_count; + unsigned int bimodal, avg_vblocks; + int util_free, util_valid, util_invalid; + int rsvd_segs, overp_segs; + int dirty_count, node_pages, meta_pages; + int prefree_count, call_count; + int tot_segs, node_segs, data_segs, free_segs, free_secs; + int tot_blks, data_blks, node_blks; + int curseg[NR_CURSEG_TYPE]; + int cursec[NR_CURSEG_TYPE]; + int curzone[NR_CURSEG_TYPE]; + + unsigned int segment_count[2]; + unsigned int block_count[2]; + unsigned base_mem, cache_mem; +}; + +static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_stat_info*)sbi->stat_info; +} + +#define stat_inc_call_count(si) ((si)->call_count++) + +#define stat_inc_seg_count(sbi, type) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + (si)->tot_segs++; \ + if (type == SUM_TYPE_DATA) \ + si->data_segs++; \ + else \ + si->node_segs++; \ + } while (0) + +#define stat_inc_tot_blk_count(si, blks) \ + (si->tot_blks += (blks)) + +#define stat_inc_data_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + stat_inc_tot_blk_count(si, blks); \ + si->data_blks += (blks); \ + } while (0) + +#define stat_inc_node_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + stat_inc_tot_blk_count(si, blks); \ + si->node_blks += (blks); \ + } while (0) + +int f2fs_build_stats(struct f2fs_sb_info *); +void f2fs_destroy_stats(struct f2fs_sb_info *); +void __init f2fs_create_root_stats(void); +void f2fs_destroy_root_stats(void); +#else +#define stat_inc_call_count(si) +#define stat_inc_seg_count(si, type) +#define stat_inc_tot_blk_count(si, blks) +#define stat_inc_data_blk_count(si, blks) +#define stat_inc_node_blk_count(sbi, blks) + +static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } +static inline void __init f2fs_create_root_stats(void) { } +static inline void 
f2fs_destroy_root_stats(void) { } +#endif + +extern const struct file_operations f2fs_dir_operations; +extern const struct file_operations f2fs_file_operations; +extern const struct inode_operations f2fs_file_inode_operations; +extern const struct address_space_operations f2fs_dblock_aops; +extern const struct address_space_operations f2fs_node_aops; +extern const struct address_space_operations f2fs_meta_aops; +extern const struct inode_operations f2fs_dir_inode_operations; +extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_special_inode_operations; +#endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c new file mode 100644 index 00000000000..5249b9fb114 --- /dev/null +++ b/fs/f2fs/file.c @@ -0,0 +1,727 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/file.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "xattr.h" +#include "acl.h" +#include + +static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct page *page = vmf->page; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + block_t old_blk_addr; + struct dnode_of_data dn; + int err, ilock; + + f2fs_balance_fs(sbi); + + /* Wait if fs is frozen. This is racy so we check again later on + * and retry if the fs has been frozen after the page lock has + * been acquired + */ + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + + /* block allocation */ + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); + if (err) { + mutex_unlock_op(sbi, ilock); + goto out; + } + + old_blk_addr = dn.data_blkaddr; + + if (old_blk_addr == NULL_ADDR) { + err = reserve_new_block(&dn); + if (err) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + goto out; + } + } + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + file_update_time(vma->vm_file); + lock_page(page); + if (page->mapping != inode->i_mapping || + page_offset(page) > i_size_read(inode) || + !PageUptodate(page)) { + unlock_page(page); + err = -EFAULT; + goto out; + } + + /* + * check to see if the page is mapped already (no holes) + */ + if (PageMappedToDisk(page)) + goto mapped; + + /* page is wholly or partially inside EOF */ + if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { + unsigned offset; + offset = i_size_read(inode) & ~PAGE_CACHE_MASK; + zero_user_segment(page, offset, PAGE_CACHE_SIZE); + } + set_page_dirty(page); + SetPageUptodate(page); + +mapped: + /* fill the page */ + wait_on_page_writeback(page); +out: + return block_page_mkwrite_return(err); +} + +static const struct vm_operations_struct f2fs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = f2fs_vm_page_mkwrite, +}; + +static int get_parent_ino(struct inode *inode, nid_t *pino) +{ + struct dentry *dentry; + + inode = igrab(inode); + + /* Alex - the following is equivalent to: dentry = d_find_any_alias(inode); */ + dentry = NULL; + spin_lock(&inode->i_lock); + if (!list_empty(&inode->i_dentry)) { + dentry = 
list_first_entry(&inode->i_dentry, + struct dentry, d_alias); + dget(dentry); + } + spin_unlock(&inode->i_lock); + + iput(inode); + if (!dentry) + return 0; + + if (update_dent_inode(inode, &dentry->d_name)) { + dput(dentry); + return 0; + } + + *pino = parent_ino(dentry); + dput(dentry); + return 1; +} + +int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file->f_mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ret = 0; + bool need_cp = false; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + + if (f2fs_readonly(inode->i_sb)) + return 0; + + trace_f2fs_sync_file_enter(inode); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) { + trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + return ret; + } + + /* guarantee free sections for fsync */ + f2fs_balance_fs(sbi); + + mutex_lock(&inode->i_mutex); + + /* + * Both of fdatasync() and fsync() are able to be recovered from + * sudden-power-off. + */ + if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) + need_cp = true; + else if (file_wrong_pino(inode)) + need_cp = true; + else if (!space_for_roll_forward(sbi)) + need_cp = true; + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + need_cp = true; + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) + need_cp = true; + + if (need_cp) { + nid_t pino; + + F2FS_I(inode)->xattr_ver = 0; + + /* all the dirty node pages should be flushed for POR */ + ret = f2fs_sync_fs(inode->i_sb, 1); + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + F2FS_I(inode)->i_pino = pino; + file_got_pino(inode); + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } + } else { + /* if there is no written node page, write its inode page */ + while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } + filemap_fdatawait_range(sbi->node_inode->i_mapping, + 0, LONG_MAX); + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + } +out: + mutex_unlock(&inode->i_mutex); + trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + return ret; +} + +static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); + vma->vm_ops = &f2fs_file_vm_ops; + return 0; +} + +int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +{ + int nr_free = 0, ofs = dn->ofs_in_node; + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_node *raw_node; + __le32 *addr; + + raw_node = F2FS_NODE(dn->node_page); + addr = blkaddr_in_node(raw_node) + ofs; + + for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { + block_t blkaddr = le32_to_cpu(*addr); + if (blkaddr == NULL_ADDR) + continue; + + update_extent_cache(NULL_ADDR, dn); + invalidate_blocks(sbi, blkaddr); + nr_free++; + } + if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); + set_page_dirty(dn->node_page); + sync_inode_page(dn); + } + dn->ofs_in_node = ofs; + + trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, + dn->ofs_in_node, nr_free); + return nr_free; +} + +void truncate_data_blocks(struct dnode_of_data *dn) +{ + truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); +} + +static void truncate_partial_data_page(struct inode *inode, u64 from) +{ + unsigned offset = from & (PAGE_CACHE_SIZE - 1); + struct page *page; + + if 
(!offset) + return; + + page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); + if (IS_ERR(page)) + return; + + lock_page(page); + if (page->mapping != inode->i_mapping) { + f2fs_put_page(page, 1); + return; + } + wait_on_page_writeback(page); + zero_user(page, offset, PAGE_CACHE_SIZE - offset); + set_page_dirty(page); + f2fs_put_page(page, 1); +} + +static int truncate_blocks(struct inode *inode, u64 from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + unsigned int blocksize = inode->i_sb->s_blocksize; + struct dnode_of_data dn; + pgoff_t free_from; + int count = 0, ilock = -1; + int err; + + trace_f2fs_truncate_blocks_enter(inode, from); + + free_from = (pgoff_t) + ((from + blocksize - 1) >> (sbi->log_blocksize)); + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); + if (err) { + if (err == -ENOENT) + goto free_next; + mutex_unlock_op(sbi, ilock); + trace_f2fs_truncate_blocks_exit(inode, err); + return err; + } + + if (IS_INODE(dn.node_page)) + count = ADDRS_PER_INODE(F2FS_I(inode)); + else + count = ADDRS_PER_BLOCK; + + count -= dn.ofs_in_node; + BUG_ON(count < 0); + + if (dn.ofs_in_node || IS_INODE(dn.node_page)) { + truncate_data_blocks_range(&dn, count); + free_from += count; + } + + f2fs_put_dnode(&dn); +free_next: + err = truncate_inode_blocks(inode, free_from); + mutex_unlock_op(sbi, ilock); + + /* lastly zero out the first data page */ + truncate_partial_data_page(inode, from); + + trace_f2fs_truncate_blocks_exit(inode, err); + return err; +} + +void f2fs_truncate(struct inode *inode) +{ + int err; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + + trace_f2fs_truncate(inode); + + err = truncate_blocks(inode, i_size_read(inode)); + if (err) { + f2fs_msg(inode->i_sb, KERN_ERR, "truncate failed with %d", + err); + f2fs_handle_error(F2FS_SB(inode->i_sb)); + } else { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } +} + +int f2fs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blocks <<= 3; + return 0; +} + +#ifdef CONFIG_F2FS_FS_POSIX_ACL +static void __setattr_copy(struct inode *inode, const struct iattr *attr) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int ia_valid = attr->ia_valid; + + if (ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (ia_valid & ATTR_ATIME) + inode->i_atime = timespec_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MTIME) + inode->i_mtime = timespec_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_CTIME) + inode->i_ctime = timespec_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + set_acl_inode(fi, mode); + } +} +#else +#define __setattr_copy setattr_copy +#endif + +int f2fs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode_info *pfi = F2FS_I(dentry->d_parent->d_inode); + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int err; + + err = inode_change_ok(inode, attr); + if (err) + return err; + + if (IS_ANDROID_EMU(sbi, fi, pfi)) + f2fs_android_emu(sbi, inode, &attr->ia_uid, &attr->ia_gid, 
+ &attr->ia_mode); + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + truncate_setsize(inode, attr->ia_size); + f2fs_truncate(inode); + f2fs_balance_fs(F2FS_SB(inode->i_sb)); + } + + __setattr_copy(inode, attr); + + if (attr->ia_valid & ATTR_MODE) { + err = f2fs_acl_chmod(inode); + if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { + inode->i_mode = fi->i_acl_mode; + clear_inode_flag(fi, FI_ACL_MODE); + } + } + + mark_inode_dirty(inode); + return err; +} + +const struct inode_operations f2fs_file_inode_operations = { + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +static void fill_zero(struct inode *inode, pgoff_t index, + loff_t start, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + int ilock; + + if (!len) + return; + + f2fs_balance_fs(sbi); + + ilock = mutex_lock_op(sbi); + page = get_new_data_page(inode, NULL, index, false); + mutex_unlock_op(sbi, ilock); + + if (!IS_ERR(page)) { + wait_on_page_writeback(page); + zero_user(page, start, len); + set_page_dirty(page); + f2fs_put_page(page, 1); + } +} + +int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) +{ + pgoff_t index; + int err; + + for (index = pg_start; index < pg_end; index++) { + struct dnode_of_data dn; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) { + if (err == -ENOENT) + continue; + return err; + } + + if (dn.data_blkaddr != NULL_ADDR) + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + return 0; +} + +static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) +{ + pgoff_t pg_start, pg_end; + loff_t off_start, off_end; + int ret = 0; + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + if (pg_start == pg_end) { + fill_zero(inode, pg_start, off_start, + off_end - off_start); + } else { + if (off_start) + fill_zero(inode, pg_start++, off_start, + PAGE_CACHE_SIZE - off_start); + if (off_end) + fill_zero(inode, pg_end, 0, off_end); + + if (pg_start < pg_end) { + struct address_space *mapping = inode->i_mapping; + loff_t blk_start, blk_end; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + + f2fs_balance_fs(sbi); + + blk_start = pg_start << PAGE_CACHE_SHIFT; + blk_end = pg_end << PAGE_CACHE_SHIFT; + truncate_inode_pages_range(mapping, blk_start, + blk_end - 1); + + ilock = mutex_lock_op(sbi); + ret = truncate_hole(inode, pg_start, pg_end); + mutex_unlock_op(sbi, ilock); + } + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + i_size_read(inode) <= (offset + len)) { + i_size_write(inode, offset); + mark_inode_dirty(inode); + } + + return ret; +} + +static int expand_inode_data(struct inode *inode, loff_t offset, + loff_t len, int mode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + pgoff_t index, pg_start, pg_end; + loff_t new_size = i_size_read(inode); + loff_t off_start, off_end; + int ret = 0; + + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) + return ret; + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & 
(PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + for (index = pg_start; index <= pg_end; index++) { + struct dnode_of_data dn; + int ilock; + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (ret) { + mutex_unlock_op(sbi, ilock); + break; + } + + if (dn.data_blkaddr == NULL_ADDR) { + ret = reserve_new_block(&dn); + if (ret) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + break; + } + } + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + if (pg_start == pg_end) + new_size = offset + len; + else if (index == pg_start && off_start) + new_size = (index + 1) << PAGE_CACHE_SHIFT; + else if (index == pg_end) + new_size = (index << PAGE_CACHE_SHIFT) + off_end; + else + new_size += PAGE_CACHE_SIZE; + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + i_size_read(inode) < new_size) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + } + + return ret; +} + +static long f2fs_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + long ret; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = punch_hole(inode, offset, len, mode); + else + ret = expand_inode_data(inode, offset, len, mode); + + if (!ret) { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + trace_f2fs_fallocate(inode, mode, offset, len, ret); + return ret; +} + +#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) +#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) + +static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & F2FS_REG_FLMASK; + else + return flags & F2FS_OTHER_FLMASK; +} + +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int flags; + int ret; + + switch (cmd) { + case F2FS_IOC_GETFLAGS: + flags = fi->i_flags & FS_FL_USER_VISIBLE; + return put_user(flags, (int __user *) arg); + case F2FS_IOC_SETFLAGS: + { + unsigned int oldflags; + + ret = mnt_want_write(filp->f_path.mnt); + if (ret) + return ret; + + if (!inode_owner_or_capable(inode)) { + ret = -EACCES; + goto out; + } + + if (get_user(flags, (int __user *) arg)) { + ret = -EFAULT; + goto out; + } + + flags = f2fs_mask_flags(inode->i_mode, flags); + + mutex_lock(&inode->i_mutex); + + oldflags = fi->i_flags; + + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; + goto out; + } + } + + flags = flags & FS_FL_USER_MODIFIABLE; + flags |= oldflags & ~FS_FL_USER_MODIFIABLE; + fi->i_flags = flags; + mutex_unlock(&inode->i_mutex); + + f2fs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); +out: + mnt_drop_write(filp->f_path.mnt); + return ret; + } + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case F2FS_IOC32_GETFLAGS: + cmd = F2FS_IOC_GETFLAGS; + break; + case F2FS_IOC32_SETFLAGS: + cmd = F2FS_IOC_SETFLAGS; + break; + default: + return -ENOIOCTLCMD; + } + return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); +} +#endif + +const struct file_operations f2fs_file_operations = { + .llseek = 
generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .open = generic_file_open, + .mmap = f2fs_file_mmap, + .fsync = f2fs_sync_file, + .fallocate = f2fs_fallocate, + .unlocked_ioctl = f2fs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = f2fs_compat_ioctl, +#endif + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c new file mode 100644 index 00000000000..df447eab869 --- /dev/null +++ b/fs/f2fs/gc.c @@ -0,0 +1,740 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/gc.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "gc.h" +#include + +static struct kmem_cache *winode_slab; + +static int gc_thread_func(void *data) +{ + struct f2fs_sb_info *sbi = data; + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; + wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; + long wait_ms; + + wait_ms = gc_th->min_sleep_time; + + do { + if (try_to_freeze()) + continue; + else + wait_event_interruptible_timeout(*wq, + kthread_should_stop(), + msecs_to_jiffies(wait_ms)); + if (kthread_should_stop()) + break; + + /* + * [GC triggering condition] + * 0. GC is not conducted currently. + * 1. There are enough dirty segments. + * 2. IO subsystem is idle by checking the # of writeback pages. + * 3. IO subsystem is idle by checking the # of requests in + * bdev's request list. + * + * Note) We have to avoid triggering GCs too much frequently. + * Because it is possible that some segments can be + * invalidated soon after by user update or deletion. + * So, I'd like to wait some time to collect dirty segments. 
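+ * The checks below map to these conditions: the gc_mutex trylock covers + * 0, is_idle() covers 2 and 3, and has_enough_invalid_blocks() shortens + * or stretches the sleep time according to 1.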
+ */ + if (!mutex_trylock(&sbi->gc_mutex)) + continue; + + if (!is_idle(sbi)) { + wait_ms = increase_sleep_time(gc_th, wait_ms); + mutex_unlock(&sbi->gc_mutex); + continue; + } + + if (has_enough_invalid_blocks(sbi)) + wait_ms = decrease_sleep_time(gc_th, wait_ms); + else + wait_ms = increase_sleep_time(gc_th, wait_ms); + +#ifdef CONFIG_F2FS_STAT_FS + sbi->bg_gc++; +#endif + + /* if return value is not zero, no victim was selected */ + if (f2fs_gc(sbi)) + wait_ms = gc_th->no_gc_sleep_time; + } while (!kthread_should_stop()); + return 0; +} + +int start_gc_thread(struct f2fs_sb_info *sbi) +{ + struct f2fs_gc_kthread *gc_th; + dev_t dev = sbi->sb->s_bdev->bd_dev; + int err = 0; + + if (!test_opt(sbi, BG_GC)) + goto out; + gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); + if (!gc_th) { + err = -ENOMEM; + goto out; + } + + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + + gc_th->gc_idle = 0; + + sbi->gc_thread = gc_th; + init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); + sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, + "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(gc_th->f2fs_gc_task)) { + err = PTR_ERR(gc_th->f2fs_gc_task); + kfree(gc_th); + sbi->gc_thread = NULL; + } + +out: + return err; +} + +void stop_gc_thread(struct f2fs_sb_info *sbi) +{ + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; + if (!gc_th) + return; + kthread_stop(gc_th->f2fs_gc_task); + kfree(gc_th); + sbi->gc_thread = NULL; +} + +static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) +{ + int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + + if (gc_th && gc_th->gc_idle) { + if (gc_th->gc_idle == 1) + gc_mode = GC_CB; + else if (gc_th->gc_idle == 2) + gc_mode = GC_GREEDY; + } + return gc_mode; +} + +static void select_policy(struct f2fs_sb_info *sbi, int gc_type, + int type, struct victim_sel_policy *p) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + if (p->alloc_mode == SSR) { + p->gc_mode = GC_GREEDY; + p->dirty_segmap = dirty_i->dirty_segmap[type]; + p->ofs_unit = 1; + } else { + p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); + p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; + p->ofs_unit = sbi->segs_per_sec; + } + p->offset = sbi->last_victim[p->gc_mode]; +} + +static unsigned int get_max_cost(struct f2fs_sb_info *sbi, + struct victim_sel_policy *p) +{ + /* SSR allocates in a segment unit */ + if (p->alloc_mode == SSR) + return 1 << sbi->log_blocks_per_seg; + if (p->gc_mode == GC_GREEDY) + return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; + else if (p->gc_mode == GC_CB) + return UINT_MAX; + else /* No other gc_mode */ + return 0; +} + +static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int hint = 0; + unsigned int secno; + + /* + * If the gc_type is FG_GC, we can select victim segments + * selected by background GC before. + * Those segments guarantee they have small valid blocks. 
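+ * When such a section is found, its victim_secmap bit is cleared and the + * first segment number of that section is returned; otherwise NULL_SEGNO + * is returned.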
+ */ +next: + secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); + if (secno < TOTAL_SECS(sbi)) { + if (sec_usage_check(sbi, secno)) + goto next; + clear_bit(secno, dirty_i->victim_secmap); + return secno * sbi->segs_per_sec; + } + return NULL_SEGNO; +} + +static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int secno = GET_SECNO(sbi, segno); + unsigned int start = secno * sbi->segs_per_sec; + unsigned long long mtime = 0; + unsigned int vblocks; + unsigned char age = 0; + unsigned char u; + unsigned int i; + + for (i = 0; i < sbi->segs_per_sec; i++) + mtime += get_seg_entry(sbi, start + i)->mtime; + vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + + mtime = div_u64(mtime, sbi->segs_per_sec); + vblocks = div_u64(vblocks, sbi->segs_per_sec); + + u = (vblocks * 100) >> sbi->log_blocks_per_seg; + + /* Handle if the system time is changed by user */ + if (mtime < sit_i->min_mtime) + sit_i->min_mtime = mtime; + if (mtime > sit_i->max_mtime) + sit_i->max_mtime = mtime; + if (sit_i->max_mtime != sit_i->min_mtime) + age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime), + sit_i->max_mtime - sit_i->min_mtime); + + return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); +} + +static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, + struct victim_sel_policy *p) +{ + if (p->alloc_mode == SSR) + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + + /* alloc_mode == LFS */ + if (p->gc_mode == GC_GREEDY) + return get_valid_blocks(sbi, segno, sbi->segs_per_sec); + else + return get_cb_cost(sbi, segno); +} + +/* + * This function is called from two paths. + * One is garbage collection and the other is SSR segment selection. + * When it is called during GC, it just gets a victim segment + * and it does not remove it from dirty seglist. + * When it is called from SSR segment selection, it finds a segment + * which has minimum valid blocks and removes it from dirty seglist. 
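+ * The cost comes from get_gc_cost(): the number of valid blocks in + * GC_GREEDY mode, or a cost-benefit value combining the valid block ratio + * and segment age in GC_CB mode.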
+ */ +static int get_victim_by_default(struct f2fs_sb_info *sbi, + unsigned int *result, int gc_type, int type, char alloc_mode) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct victim_sel_policy p; + unsigned int secno, max_cost; + int nsearched = 0; + + p.alloc_mode = alloc_mode; + select_policy(sbi, gc_type, type, &p); + + p.min_segno = NULL_SEGNO; + p.min_cost = max_cost = get_max_cost(sbi, &p); + + mutex_lock(&dirty_i->seglist_lock); + + if (p.alloc_mode == LFS && gc_type == FG_GC) { + p.min_segno = check_bg_victims(sbi); + if (p.min_segno != NULL_SEGNO) + goto got_it; + } + + while (1) { + unsigned long cost; + unsigned int segno; + + segno = find_next_bit(p.dirty_segmap, + TOTAL_SEGS(sbi), p.offset); + if (segno >= TOTAL_SEGS(sbi)) { + if (sbi->last_victim[p.gc_mode]) { + sbi->last_victim[p.gc_mode] = 0; + p.offset = 0; + continue; + } + break; + } + p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; + secno = GET_SECNO(sbi, segno); + + if (sec_usage_check(sbi, secno)) + continue; + if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) + continue; + + cost = get_gc_cost(sbi, segno, &p); + + if (p.min_cost > cost) { + p.min_segno = segno; + p.min_cost = cost; + } + + if (cost == max_cost) + continue; + + if (nsearched++ >= MAX_VICTIM_SEARCH) { + sbi->last_victim[p.gc_mode] = segno; + break; + } + } + if (p.min_segno != NULL_SEGNO) { +got_it: + if (p.alloc_mode == LFS) { + secno = GET_SECNO(sbi, p.min_segno); + if (gc_type == FG_GC) + sbi->cur_victim_sec = secno; + else + set_bit(secno, dirty_i->victim_secmap); + } + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + + trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, + sbi->cur_victim_sec, + prefree_segments(sbi), free_segments(sbi)); + } + mutex_unlock(&dirty_i->seglist_lock); + + return (p.min_segno == NULL_SEGNO) ? 0 : 1; +} + +static const struct victim_selection default_v_ops = { + .get_victim = get_victim_by_default, +}; + +static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +{ + struct inode_entry *ie; + + list_for_each_entry(ie, ilist, list) + if (ie->inode->i_ino == ino) + return ie->inode; + return NULL; +} + +static void add_gc_inode(struct inode *inode, struct list_head *ilist) +{ + struct inode_entry *new_ie; + + if (inode == find_gc_inode(inode->i_ino, ilist)) { + iput(inode); + return; + } +repeat: + new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); + if (!new_ie) { + cond_resched(); + goto repeat; + } + new_ie->inode = inode; + list_add_tail(&new_ie->list, ilist); +} + +static void put_gc_inode(struct list_head *ilist) +{ + struct inode_entry *ie, *next_ie; + list_for_each_entry_safe(ie, next_ie, ilist, list) { + iput(ie->inode); + list_del(&ie->list); + kmem_cache_free(winode_slab, ie); + } +} + +static int check_valid_map(struct f2fs_sb_info *sbi, + unsigned int segno, int offset) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct seg_entry *sentry; + int ret; + + mutex_lock(&sit_i->sentry_lock); + sentry = get_seg_entry(sbi, segno); + ret = f2fs_test_bit(offset, sentry->cur_valid_map); + mutex_unlock(&sit_i->sentry_lock); + return ret; +} + +/* + * This function compares node address got in summary with that in NAT. + * On validity, copy that node with cold status, otherwise (invalid node) + * ignore that. 
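+ * The segment is scanned twice: the first pass only issues readahead on the + * live node pages, and the second pass dirties them so they get written to + * a new location.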
+ */ +static void gc_node_segment(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, unsigned int segno, int gc_type) +{ + bool initial = true; + struct f2fs_summary *entry; + int off; + +next_step: + entry = sum; + + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { + nid_t nid = le32_to_cpu(entry->nid); + struct page *node_page; + + /* stop BG_GC if there is not enough free sections. */ + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + return; + + if (check_valid_map(sbi, segno, off) == 0) + continue; + + if (initial) { + ra_node_page(sbi, nid); + continue; + } + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + continue; + + /* set page dirty and write it */ + if (gc_type == FG_GC) { + f2fs_wait_on_page_writeback(node_page, NODE, true); + set_page_dirty(node_page); + } else { + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } + f2fs_put_page(node_page, 1); + stat_inc_node_blk_count(sbi, 1); + } + + if (initial) { + initial = false; + goto next_step; + } + + if (gc_type == FG_GC) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + sync_node_pages(sbi, 0, &wbc); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. + */ + if (get_valid_blocks(sbi, segno, 1) != 0) + goto next_step; + } +} + +/* + * Calculate start block index indicating the given node offset. + * Be careful, caller should give this node offset only indicating direct node + * blocks. If any node offsets, which point the other types of node blocks such + * as indirect or double indirect node blocks, are given, it must be a caller's + * bug. + */ +block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) +{ + unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; + unsigned int bidx; + + if (node_ofs == 0) + return 0; + + if (node_ofs <= 2) { + bidx = node_ofs - 1; + } else if (node_ofs <= indirect_blks) { + int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 2 - dec; + } else { + int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 5 - dec; + } + return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); +} + +static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + struct node_info *dni, block_t blkaddr, unsigned int *nofs) +{ + struct page *node_page; + nid_t nid; + unsigned int ofs_in_node; + block_t source_blkaddr; + + nid = le32_to_cpu(sum->nid); + ofs_in_node = le16_to_cpu(sum->ofs_in_node); + + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return 0; + + get_node_info(sbi, nid, dni); + + if (sum->version != dni->version) { + f2fs_put_page(node_page, 1); + return 0; + } + + *nofs = ofs_of_node(node_page); + source_blkaddr = datablock_addr(node_page, ofs_in_node); + f2fs_put_page(node_page, 1); + + if (source_blkaddr != blkaddr) + return 0; + return 1; +} + +static void move_data_page(struct inode *inode, struct page *page, int gc_type) +{ + if (gc_type == BG_GC) { + if (PageWriteback(page)) + goto out; + set_page_dirty(page); + set_cold_data(page); + } else { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + f2fs_wait_on_page_writeback(page, DATA, true); + + if (clear_page_dirty_for_io(page) && + S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + set_cold_data(page); + do_write_data_page(page); + clear_cold_data(page); + } +out: + f2fs_put_page(page, 1); +} + +/* + * This function tries to get parent node of victim 
data block, and identifies + * data block validity. If the block is valid, copy that with cold status and + * modify parent node. + * If the parent node is not valid or the data block address is different, + * the victim data block is ignored. + */ +static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + struct list_head *ilist, unsigned int segno, int gc_type) +{ + struct super_block *sb = sbi->sb; + struct f2fs_summary *entry; + block_t start_addr; + int off; + int phase = 0; + + start_addr = START_BLOCK(sbi, segno); + +next_step: + entry = sum; + + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { + struct page *data_page; + struct inode *inode; + struct node_info dni; /* dnode info for the data */ + unsigned int ofs_in_node, nofs; + block_t start_bidx; + + /* stop BG_GC if there is not enough free sections. */ + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + return; + + if (check_valid_map(sbi, segno, off) == 0) + continue; + + if (phase == 0) { + ra_node_page(sbi, le32_to_cpu(entry->nid)); + continue; + } + + /* Get an inode by ino with checking validity */ + if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) + continue; + + if (phase == 1) { + ra_node_page(sbi, dni.ino); + continue; + } + + ofs_in_node = le16_to_cpu(entry->ofs_in_node); + + if (phase == 2) { + inode = f2fs_iget(sb, dni.ino); + if (IS_ERR(inode)) + continue; + + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + + data_page = find_data_page(inode, + start_bidx + ofs_in_node, false); + if (IS_ERR(data_page)) + goto next_iput; + + f2fs_put_page(data_page, 0); + add_gc_inode(inode, ilist); + } else { + inode = find_gc_inode(dni.ino, ilist); + if (inode) { + start_bidx = start_bidx_of_node(nofs, + F2FS_I(inode)); + data_page = get_lock_data_page(inode, + start_bidx + ofs_in_node); + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); + } + } + continue; +next_iput: + iput(inode); + } + + if (++phase < 4) + goto next_step; + + if (gc_type == FG_GC) { + f2fs_submit_bio(sbi, DATA, true); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. 
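For reference, gc_data_segment() above walks the victim segment's summary block four times, and each pass only does the work that the previous pass has already prefetched. A small illustrative-only restatement of the phases (not patch code):

#include <stdio.h>

int main(void)
{
        static const char *phase[] = {
                "0: readahead the node page of every valid entry",
                "1: check the dnode and readahead the owning inode's node page",
                "2: iget() the inode, readahead its data page, cache it on ilist",
                "3: lock each data page and move (rewrite) it",
        };
        int i;

        for (i = 0; i < 4; i++)
                printf("phase %s\n", phase[i]);
        return 0;
}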
+ */ + if (get_valid_blocks(sbi, segno, 1) != 0) { + phase = 2; + goto next_step; + } + } +} + +static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, + int gc_type, int type) +{ + struct sit_info *sit_i = SIT_I(sbi); + int ret; + mutex_lock(&sit_i->sentry_lock); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + mutex_unlock(&sit_i->sentry_lock); + return ret; +} + +static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, + struct list_head *ilist, int gc_type) +{ + struct page *sum_page; + struct f2fs_summary_block *sum; + struct blk_plug plug; + + /* read segment summary of victim */ + sum_page = get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return; + + blk_start_plug(&plug); + + sum = page_address(sum_page); + + switch (GET_SUM_TYPE((&sum->footer))) { + case SUM_TYPE_NODE: + gc_node_segment(sbi, sum->entries, segno, gc_type); + break; + case SUM_TYPE_DATA: + gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + break; + } + blk_finish_plug(&plug); + + stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); + stat_inc_call_count(sbi->stat_info); + + f2fs_put_page(sum_page, 1); +} + +int f2fs_gc(struct f2fs_sb_info *sbi) +{ + struct list_head ilist; + unsigned int segno, i; + int gc_type = BG_GC; + int nfree = 0; + int ret = -1; + + INIT_LIST_HEAD(&ilist); +gc_more: + if (!(sbi->sb->s_flags & MS_ACTIVE)) + goto stop; + + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { + gc_type = FG_GC; + write_checkpoint(sbi, false); + } + + if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + goto stop; + ret = 0; + + for (i = 0; i < sbi->segs_per_sec; i++) + do_garbage_collect(sbi, segno + i, &ilist, gc_type); + + if (gc_type == FG_GC) { + sbi->cur_victim_sec = NULL_SEGNO; + nfree++; + WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); + } + + if (has_not_enough_free_secs(sbi, nfree)) + goto gc_more; + + if (gc_type == FG_GC) + write_checkpoint(sbi, false); +stop: + mutex_unlock(&sbi->gc_mutex); + + put_gc_inode(&ilist); + return ret; +} + +void build_gc_manager(struct f2fs_sb_info *sbi) +{ + DIRTY_I(sbi)->v_ops = &default_v_ops; +} + +int __init create_gc_caches(void) +{ + winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", + sizeof(struct inode_entry), NULL); + if (!winode_slab) + return -ENOMEM; + return 0; +} + +void destroy_gc_caches(void) +{ + kmem_cache_destroy(winode_slab); +} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h new file mode 100644 index 00000000000..28c7d8e320c --- /dev/null +++ b/fs/f2fs/gc.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/gc.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define GC_THREAD_MIN_WB_PAGES 1 /* + * a threshold to determine + * whether IO subsystem is idle + * or not + */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ +#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ +#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ + +/* Search max. 
number of dirty segments to select a victim segment */ +#define MAX_VICTIM_SEARCH 20 + +struct f2fs_gc_kthread { + struct task_struct *f2fs_gc_task; + wait_queue_head_t gc_wait_queue_head; + + /* for gc sleep time */ + unsigned int min_sleep_time; + unsigned int max_sleep_time; + unsigned int no_gc_sleep_time; + + /* for changing gc mode */ + unsigned int gc_idle; +}; + +struct inode_entry { + struct list_head list; + struct inode *inode; +}; + +/* + * inline functions + */ +static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) +{ + if (free_segments(sbi) < overprovision_segments(sbi)) + return 0; + else + return (free_segments(sbi) - overprovision_segments(sbi)) + << sbi->log_blocks_per_seg; +} + +static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi) +{ + return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100; +} + +static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t reclaimable_user_blocks = sbi->user_block_count - + written_block_count(sbi); + return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; +} + +static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +{ + if (wait == gc_th->no_gc_sleep_time) + return wait; + + wait += gc_th->min_sleep_time; + if (wait > gc_th->max_sleep_time) + wait = gc_th->max_sleep_time; + return wait; +} + +static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +{ + if (wait == gc_th->no_gc_sleep_time) + wait = gc_th->max_sleep_time; + + wait -= gc_th->min_sleep_time; + if (wait <= gc_th->min_sleep_time) + wait = gc_th->min_sleep_time; + return wait; +} + +static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) +{ + block_t invalid_user_blocks = sbi->user_block_count - + written_block_count(sbi); + /* + * Background GC is triggered with the following condition. + * 1. There are a number of invalid blocks. + * 2. There is not enough free space. + */ + if (invalid_user_blocks > limit_invalid_user_blocks(sbi) && + free_user_blocks(sbi) < limit_free_user_blocks(sbi)) + return true; + return false; +} + +static inline int is_idle(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + struct request_list *rl = &q->rq; + return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); +} diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c new file mode 100644 index 00000000000..9e352f15484 --- /dev/null +++ b/fs/f2fs/hash.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/hash.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext3/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
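The sleep-time helpers above implement a simple back-off: the background GC sleeps longer while it finds nothing to do and shortens the interval again once work appears. A rough userspace sketch using the same constants as the defines above (illustrative only; the work_to_do pattern and the loop are invented, not the in-kernel GC thread):

#include <stdio.h>

#define MIN_MS   30000
#define MAX_MS   60000
#define NOGC_MS 300000

static long increase(long wait)
{
        if (wait == NOGC_MS)
                return wait;
        wait += MIN_MS;
        return wait > MAX_MS ? MAX_MS : wait;
}

static long decrease(long wait)
{
        if (wait == NOGC_MS)
                wait = MAX_MS;
        wait -= MIN_MS;
        return wait < MIN_MS ? MIN_MS : wait;
}

int main(void)
{
        long wait = MIN_MS;
        int round;

        /* pretend the first three rounds find work and the rest do not */
        for (round = 0; round < 6; round++) {
                int work_to_do = round < 3;

                wait = work_to_do ? decrease(wait) : increase(wait);
                printf("round %d: sleep %ld ms\n", round, wait);
        }
        return 0;
}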
+ */ +#include +#include +#include +#include +#include + +#include "f2fs.h" + +/* + * Hashing code copied from ext3 + */ +#define DELTA 0x9E3779B9 + +static void TEA_transform(unsigned int buf[4], unsigned int const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while (--n); + + buf[0] += b0; + buf[1] += b1; +} + +static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) +{ + unsigned pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num * 4) + len = num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) +{ + __u32 hash; + f2fs_hash_t f2fs_hash; + const char *p; + __u32 in[8], buf[4]; + + if ((len <= 2) && (name[0] == '.') && + (name[1] == '.' || name[1] == '\0')) + return 0; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + p = name; + while (1) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + p += 16; + if (len <= 16) + break; + len -= 16; + } + hash = buf[0]; + f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); + return f2fs_hash; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c new file mode 100644 index 00000000000..6b7fbd39180 --- /dev/null +++ b/fs/f2fs/inode.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/inode.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
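One small detail of str2hashbuf() above worth spelling out: the pad word replicates the name length into all four bytes, so buffer slots not covered by the name are filled deterministically. A tiny check (illustrative only):

#include <stdio.h>

int main(void)
{
        unsigned int len = 5;           /* e.g. hashing a 5-character name */
        unsigned int pad = len | (len << 8);

        pad |= pad << 16;               /* 0x05050505 for len = 5 */
        printf("pad = 0x%08x\n", pad);
        return 0;
}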
+ */ +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" + +#include + +void f2fs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = F2FS_I(inode)->i_flags; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | + S_NOATIME | S_DIRSYNC); + + if (flags & FS_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & FS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & FS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & FS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & FS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +static int do_read_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct page *node_page; + struct f2fs_node *rn; + struct f2fs_inode *ri; + + /* Check if ino is within scope */ + if (check_nid_range(sbi, inode->i_ino)) { + f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", + (unsigned long) inode->i_ino); + return -EINVAL; + } + + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + rn = F2FS_NODE(node_page); + ri = &(rn->i); + + inode->i_mode = le16_to_cpu(ri->i_mode); + inode->i_uid = le32_to_cpu(ri->i_uid); + inode->i_gid = le32_to_cpu(ri->i_gid); + set_nlink(inode, le32_to_cpu(ri->i_links)); + inode->i_size = le64_to_cpu(ri->i_size); + inode->i_blocks = le64_to_cpu(ri->i_blocks); + + inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); + inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); + inode->i_generation = le32_to_cpu(ri->i_generation); + if (ri->i_addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); + else + inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); + + fi->i_current_depth = le32_to_cpu(ri->i_current_depth); + fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); + fi->i_flags = le32_to_cpu(ri->i_flags); + fi->flags = 0; + fi->i_advise = ri->i_advise; + fi->i_pino = le32_to_cpu(ri->i_pino); + get_extent_info(&fi->ext, ri->i_ext); + get_inline_info(fi, ri); + f2fs_put_page(node_page, 1); + return 0; +} + +struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + int ret = 0; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + + if (!(inode->i_state & I_NEW)) { + trace_f2fs_iget(inode); + return inode; + } + if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) + goto make_now; + + ret = do_read_inode(inode); + if (ret) + goto bad_inode; +make_now: + if (ino == F2FS_NODE_INO(sbi)) { + inode->i_mapping->a_ops = &f2fs_node_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (ino == F2FS_META_INO(sbi)) { + inode->i_mapping->a_ops = &f2fs_meta_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (S_ISREG(inode->i_mode)) { + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &f2fs_symlink_inode_operations; + 
inode->i_mapping->a_ops = &f2fs_dblock_aops; + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + inode->i_op = &f2fs_special_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + } else { + ret = -EIO; + goto bad_inode; + } + unlock_new_inode(inode); + trace_f2fs_iget(inode); + return inode; + +bad_inode: + iget_failed(inode); + trace_f2fs_iget_exit(inode, ret); + return ERR_PTR(ret); +} + +void update_inode(struct inode *inode, struct page *node_page) +{ + struct f2fs_node *rn; + struct f2fs_inode *ri; + + f2fs_wait_on_page_writeback(node_page, NODE, false); + + rn = F2FS_NODE(node_page); + ri = &(rn->i); + + ri->i_mode = cpu_to_le16(inode->i_mode); + ri->i_advise = F2FS_I(inode)->i_advise; + ri->i_uid = cpu_to_le32(inode->i_uid); + ri->i_gid = cpu_to_le32(inode->i_gid); + ri->i_links = cpu_to_le32(inode->i_nlink); + ri->i_size = cpu_to_le64(i_size_read(inode)); + ri->i_blocks = cpu_to_le64(inode->i_blocks); + set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + set_raw_inline(F2FS_I(inode), ri); + + ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); + ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); + ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth); + ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); + ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); + ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); + ri->i_generation = cpu_to_le32(inode->i_generation); + + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + ri->i_addr[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + ri->i_addr[1] = 0; + } else { + ri->i_addr[0] = 0; + ri->i_addr[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + ri->i_addr[2] = 0; + } + } + + set_cold_node(inode, node_page); + set_page_dirty(node_page); + clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); +} + +int update_inode_page(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *node_page; + + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + update_inode(inode, node_page); + f2fs_put_page(node_page, 1); + return 0; +} + +int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ret, ilock; + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + return 0; + + if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) + return 0; + + /* + * We need to lock here to prevent from producing dirty node pages + * during the urgent cleaning time when runing out of free sections. 
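All of the le32_to_cpu()/cpu_to_le32() pairs in do_read_inode() and update_inode() above exist because the on-disk inode fields are stored little-endian regardless of host byte order. A standalone illustration of what such a conversion amounts to (store_le32 is an invented helper, not a kernel API):

#include <stdio.h>
#include <stdint.h>

static void store_le32(uint8_t out[4], uint32_t v)
{
        out[0] = v & 0xff;              /* least significant byte first */
        out[1] = (v >> 8) & 0xff;
        out[2] = (v >> 16) & 0xff;
        out[3] = (v >> 24) & 0xff;
}

int main(void)
{
        uint8_t raw[4];

        store_le32(raw, 0x12345678u);
        printf("%02x %02x %02x %02x\n", raw[0], raw[1], raw[2], raw[3]);
        return 0;                       /* prints 78 56 34 12 */
}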
+ */ + ilock = mutex_lock_op(sbi); + ret = update_inode_page(inode); + mutex_unlock_op(sbi, ilock); + + if (wbc) + f2fs_balance_fs(sbi); + + return ret; +} + +/* + * Called at the last iput() if i_nlink is zero + */ +void f2fs_evict_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + + trace_f2fs_evict_inode(inode); + truncate_inode_pages(&inode->i_data, 0); + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + goto no_delete; + + BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); + remove_dirty_dir_inode(inode); + + if (inode->i_nlink || is_bad_inode(inode)) + goto no_delete; + + set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); + i_size_write(inode, 0); + + if (F2FS_HAS_BLOCKS(inode)) + f2fs_truncate(inode); + + ilock = mutex_lock_op(sbi); + remove_inode_page(inode); + mutex_unlock_op(sbi, ilock); + +no_delete: + end_writeback(inode); +} diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c new file mode 100644 index 00000000000..7e08105f3c4 --- /dev/null +++ b/fs/f2fs/namei.c @@ -0,0 +1,559 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/namei.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "xattr.h" +#include "acl.h" +#include + +static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + nid_t ino; + struct inode *inode; + bool nid_free = false; + int err, ilock; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + ilock = mutex_lock_op(sbi); + if (!alloc_nid(sbi, &ino)) { + mutex_unlock_op(sbi, ilock); + err = -ENOSPC; + goto fail; + } + mutex_unlock_op(sbi, ilock); + + if (IS_ANDROID_EMU(sbi, F2FS_I(dir), F2FS_I(dir))) + f2fs_android_emu(sbi, inode, &inode->i_uid, + &inode->i_gid, &mode); + else { + inode->i_uid = current_fsuid(); + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else { + inode->i_gid = current_fsgid(); + } + } + + inode->i_ino = ino; + inode->i_mode = mode; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_generation = sbi->s_next_generation++; + + err = insert_inode_locked(inode); + if (err) { + err = -EINVAL; + nid_free = true; + goto out; + } + trace_f2fs_new_inode(inode, 0); + mark_inode_dirty(inode); + return inode; + +out: + clear_nlink(inode); + unlock_new_inode(inode); +fail: + trace_f2fs_new_inode(inode, err); + make_bad_inode(inode); + iput(inode); + if (nid_free) + alloc_nid_failed(sbi, ino); + return ERR_PTR(err); +} + +static int is_multimedia_file(const unsigned char *s, const char *sub) +{ + size_t slen = strlen(s); + size_t sublen = strlen(sub); + + if (sublen > slen) + return 0; + + return !strncasecmp(s + slen - sublen, sub, sublen); +} + +/* + * Set multimedia files as cold files for hot/cold data separation + */ +static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + int i; + __u8 (*extlist)[8] = sbi->raw_super->extension_list; + + int count = le32_to_cpu(sbi->raw_super->extension_count); + for (i = 0; i < count; i++) { + if (is_multimedia_file(name, 
extlist[i])) { + file_set_cold(inode); + break; + } + } +} + +static int f2fs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + nid_t ino = 0; + int err, ilock; + + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) + set_cold_files(sbi, inode, dentry->d_name.name); + + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + ino = inode->i_ino; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + alloc_nid_done(sbi, ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return 0; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, ino); + return err; +} + +static int f2fs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + struct super_block *sb; + struct f2fs_sb_info *sbi; + int err, ilock; + + if (inode->i_nlink >= F2FS_LINK_MAX) + return -EMLINK; + + sb = dir->i_sb; + sbi = F2FS_SB(sb); + + f2fs_balance_fs(sbi); + + inode->i_ctime = CURRENT_TIME; + ihold(inode); + + set_inode_flag(F2FS_I(inode), FI_INC_LINK); + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + d_instantiate(dentry, inode); + return 0; +out: + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + iput(inode); + return err; +} + +struct dentry *f2fs_get_parent(struct dentry *child) +{ + struct qstr dotdot = {.name = "..", .len = 2}; + unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); +} + +static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode = NULL; + struct f2fs_dir_entry *de; + struct page *page; + + if (dentry->d_name.len > F2FS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + de = f2fs_find_entry(dir, &dentry->d_name, &page); + if (de) { + nid_t ino = le32_to_cpu(de->ino); + kunmap(page); + f2fs_put_page(page, 0); + + inode = f2fs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + } + + return d_splice_alias(inode, dentry); +} + +static int f2fs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode = dentry->d_inode; + struct f2fs_dir_entry *de; + struct page *page; + int err = -ENOENT; + int ilock; + + trace_f2fs_unlink_enter(dir, dentry); + f2fs_balance_fs(sbi); + + de = f2fs_find_entry(dir, &dentry->d_name, &page); + if (!de) + goto fail; + + err = acquire_orphan_inode(sbi); + if (err) { + kunmap(page); + f2fs_put_page(page, 0); + goto fail; + } + + ilock = mutex_lock_op(sbi); + f2fs_delete_entry(de, page, inode); + mutex_unlock_op(sbi, ilock); + + /* In order to evict this inode, we set it dirty */ + mark_inode_dirty(inode); +fail: + trace_f2fs_unlink_exit(inode, err); + return err; +} + +static int f2fs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + size_t symlen = 
strlen(symname) + 1; + int err, ilock; + + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_symlink_inode_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + err = page_symlink(inode, symname, symlen); + alloc_nid_done(sbi, inode->i_ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return err; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct f2fs_sb_info *sbi; + struct inode *inode; + int err, ilock; + + if (dir->i_nlink >= F2FS_LINK_MAX) + return -EMLINK; + + sbi = F2FS_SB(dir->i_sb); + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, S_IFDIR | mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + + set_inode_flag(F2FS_I(inode), FI_INC_LINK); + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out_fail; + + alloc_nid_done(sbi, inode->i_ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + return 0; + +out_fail: + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + if (f2fs_empty_dir(inode)) + return f2fs_unlink(dir, dentry); + return -ENOTEMPTY; +} + +static int f2fs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + int err = 0; + int ilock; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + init_special_inode(inode, inode->i_mode, rdev); + inode->i_op = &f2fs_special_inode_operations; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return 0; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct super_block *sb = old_dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct page *old_dir_page; + struct page *old_page, *new_page; + struct f2fs_dir_entry *old_dir_entry = NULL; + struct f2fs_dir_entry *old_entry; + struct f2fs_dir_entry *new_entry; + int err = -ENOENT, ilock = -1; + + f2fs_balance_fs(sbi); + + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + if (!old_entry) + goto out; + + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; + old_dir_entry = 
f2fs_parent_dir(old_inode, &old_dir_page); + if (!old_dir_entry) + goto out_old; + } + + ilock = mutex_lock_op(sbi); + + if (new_inode) { + + err = -ENOTEMPTY; + if (old_dir_entry && !f2fs_empty_dir(new_inode)) + goto out_dir; + + err = -ENOENT; + new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, + &new_page); + if (!new_entry) + goto out_dir; + + err = acquire_orphan_inode(sbi); + if (err) + goto put_out_dir; + + if (update_dent_inode(old_inode, &new_dentry->d_name)) { + release_orphan_inode(sbi); + goto put_out_dir; + } + + f2fs_set_link(new_dir, new_entry, new_page, old_inode); + + new_inode->i_ctime = CURRENT_TIME; + if (old_dir_entry) + drop_nlink(new_inode); + drop_nlink(new_inode); + + if (!new_inode->i_nlink) + add_orphan_inode(sbi, new_inode->i_ino); + else + release_orphan_inode(sbi); + + update_inode_page(old_inode); + update_inode_page(new_inode); + } else { + if (old_dir_entry) { + err = -EMLINK; + if (new_dir->i_nlink >= F2FS_LINK_MAX) + goto out_dir; + } + + err = f2fs_add_link(new_dentry, old_inode); + if (err) + goto out_dir; + + if (old_dir_entry) { + inc_nlink(new_dir); + update_inode_page(new_dir); + } + } + + old_inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(old_inode); + + f2fs_delete_entry(old_entry, old_page, NULL); + + if (old_dir_entry) { + if (old_dir != new_dir) { + f2fs_set_link(old_inode, old_dir_entry, + old_dir_page, new_dir); + } else { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } + drop_nlink(old_dir); + update_inode_page(old_dir); + } + + mutex_unlock_op(sbi, ilock); + return 0; + +put_out_dir: + if (PageLocked(new_page)) + f2fs_put_page(new_page, 1); + else + f2fs_put_page(new_page, 0); +out_dir: + if (old_dir_entry) { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } + mutex_unlock_op(sbi, ilock); +out_old: + kunmap(old_page); + f2fs_put_page(old_page, 0); +out: + return err; +} + +const struct inode_operations f2fs_dir_inode_operations = { + .create = f2fs_create, + .lookup = f2fs_lookup, + .link = f2fs_link, + .unlink = f2fs_unlink, + .symlink = f2fs_symlink, + .mkdir = f2fs_mkdir, + .rmdir = f2fs_rmdir, + .mknod = f2fs_mknod, + .rename = f2fs_rename, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +const struct inode_operations f2fs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +const struct inode_operations f2fs_special_inode_operations = { + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c new file mode 100644 index 00000000000..5ad3c3b6685 --- /dev/null +++ b/fs/f2fs/node.c @@ -0,0 +1,1861 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/node.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. 
+ * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +static struct kmem_cache *nat_entry_slab; +static struct kmem_cache *free_nid_slab; + +static void clear_node_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + unsigned int long flags; + + if (PageDirty(page)) { + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + + clear_page_dirty_for_io(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + } + ClearPageUptodate(page); +} + +static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + pgoff_t index = current_nat_addr(sbi, nid); + return get_meta_page(sbi, index); +} + +static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct page *src_page; + struct page *dst_page; + pgoff_t src_off; + pgoff_t dst_off; + void *src_addr; + void *dst_addr; + struct f2fs_nm_info *nm_i = NM_I(sbi); + + src_off = current_nat_addr(sbi, nid); + dst_off = next_nat_addr(sbi, src_off); + + /* get current nat block page with lock */ + src_page = get_meta_page(sbi, src_off); + + /* Dirty src_page means that it is already the new target NAT page. */ + if (PageDirty(src_page)) + return src_page; + + dst_page = grab_meta_page(sbi, dst_off); + + src_addr = page_address(src_page); + dst_addr = page_address(dst_page); + memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + set_page_dirty(dst_page); + f2fs_put_page(src_page, 1); + + set_to_next_nat(nm_i, nid); + + return dst_page; +} + +/* + * Readahead NAT pages + */ +static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct blk_plug plug; + struct page *page; + pgoff_t index; + int i; + + blk_start_plug(&plug); + + for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { + if (nid >= nm_i->max_nid) + nid = 0; + index = current_nat_addr(sbi, nid); + + page = grab_cache_page(mapping, index); + if (!page) + continue; + if (PageUptodate(page)) { + f2fs_put_page(page, 1); + continue; + } + if (f2fs_readpage(sbi, page, index, READ)) + continue; + + f2fs_put_page(page, 0); + } + blk_finish_plug(&plug); +} + +static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) +{ + return radix_tree_lookup(&nm_i->nat_root, n); +} + +static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, + nid_t start, unsigned int nr, struct nat_entry **ep) +{ + return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); +} + +static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) +{ + list_del(&e->list); + radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); + nm_i->nat_cnt--; + kmem_cache_free(nat_entry_slab, e); +} + +int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; + int is_cp = 1; + + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e && !e->checkpointed) + is_cp = 0; + read_unlock(&nm_i->nat_tree_lock); + return 
is_cp; +} + +static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct nat_entry *new; + + new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + if (!new) + return NULL; + if (radix_tree_insert(&nm_i->nat_root, nid, new)) { + kmem_cache_free(nat_entry_slab, new); + return NULL; + } + memset(new, 0, sizeof(struct nat_entry)); + nat_set_nid(new, nid); + list_add_tail(&new->list, &nm_i->nat_entries); + nm_i->nat_cnt++; + return new; +} + +static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, + struct f2fs_nat_entry *ne) +{ + struct nat_entry *e; +retry: + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (!e) { + e = grab_nat_entry(nm_i, nid); + if (!e) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); + nat_set_ino(e, le32_to_cpu(ne->ino)); + nat_set_version(e, ne->version); + e->checkpointed = true; + } + write_unlock(&nm_i->nat_tree_lock); +} + +static int set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, + block_t new_blkaddr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; +retry: + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, ni->nid); + if (!e) { + e = grab_nat_entry(nm_i, ni->nid); + if (!e) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + e->ni = *ni; + e->checkpointed = true; + BUG_ON(ni->blk_addr == NEW_ADDR); + } else if (new_blkaddr == NEW_ADDR) { + /* + * when nid is reallocated, + * previous nat entry can be remained in nat cache. + * So, reinitialize it with new information. + */ + e->ni = *ni; + if (ni->blk_addr != NULL_ADDR) { + f2fs_msg(sbi->sb, KERN_ERR, "node block address is " + "already set: %u", ni->blk_addr); + f2fs_handle_error(sbi); + /* just give up on this node */ + write_unlock(&nm_i->nat_tree_lock); + return -EIO; + } + } + + if (new_blkaddr == NEW_ADDR) + e->checkpointed = false; + + /* sanity check */ + BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); + BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && + new_blkaddr == NULL_ADDR); + BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && + new_blkaddr == NEW_ADDR); + BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && + nat_get_blkaddr(e) != NULL_ADDR && + new_blkaddr == NEW_ADDR); + + /* increament version no as node is removed */ + if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { + unsigned char version = nat_get_version(e); + nat_set_version(e, inc_node_version(version)); + } + + /* change address */ + nat_set_blkaddr(e, new_blkaddr); + __set_nat_cache_dirty(nm_i, e); + write_unlock(&nm_i->nat_tree_lock); + return 0; +} + +static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) + return 0; + + write_lock(&nm_i->nat_tree_lock); + while (nr_shrink && !list_empty(&nm_i->nat_entries)) { + struct nat_entry *ne; + ne = list_first_entry(&nm_i->nat_entries, + struct nat_entry, list); + __del_from_nat_cache(nm_i, ne); + nr_shrink--; + } + write_unlock(&nm_i->nat_tree_lock); + return nr_shrink; +} + +/* + * This function returns always success + */ +void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + nid_t start_nid = START_NID(nid); + struct f2fs_nat_block *nat_blk; + struct page *page = NULL; + struct f2fs_nat_entry ne; + struct nat_entry *e; + int i; + + memset(&ne, 
0, sizeof(struct f2fs_nat_entry)); + ni->nid = nid; + + /* Check nat cache */ + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e) { + ni->ino = nat_get_ino(e); + ni->blk_addr = nat_get_blkaddr(e); + ni->version = nat_get_version(e); + } + read_unlock(&nm_i->nat_tree_lock); + if (e) + return; + + /* Check current segment summary */ + mutex_lock(&curseg->curseg_mutex); + i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); + if (i >= 0) { + ne = nat_in_journal(sum, i); + node_info_from_raw_nat(ni, &ne); + } + mutex_unlock(&curseg->curseg_mutex); + if (i >= 0) + goto cache; + + /* Fill node_info from nat page */ + page = get_current_nat_page(sbi, start_nid); + nat_blk = (struct f2fs_nat_block *)page_address(page); + ne = nat_blk->entries[nid - start_nid]; + node_info_from_raw_nat(ni, &ne); + f2fs_put_page(page, 1); +cache: + /* cache nat entry */ + cache_nat_entry(NM_I(sbi), nid, &ne); +} + +/* + * The maximum depth is four. + * Offset[0] will have raw inode offset. + */ +static int get_node_path(struct f2fs_inode_info *fi, long block, + int offset[4], unsigned int noffset[4]) +{ + const long direct_index = ADDRS_PER_INODE(fi); + const long direct_blks = ADDRS_PER_BLOCK; + const long dptrs_per_blk = NIDS_PER_BLOCK; + const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; + const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; + int n = 0; + int level = 0; + + noffset[0] = 0; + + if (block < direct_index) { + offset[n] = block; + goto got; + } + block -= direct_index; + if (block < direct_blks) { + offset[n++] = NODE_DIR1_BLOCK; + noffset[n] = 1; + offset[n] = block; + level = 1; + goto got; + } + block -= direct_blks; + if (block < direct_blks) { + offset[n++] = NODE_DIR2_BLOCK; + noffset[n] = 2; + offset[n] = block; + level = 1; + goto got; + } + block -= direct_blks; + if (block < indirect_blks) { + offset[n++] = NODE_IND1_BLOCK; + noffset[n] = 3; + offset[n++] = block / direct_blks; + noffset[n] = 4 + offset[n - 1]; + offset[n] = block % direct_blks; + level = 2; + goto got; + } + block -= indirect_blks; + if (block < indirect_blks) { + offset[n++] = NODE_IND2_BLOCK; + noffset[n] = 4 + dptrs_per_blk; + offset[n++] = block / direct_blks; + noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; + offset[n] = block % direct_blks; + level = 2; + goto got; + } + block -= indirect_blks; + if (block < dindirect_blks) { + offset[n++] = NODE_DIND_BLOCK; + noffset[n] = 5 + (dptrs_per_blk * 2); + offset[n++] = block / indirect_blks; + noffset[n] = 6 + (dptrs_per_blk * 2) + + offset[n - 1] * (dptrs_per_blk + 1); + offset[n++] = (block / direct_blks) % dptrs_per_blk; + noffset[n] = 7 + (dptrs_per_blk * 2) + + offset[n - 2] * (dptrs_per_blk + 1) + + offset[n - 1]; + offset[n] = block % direct_blks; + level = 3; + goto got; + } else { + BUG(); + } +got: + return level; +} + +/* + * Caller should call f2fs_put_dnode(dn). + * Also, it should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op() only if ro is not set RDONLY_NODE. + * In the case of RDONLY_NODE, we don't need to care about mutex. 
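get_node_path() above maps a file block offset onto the inode's pointer tree: direct pointers in the inode itself, then two direct node blocks, two indirect node blocks, and finally a double-indirect node block. The userspace sketch below reproduces just the level decision; the geometry constants are assumptions (typical 4KB-block values), since the real code derives ADDRS_PER_INODE(fi) from the inode, so treat the numbers as illustrative.

#include <stdio.h>

#define ADDRS_PER_INODE 923     /* data pointers stored in the inode itself */
#define ADDRS_PER_BLOCK 1018    /* data pointers per direct node block */
#define NIDS_PER_BLOCK  1018    /* node ids per indirect node block */

static int node_level(long block)
{
        const long direct   = ADDRS_PER_BLOCK;
        const long indirect = (long)ADDRS_PER_BLOCK * NIDS_PER_BLOCK;

        if (block < ADDRS_PER_INODE)
                return 0;               /* inside the inode */
        block -= ADDRS_PER_INODE;
        if (block < 2 * direct)
                return 1;               /* one of two direct node blocks */
        block -= 2 * direct;
        if (block < 2 * indirect)
                return 2;               /* one of two indirect node blocks */
        return 3;                       /* double indirect node block */
}

int main(void)
{
        long samples[] = { 0, 922, 923, 2958, 2959, 2100000 };
        size_t i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("block %7ld -> level %d\n",
                       samples[i], node_level(samples[i]));
        return 0;
}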
+ */ +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *npage[4]; + struct page *parent; + int offset[4]; + unsigned int noffset[4]; + nid_t nids[4]; + int level, i; + int err = 0; + + level = get_node_path(F2FS_I(dn->inode), index, offset, noffset); + + nids[0] = dn->inode->i_ino; + npage[0] = dn->inode_page; + + if (!npage[0]) { + npage[0] = get_node_page(sbi, nids[0]); + if (IS_ERR(npage[0])) + return PTR_ERR(npage[0]); + } + parent = npage[0]; + if (level != 0) + nids[1] = get_nid(parent, offset[0], true); + dn->inode_page = npage[0]; + dn->inode_page_locked = true; + + /* get indirect or direct nodes */ + for (i = 1; i <= level; i++) { + bool done = false; + + if (!nids[i] && mode == ALLOC_NODE) { + /* alloc new node */ + if (!alloc_nid(sbi, &(nids[i]))) { + err = -ENOSPC; + goto release_pages; + } + + dn->nid = nids[i]; + npage[i] = new_node_page(dn, noffset[i], NULL); + if (IS_ERR(npage[i])) { + alloc_nid_failed(sbi, nids[i]); + err = PTR_ERR(npage[i]); + goto release_pages; + } + + set_nid(parent, offset[i - 1], nids[i], i == 1); + alloc_nid_done(sbi, nids[i]); + done = true; + } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { + npage[i] = get_node_page_ra(parent, offset[i - 1]); + if (IS_ERR(npage[i])) { + err = PTR_ERR(npage[i]); + goto release_pages; + } + done = true; + } + if (i == 1) { + dn->inode_page_locked = false; + unlock_page(parent); + } else { + f2fs_put_page(parent, 1); + } + + if (!done) { + npage[i] = get_node_page(sbi, nids[i]); + if (IS_ERR(npage[i])) { + err = PTR_ERR(npage[i]); + f2fs_put_page(npage[0], 0); + goto release_out; + } + } + if (i < level) { + parent = npage[i]; + nids[i + 1] = get_nid(parent, offset[i], false); + } + } + dn->nid = nids[level]; + dn->ofs_in_node = offset[level]; + dn->node_page = npage[level]; + dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + return 0; + +release_pages: + f2fs_put_page(parent, 1); + if (i > 1) + f2fs_put_page(npage[0], 0); +release_out: + dn->inode_page = NULL; + dn->node_page = NULL; + return err; +} + +static void truncate_node(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct node_info ni; + + get_node_info(sbi, dn->nid, &ni); + if (dn->inode->i_blocks == 0) { + if (ni.blk_addr != NULL_ADDR) { + f2fs_msg(sbi->sb, KERN_ERR, + "empty node still has block address %u ", + ni.blk_addr); + f2fs_handle_error(sbi); + } + goto invalidate; + } + BUG_ON(ni.blk_addr == NULL_ADDR); + + /* Deallocate node address */ + invalidate_blocks(sbi, ni.blk_addr); + dec_valid_node_count(sbi, dn->inode, 1); + set_node_addr(sbi, &ni, NULL_ADDR); + + if (dn->nid == dn->inode->i_ino) { + remove_orphan_inode(sbi, dn->nid); + dec_valid_inode_count(sbi); + } else { + sync_inode_page(dn); + } +invalidate: + clear_node_page_dirty(dn->node_page); + F2FS_SET_SB_DIRT(sbi); + + f2fs_put_page(dn->node_page, 1); + dn->node_page = NULL; + trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); +} + +static int truncate_dnode(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *page; + + if (dn->nid == 0) + return 1; + + /* get direct node */ + page = get_node_page(sbi, dn->nid); + if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) + return 1; + else if (IS_ERR(page)) + return PTR_ERR(page); + + /* Make dnode_of_data for parameter */ + dn->node_page = page; + dn->ofs_in_node = 0; + truncate_data_blocks(dn); + truncate_node(dn); + return 1; 
+} + +static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, + int ofs, int depth) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct dnode_of_data rdn = *dn; + struct page *page; + struct f2fs_node *rn; + nid_t child_nid; + unsigned int child_nofs; + int freed = 0; + int i, ret; + + if (dn->nid == 0) + return NIDS_PER_BLOCK + 1; + + trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); + + page = get_node_page(sbi, dn->nid); + if (IS_ERR(page)) { + trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); + return PTR_ERR(page); + } + + rn = F2FS_NODE(page); + if (depth < 3) { + for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { + child_nid = le32_to_cpu(rn->in.nid[i]); + if (child_nid == 0) + continue; + rdn.nid = child_nid; + ret = truncate_dnode(&rdn); + if (ret < 0) + goto out_err; + set_nid(page, i, 0, false); + } + } else { + child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; + for (i = ofs; i < NIDS_PER_BLOCK; i++) { + child_nid = le32_to_cpu(rn->in.nid[i]); + if (child_nid == 0) { + child_nofs += NIDS_PER_BLOCK + 1; + continue; + } + rdn.nid = child_nid; + ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); + if (ret == (NIDS_PER_BLOCK + 1)) { + set_nid(page, i, 0, false); + child_nofs += ret; + } else if (ret < 0 && ret != -ENOENT) { + goto out_err; + } + } + freed = child_nofs; + } + + if (!ofs) { + /* remove current indirect node */ + dn->node_page = page; + truncate_node(dn); + freed++; + } else { + f2fs_put_page(page, 1); + } + trace_f2fs_truncate_nodes_exit(dn->inode, freed); + return freed; + +out_err: + f2fs_put_page(page, 1); + trace_f2fs_truncate_nodes_exit(dn->inode, ret); + return ret; +} + +static int truncate_partial_nodes(struct dnode_of_data *dn, + struct f2fs_inode *ri, int *offset, int depth) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *pages[2]; + nid_t nid[3]; + nid_t child_nid; + int err = 0; + int i; + int idx = depth - 2; + + nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + if (!nid[0]) + return 0; + + /* get indirect nodes in the path */ + for (i = 0; i < depth - 1; i++) { + /* refernece count'll be increased */ + pages[i] = get_node_page(sbi, nid[i]); + if (IS_ERR(pages[i])) { + depth = i + 1; + err = PTR_ERR(pages[i]); + goto fail; + } + nid[i + 1] = get_nid(pages[i], offset[i + 1], false); + } + + /* free direct nodes linked to a partial indirect node */ + for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { + child_nid = get_nid(pages[idx], i, false); + if (!child_nid) + continue; + dn->nid = child_nid; + err = truncate_dnode(dn); + if (err < 0) + goto fail; + set_nid(pages[idx], i, 0, false); + } + + if (offset[depth - 1] == 0) { + dn->node_page = pages[idx]; + dn->nid = nid[idx]; + truncate_node(dn); + } else { + f2fs_put_page(pages[idx], 1); + } + offset[idx]++; + offset[depth - 1] = 0; +fail: + for (i = depth - 3; i >= 0; i--) + f2fs_put_page(pages[i], 1); + + trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); + + return err; +} + +/* + * All the block addresses of data and nodes should be nullified. 
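A subtle point in truncate_dnode()/truncate_nodes() above is the return value: each call reports how many node blocks it disposed of, and truncate_inode_blocks() adds that number to its node-offset cursor (nofs). A minimal userspace sketch of the same counting convention (illustrative only; it counts nodes instead of actually freeing anything):

#include <stdio.h>

#define FANOUT 3                        /* stands in for NIDS_PER_BLOCK */

struct tnode {
        struct tnode *child[FANOUT];
        int is_leaf;
};

/* report how many nodes were handled under (and including) this node */
static int drop_subtree(struct tnode *n)
{
        int handled = 0, i;

        if (!n)
                return 0;
        if (!n->is_leaf)
                for (i = 0; i < FANOUT; i++)
                        handled += drop_subtree(n->child[i]);
        return handled + 1;             /* +1 for the node itself */
}

int main(void)
{
        struct tnode leaf1 = { { 0 }, 1 }, leaf2 = { { 0 }, 1 };
        struct tnode ind = { { &leaf1, &leaf2, 0 }, 0 };

        printf("handled %d node blocks\n", drop_subtree(&ind)); /* prints 3 */
        return 0;
}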
+ */ +int truncate_inode_blocks(struct inode *inode, pgoff_t from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *node_mapping = sbi->node_inode->i_mapping; + int err = 0, cont = 1; + int level, offset[4], noffset[4]; + unsigned int nofs = 0; + struct f2fs_node *rn; + struct dnode_of_data dn; + struct page *page; + + trace_f2fs_truncate_inode_blocks_enter(inode, from); + + level = get_node_path(F2FS_I(inode), from, offset, noffset); +restart: + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); + return PTR_ERR(page); + } + + set_new_dnode(&dn, inode, page, NULL, 0); + unlock_page(page); + + rn = F2FS_NODE(page); + switch (level) { + case 0: + case 1: + nofs = noffset[1]; + break; + case 2: + nofs = noffset[1]; + if (!offset[level - 1]) + goto skip_partial; + err = truncate_partial_nodes(&dn, &rn->i, offset, level); + if (err < 0 && err != -ENOENT) + goto fail; + nofs += 1 + NIDS_PER_BLOCK; + break; + case 3: + nofs = 5 + 2 * NIDS_PER_BLOCK; + if (!offset[level - 1]) + goto skip_partial; + err = truncate_partial_nodes(&dn, &rn->i, offset, level); + if (err < 0 && err != -ENOENT) + goto fail; + break; + default: + BUG(); + } + +skip_partial: + while (cont) { + dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); + switch (offset[0]) { + case NODE_DIR1_BLOCK: + case NODE_DIR2_BLOCK: + err = truncate_dnode(&dn); + break; + + case NODE_IND1_BLOCK: + case NODE_IND2_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 2); + break; + + case NODE_DIND_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 3); + cont = 0; + break; + + default: + BUG(); + } + if (err < 0 && err != -ENOENT) + goto fail; + if (offset[1] == 0 && + rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { + lock_page(page); + if (page->mapping != node_mapping) { + f2fs_put_page(page, 1); + goto restart; + } + wait_on_page_writeback(page); + rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; + set_page_dirty(page); + unlock_page(page); + } + offset[1] = 0; + offset[0]++; + nofs += err; + } +fail: + f2fs_put_page(page, 0); + trace_f2fs_truncate_inode_blocks_exit(inode, err); + return err > 0 ? 0 : err; +} + +int truncate_xattr_node(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + nid_t nid = F2FS_I(inode)->i_xattr_nid; + struct dnode_of_data dn; + struct page *npage; + + if (!nid) + return 0; + + npage = get_node_page(sbi, nid); + if (IS_ERR(npage)) + return PTR_ERR(npage); + + F2FS_I(inode)->i_xattr_nid = 0; + + /* need to do checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + + set_new_dnode(&dn, inode, page, npage, nid); + + if (page) + dn.inode_page_locked = 1; + truncate_node(&dn); + return 0; +} + +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). 
+ */ +int remove_inode_page(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + nid_t ino = inode->i_ino; + struct dnode_of_data dn; + int err; + + page = get_node_page(sbi, ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + err = truncate_xattr_node(inode, page); + if (err) { + f2fs_put_page(page, 1); + return err; + } + + /* 0 is possible, after f2fs_new_inode() is failed */ + if (inode->i_blocks != 0 && inode->i_blocks != 1) { + f2fs_msg(sbi->sb, KERN_ERR, "inode %u still has %llu blocks", + ino, inode->i_blocks); + f2fs_handle_error(sbi); + } + set_new_dnode(&dn, inode, page, page, ino); + truncate_node(&dn); + return 0; +} + +struct page *new_inode_page(struct inode *inode, const struct qstr *name) +{ + struct dnode_of_data dn; + + /* allocate inode page for new inode */ + set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); + + /* caller should f2fs_put_page(page, 1); */ + return new_node_page(&dn, 0, NULL); +} + +struct page *new_node_page(struct dnode_of_data *dn, + unsigned int ofs, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct address_space *mapping = sbi->node_inode->i_mapping; + struct node_info old_ni, new_ni; + struct page *page; + int err; + + if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + return ERR_PTR(-EPERM); + + page = grab_cache_page(mapping, dn->nid); + if (!page) + return ERR_PTR(-ENOMEM); + + if (!inc_valid_node_count(sbi, dn->inode, 1)) { + err = -ENOSPC; + goto fail; + } + + get_node_info(sbi, dn->nid, &old_ni); + + /* Reinitialize old_ni with new node page */ + BUG_ON(old_ni.blk_addr != NULL_ADDR); + new_ni = old_ni; + new_ni.ino = dn->inode->i_ino; + set_node_addr(sbi, &new_ni, NEW_ADDR); + + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); + set_cold_node(dn->inode, page); + SetPageUptodate(page); + set_page_dirty(page); + + if (ofs == XATTR_NODE_OFFSET) + F2FS_I(dn->inode)->i_xattr_nid = dn->nid; + + dn->node_page = page; + if (ipage) + update_inode(dn->inode, ipage); + else + sync_inode_page(dn); + if (ofs == 0) + inc_valid_inode_count(sbi); + + return page; + +fail: + clear_node_page_dirty(page); + f2fs_put_page(page, 1); + return ERR_PTR(err); +} + +/* + * Caller should do after getting the following values. 
+ * 0: f2fs_put_page(page, 0) + * LOCKED_PAGE: f2fs_put_page(page, 1) + * error: nothing + */ +static int read_node_page(struct page *page, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct node_info ni; + + get_node_info(sbi, page->index, &ni); + + if (ni.blk_addr == NULL_ADDR) { + f2fs_put_page(page, 1); + return -ENOENT; + } + + if (PageUptodate(page)) + return LOCKED_PAGE; + + return f2fs_readpage(sbi, page, ni.blk_addr, type); +} + +/* + * Readahead a node page + */ +void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *apage; + int err; + + apage = find_get_page(mapping, nid); + if (apage && PageUptodate(apage)) { + f2fs_put_page(apage, 0); + return; + } + f2fs_put_page(apage, 0); + + apage = grab_cache_page(mapping, nid); + if (!apage) + return; + + err = read_node_page(apage, READA); + if (err == 0) + f2fs_put_page(apage, 0); + else if (err == LOCKED_PAGE) + f2fs_put_page(apage, 1); +} + +struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *page; + int err; +repeat: + page = grab_cache_page(mapping, nid); + if (!page) + return ERR_PTR(-ENOMEM); + + err = read_node_page(page, READ_SYNC); + if (err < 0) + return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto got_it; + + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +got_it: + if (nid != nid_of_node(page)) { + f2fs_msg(sbi->sb, KERN_ERR, "page node id does not match " + "request: %lu", nid); + f2fs_handle_error(sbi); + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + mark_page_accessed(page); + return page; +} + +/* + * Return a locked page for the desired node page. + * And, readahead MAX_RA_NODE number of node pages. + */ +struct page *get_node_page_ra(struct page *parent, int start) +{ + struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); + struct address_space *mapping = sbi->node_inode->i_mapping; + struct blk_plug plug; + struct page *page; + int err, i, end; + nid_t nid; + + /* First, try getting the desired direct node. 
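The three-way return contract of read_node_page() documented above drives how ra_node_page() and get_node_page() release the page afterwards. The helper below is invented purely to illustrate that branching (illustrative only, not patch code):

#include <stdio.h>

#define LOCKED_PAGE 1

/* invented stand-in: decide what the caller still has to release */
static int fake_read_node_page(int has_block, int already_uptodate)
{
        if (!has_block)
                return -1;              /* real code: -ENOENT, page dropped */
        if (already_uptodate)
                return LOCKED_PAGE;     /* page stays locked for the caller */
        return 0;                       /* read submitted; release unlocked */
}

int main(void)
{
        int err = fake_read_node_page(1, 1);

        if (err < 0)
                printf("error: nothing left to release\n");
        else if (err == LOCKED_PAGE)
                printf("release with f2fs_put_page(page, 1)\n");
        else
                printf("release with f2fs_put_page(page, 0)\n");
        return 0;
}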
*/ + nid = get_nid(parent, start, false); + if (!nid) + return ERR_PTR(-ENOENT); +repeat: + page = grab_cache_page(mapping, nid); + if (!page) + return ERR_PTR(-ENOMEM); + + err = read_node_page(page, READ_SYNC); + if (err < 0) + return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto page_hit; + + blk_start_plug(&plug); + + /* Then, try readahead for siblings of the desired node */ + end = start + MAX_RA_NODE; + end = min(end, NIDS_PER_BLOCK); + for (i = start + 1; i < end; i++) { + nid = get_nid(parent, i, false); + if (!nid) + continue; + ra_node_page(sbi, nid); + } + + blk_finish_plug(&plug); + + lock_page(page); + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +page_hit: + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + mark_page_accessed(page); + return page; +} + +void sync_inode_page(struct dnode_of_data *dn) +{ + if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { + update_inode(dn->inode, dn->node_page); + } else if (dn->inode_page) { + if (!dn->inode_page_locked) + lock_page(dn->inode_page); + update_inode(dn->inode, dn->inode_page); + if (!dn->inode_page_locked) + unlock_page(dn->inode_page); + } else { + update_inode_page(dn->inode); + } +} + +int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, + struct writeback_control *wbc) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + pgoff_t index, end; + struct pagevec pvec; + int step = ino ? 2 : 0; + int nwritten = 0, wrote = 0; + + pagevec_init(&pvec, 0); + +next_step: + index = 0; + end = LONG_MAX; + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * flushing sequence with step: + * 0. indirect nodes + * 1. dentry dnodes + * 2. file dnodes + */ + if (step == 0 && IS_DNODE(page)) + continue; + if (step == 1 && (!IS_DNODE(page) || + is_cold_node(page))) + continue; + if (step == 2 && (!IS_DNODE(page) || + !is_cold_node(page))) + continue; + + /* + * If an fsync mode, + * we should not skip writing node pages. 
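+ * (A non-zero 'ino' means we are flushing on behalf of fsync(); in that
+ * case block in lock_page() for pages of that inode instead of using
+ * trylock_page(), so none of its dirty node pages are skipped.)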
+ */ + if (ino && ino_of_node(page) == ino) + lock_page(page); + else if (!trylock_page(page)) + continue; + + if (unlikely(page->mapping != mapping)) { +continue_unlock: + unlock_page(page); + continue; + } + if (ino && ino_of_node(page) != ino) + goto continue_unlock; + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; + + /* called by fsync() */ + if (ino && IS_DNODE(page)) { + int mark = !is_checkpointed_node(sbi, ino); + set_fsync_mark(page, 1); + if (IS_INODE(page)) + set_dentry_mark(page, mark); + nwritten++; + } else { + set_fsync_mark(page, 0); + set_dentry_mark(page, 0); + } + mapping->a_ops->writepage(page, wbc); + wrote++; + + if (--wbc->nr_to_write == 0) + break; + } + pagevec_release(&pvec); + cond_resched(); + + if (wbc->nr_to_write == 0) { + step = 2; + break; + } + } + + if (step < 2) { + step++; + goto next_step; + } + + if (wrote) + f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); + + return nwritten; +} + +static int f2fs_write_node_page(struct page *page, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + nid_t nid; + block_t new_addr; + struct node_info ni; + + if (sbi->por_doing) + goto redirty_out; + + wait_on_page_writeback(page); + + /* get old block addr of this node page */ + nid = nid_of_node(page); + BUG_ON(page->index != nid); + + get_node_info(sbi, nid, &ni); + + /* This page is already truncated */ + if (ni.blk_addr == NULL_ADDR) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; + } + + if (wbc->for_reclaim) + goto redirty_out; + + mutex_lock(&sbi->node_write); + set_page_writeback(page); + write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); + set_node_addr(sbi, &ni, new_addr); + dec_page_count(sbi, F2FS_DIRTY_NODES); + mutex_unlock(&sbi->node_write); + unlock_page(page); + return 0; + +redirty_out: + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; +} + +/* + * It is very important to gather dirty pages and write at once, so that we can + * submit a big bio without interfering other data writes. + * Be default, 512 pages (2MB), a segment size, is quite reasonable. 
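+ * (With 4KB pages, 512 pages = 2MB, i.e. exactly one segment's worth of
+ * node blocks per submission.)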
+ */ +#define COLLECT_DIRTY_NODES 512 +static int f2fs_write_node_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + long nr_to_write = wbc->nr_to_write; + + /* First check balancing cached NAT entries */ + if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { + f2fs_sync_fs(sbi->sb, true); + return 0; + } + + /* collect a number of dirty node pages and write together */ + if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) + return 0; + + /* if mounting is failed, skip writing node pages */ + wbc->nr_to_write = max_hw_blocks(sbi); + sync_node_pages(sbi, 0, wbc); + wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); + return 0; +} + +static int f2fs_set_node_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + inc_page_count(sbi, F2FS_DIRTY_NODES); + SetPagePrivate(page); + return 1; + } + return 0; +} + +static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (PageDirty(page)) + dec_page_count(sbi, F2FS_DIRTY_NODES); + ClearPagePrivate(page); +} + +static int f2fs_release_node_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 1; +} + +/* + * Structure of the f2fs node operations + */ +const struct address_space_operations f2fs_node_aops = { + .writepage = f2fs_write_node_page, + .writepages = f2fs_write_node_pages, + .set_page_dirty = f2fs_set_node_page_dirty, + .invalidatepage = f2fs_invalidate_node_page, + .releasepage = f2fs_release_node_page, +}; + +static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) +{ + struct list_head *this; + struct free_nid *i; + list_for_each(this, head) { + i = list_entry(this, struct free_nid, list); + if (i->nid == n) + return i; + } + return NULL; +} + +static void __del_from_free_nid_list(struct free_nid *i) +{ + list_del(&i->list); + kmem_cache_free(free_nid_slab, i); +} + +static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) +{ + struct free_nid *i; + struct nat_entry *ne; + bool allocated = false; + + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) + return -1; + + /* 0 nid should not be used */ + if (nid == 0) + return 0; + + if (!build) + goto retry; + + /* do not add allocated nids */ + read_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne && nat_get_blkaddr(ne) != NULL_ADDR) + allocated = true; + read_unlock(&nm_i->nat_tree_lock); + if (allocated) + return 0; +retry: + i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); + if (!i) { + cond_resched(); + goto retry; + } + i->nid = nid; + i->state = NID_NEW; + + spin_lock(&nm_i->free_nid_list_lock); + if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { + spin_unlock(&nm_i->free_nid_list_lock); + kmem_cache_free(free_nid_slab, i); + return 0; + } + list_add_tail(&i->list, &nm_i->free_nid_list); + nm_i->fcnt++; + spin_unlock(&nm_i->free_nid_list_lock); + return 1; +} + +static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct free_nid *i; + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + if (i && i->state == NID_NEW) { + __del_from_free_nid_list(i); + nm_i->fcnt--; + } + spin_unlock(&nm_i->free_nid_list_lock); +} + +static void 
scan_nat_page(struct f2fs_nm_info *nm_i, + struct page *nat_page, nid_t start_nid) +{ + struct f2fs_nat_block *nat_blk = page_address(nat_page); + block_t blk_addr; + int i; + + i = start_nid % NAT_ENTRY_PER_BLOCK; + + for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + + if (start_nid >= nm_i->max_nid) + break; + + blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); + BUG_ON(blk_addr == NEW_ADDR); + if (blk_addr == NULL_ADDR) { + if (add_free_nid(nm_i, start_nid, true) < 0) + break; + } + } +} + +static void build_free_nids(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i = 0; + nid_t nid = nm_i->next_scan_nid; + + /* Enough entries */ + if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) + return; + + /* readahead nat pages to be scanned */ + ra_nat_pages(sbi, nid); + + while (1) { + struct page *page = get_current_nat_page(sbi, nid); + + scan_nat_page(nm_i, page, nid); + f2fs_put_page(page, 1); + + nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); + if (nid >= nm_i->max_nid) + nid = 0; + + if (i++ == FREE_NID_PAGES) + break; + } + + /* go to the next free nat pages to find free nids abundantly */ + nm_i->next_scan_nid = nid; + + /* find free nids from current sum_pages */ + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < nats_in_cursum(sum); i++) { + block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); + nid = le32_to_cpu(nid_in_journal(sum, i)); + if (addr == NULL_ADDR) + add_free_nid(nm_i, nid, true); + else + remove_free_nid(nm_i, nid); + } + mutex_unlock(&curseg->curseg_mutex); +} + +/* + * If this function returns success, caller can obtain a new nid + * from second parameter of this function. + * The returned nid could be used ino as well as nid when inode is created. + */ +bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i = NULL; + struct list_head *this; +retry: + if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) + return false; + + spin_lock(&nm_i->free_nid_list_lock); + + /* We should not use stale free nids created by build_free_nids */ + if (nm_i->fcnt && !sbi->on_build_free_nids) { + BUG_ON(list_empty(&nm_i->free_nid_list)); + list_for_each(this, &nm_i->free_nid_list) { + i = list_entry(this, struct free_nid, list); + if (i->state == NID_NEW) + break; + } + + BUG_ON(i->state != NID_NEW); + *nid = i->nid; + i->state = NID_ALLOC; + nm_i->fcnt--; + spin_unlock(&nm_i->free_nid_list_lock); + return true; + } + spin_unlock(&nm_i->free_nid_list_lock); + + /* Let's scan nat pages and its caches to get free nids */ + mutex_lock(&nm_i->build_lock); + sbi->on_build_free_nids = 1; + build_free_nids(sbi); + sbi->on_build_free_nids = 0; + mutex_unlock(&nm_i->build_lock); + goto retry; +} + +/* + * alloc_nid() should be called prior to this function. + */ +void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + __del_from_free_nid_list(i); + spin_unlock(&nm_i->free_nid_list_lock); +} + +/* + * alloc_nid() should be called prior to this function. 
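+ * Illustrative caller pattern (make_new_node() is only a placeholder for
+ * whatever setup the caller performs with the reserved nid):
+ *
+ *	if (!alloc_nid(sbi, &nid))
+ *		return -ENOSPC;
+ *	err = make_new_node(sbi, nid);
+ *	if (err)
+ *		alloc_nid_failed(sbi, nid);	- give the nid back
+ *	else
+ *		alloc_nid_done(sbi, nid);	- drop it from the free list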
+ */ +void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + if (!nid) + return; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { + __del_from_free_nid_list(i); + } else { + i->state = NID_NEW; + nm_i->fcnt++; + } + spin_unlock(&nm_i->free_nid_list_lock); +} + +void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, + struct f2fs_summary *sum, struct node_info *ni, + block_t new_blkaddr) +{ + rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); + set_node_addr(sbi, ni, new_blkaddr); + clear_node_page_dirty(page); +} + +int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct f2fs_node *src, *dst; + nid_t ino = ino_of_node(page); + struct node_info old_ni, new_ni; + struct page *ipage; + int err; + + ipage = grab_cache_page(mapping, ino); + if (!ipage) + return -ENOMEM; + + /* Should not use this inode from free nid list */ + remove_free_nid(NM_I(sbi), ino); + + get_node_info(sbi, ino, &old_ni); + SetPageUptodate(ipage); + fill_node_footer(ipage, ino, ino, 0, true); + + src = F2FS_NODE(page); + dst = F2FS_NODE(ipage); + + memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); + dst->i.i_size = 0; + dst->i.i_blocks = cpu_to_le64(1); + dst->i.i_links = cpu_to_le32(1); + dst->i.i_xattr_nid = 0; + + new_ni = old_ni; + new_ni.ino = ino; + + err = set_node_addr(sbi, &new_ni, NEW_ADDR); + if (!err) + if (!inc_valid_node_count(sbi, NULL, 1)) + err = -ENOSPC; + if (!err) + inc_valid_inode_count(sbi); + f2fs_put_page(ipage, 1); + return err; +} + +int restore_node_summary(struct f2fs_sb_info *sbi, + unsigned int segno, struct f2fs_summary_block *sum) +{ + struct f2fs_node *rn; + struct f2fs_summary *sum_entry; + struct page *page; + block_t addr; + int i, last_offset; + + /* alloc temporal page for read node */ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (!page) + return -ENOMEM; + lock_page(page); + + /* scan the node segment */ + last_offset = sbi->blocks_per_seg; + addr = START_BLOCK(sbi, segno); + sum_entry = &sum->entries[0]; + + for (i = 0; i < last_offset; i++, sum_entry++) { + /* + * In order to read next node page, + * we must clear PageUptodate flag. 
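+ * (The same scratch page is reused for every block of the segment, so
+ * state left over from the previous read must not be carried forward.)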
+ */ + ClearPageUptodate(page); + + if (f2fs_readpage(sbi, page, addr, READ_SYNC)) + goto out; + + lock_page(page); + rn = F2FS_NODE(page); + sum_entry->nid = rn->footer.nid; + sum_entry->version = 0; + sum_entry->ofs_in_node = 0; + addr++; + } + unlock_page(page); +out: + __free_pages(page, 0); + return 0; +} + +static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i; + + mutex_lock(&curseg->curseg_mutex); + + if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { + mutex_unlock(&curseg->curseg_mutex); + return false; + } + + for (i = 0; i < nats_in_cursum(sum); i++) { + struct nat_entry *ne; + struct f2fs_nat_entry raw_ne; + nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); + + raw_ne = nat_in_journal(sum, i); +retry: + write_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne) { + __set_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + continue; + } + ne = grab_nat_entry(nm_i, nid); + if (!ne) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); + nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); + nat_set_version(ne, raw_ne.version); + __set_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + } + update_nats_in_cursum(sum, -i); + mutex_unlock(&curseg->curseg_mutex); + return true; +} + +/* + * This function is called during the checkpointing process. + */ +void flush_nat_entries(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + struct list_head *cur, *n; + struct page *page = NULL; + struct f2fs_nat_block *nat_blk = NULL; + nid_t start_nid = 0, end_nid = 0; + bool flushed; + + flushed = flush_nats_in_journal(sbi); + + if (!flushed) + mutex_lock(&curseg->curseg_mutex); + + /* 1) flush dirty nat caches */ + list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { + struct nat_entry *ne; + nid_t nid; + struct f2fs_nat_entry raw_ne; + int offset = -1; + block_t new_blkaddr; + + ne = list_entry(cur, struct nat_entry, list); + nid = nat_get_nid(ne); + + if (nat_get_blkaddr(ne) == NEW_ADDR) + continue; + if (flushed) + goto to_nat_page; + + /* if there is room for nat enries in curseg->sumpage */ + offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); + if (offset >= 0) { + raw_ne = nat_in_journal(sum, offset); + goto flush_now; + } +to_nat_page: + if (!page || (start_nid > nid || nid > end_nid)) { + if (page) { + f2fs_put_page(page, 1); + page = NULL; + } + start_nid = START_NID(nid); + end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; + + /* + * get nat block with dirty flag, increased reference + * count, mapped and lock + */ + page = get_next_nat_page(sbi, start_nid); + nat_blk = page_address(page); + } + + BUG_ON(!nat_blk); + raw_ne = nat_blk->entries[nid - start_nid]; +flush_now: + new_blkaddr = nat_get_blkaddr(ne); + + raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); + raw_ne.block_addr = cpu_to_le32(new_blkaddr); + raw_ne.version = nat_get_version(ne); + + if (offset < 0) { + nat_blk->entries[nid - start_nid] = raw_ne; + } else { + nat_in_journal(sum, offset) = raw_ne; + nid_in_journal(sum, offset) = cpu_to_le32(nid); + } + + if (nat_get_blkaddr(ne) == NULL_ADDR && + add_free_nid(NM_I(sbi), nid, false) <= 0) { + write_lock(&nm_i->nat_tree_lock); + __del_from_nat_cache(nm_i, ne); + 
write_unlock(&nm_i->nat_tree_lock); + } else { + write_lock(&nm_i->nat_tree_lock); + __clear_nat_cache_dirty(nm_i, ne); + ne->checkpointed = true; + write_unlock(&nm_i->nat_tree_lock); + } + } + if (!flushed) + mutex_unlock(&curseg->curseg_mutex); + f2fs_put_page(page, 1); + + /* 2) shrink nat caches if necessary */ + try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD); +} + +static int init_node_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned char *version_bitmap; + unsigned int nat_segs, nat_blocks; + + nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); + + /* segment_count_nat includes pair segment so divide to 2. */ + nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; + nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + nm_i->fcnt = 0; + nm_i->nat_cnt = 0; + + INIT_LIST_HEAD(&nm_i->free_nid_list); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); + INIT_LIST_HEAD(&nm_i->nat_entries); + INIT_LIST_HEAD(&nm_i->dirty_nat_entries); + + mutex_init(&nm_i->build_lock); + spin_lock_init(&nm_i->free_nid_list_lock); + rwlock_init(&nm_i->nat_tree_lock); + + nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); + nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); + version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); + if (!version_bitmap) + return -EFAULT; + + nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, + GFP_KERNEL); + if (!nm_i->nat_bitmap) + return -ENOMEM; + return 0; +} + +int build_node_manager(struct f2fs_sb_info *sbi) +{ + int err; + + sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); + if (!sbi->nm_info) + return -ENOMEM; + + err = init_node_manager(sbi); + if (err) + return err; + + build_free_nids(sbi); + return 0; +} + +void destroy_node_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i, *next_i; + struct nat_entry *natvec[NATVEC_SIZE]; + nid_t nid = 0; + unsigned int found; + + if (!nm_i) + return; + + /* destroy free nid list */ + spin_lock(&nm_i->free_nid_list_lock); + list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { + BUG_ON(i->state == NID_ALLOC); + __del_from_free_nid_list(i); + nm_i->fcnt--; + } + BUG_ON(nm_i->fcnt); + spin_unlock(&nm_i->free_nid_list_lock); + + /* destroy nat cache */ + write_lock(&nm_i->nat_tree_lock); + while ((found = __gang_lookup_nat_cache(nm_i, + nid, NATVEC_SIZE, natvec))) { + unsigned idx; + for (idx = 0; idx < found; idx++) { + struct nat_entry *e = natvec[idx]; + nid = nat_get_nid(e) + 1; + __del_from_nat_cache(nm_i, e); + } + } + BUG_ON(nm_i->nat_cnt); + write_unlock(&nm_i->nat_tree_lock); + + kfree(nm_i->nat_bitmap); + sbi->nm_info = NULL; + kfree(nm_i); +} + +int __init create_node_manager_caches(void) +{ + nat_entry_slab = f2fs_kmem_cache_create("nat_entry", + sizeof(struct nat_entry), NULL); + if (!nat_entry_slab) + return -ENOMEM; + + free_nid_slab = f2fs_kmem_cache_create("free_nid", + sizeof(struct free_nid), NULL); + if (!free_nid_slab) { + kmem_cache_destroy(nat_entry_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_node_manager_caches(void) +{ + kmem_cache_destroy(free_nid_slab); + kmem_cache_destroy(nat_entry_slab); +} diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h new file mode 100644 index 00000000000..7ee2f9e7e88 --- /dev/null +++ b/fs/f2fs/node.h @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/node.h + * + * Copyright 
(c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* start node id of a node block dedicated to the given node id */ +#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) + +/* node block offset on the NAT area dedicated to the given start node id */ +#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) + +/* # of pages to perform readahead before building free nids */ +#define FREE_NID_PAGES 4 + +/* maximum # of free node ids to produce during build_free_nids */ +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) + +/* maximum readahead size for node during getting data blocks */ +#define MAX_RA_NODE 128 + +/* maximum cached nat entries to manage memory footprint */ +#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) + +/* vector size for gang look-up from nat cache that consists of radix tree */ +#define NATVEC_SIZE 64 + +/* return value for read_node_page */ +#define LOCKED_PAGE 1 + +/* + * For node information + */ +struct node_info { + nid_t nid; /* node id */ + nid_t ino; /* inode number of the node's owner */ + block_t blk_addr; /* block address of the node */ + unsigned char version; /* version of the node */ +}; + +struct nat_entry { + struct list_head list; /* for clean or dirty nat list */ + bool checkpointed; /* whether it is checkpointed or not */ + struct node_info ni; /* in-memory node information */ +}; + +#define nat_get_nid(nat) (nat->ni.nid) +#define nat_set_nid(nat, n) (nat->ni.nid = n) +#define nat_get_blkaddr(nat) (nat->ni.blk_addr) +#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) +#define nat_get_ino(nat) (nat->ni.ino) +#define nat_set_ino(nat, i) (nat->ni.ino = i) +#define nat_get_version(nat) (nat->ni.version) +#define nat_set_version(nat, v) (nat->ni.version = v) + +#define __set_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->dirty_nat_entries); +#define __clear_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->nat_entries); +#define inc_node_version(version) (++version) + +static inline void node_info_from_raw_nat(struct node_info *ni, + struct f2fs_nat_entry *raw_ne) +{ + ni->ino = le32_to_cpu(raw_ne->ino); + ni->blk_addr = le32_to_cpu(raw_ne->block_addr); + ni->version = raw_ne->version; +} + +/* + * For free nid mangement + */ +enum nid_state { + NID_NEW, /* newly added to free nid list */ + NID_ALLOC /* it is allocated */ +}; + +struct free_nid { + struct list_head list; /* for free node id list */ + nid_t nid; /* node id */ + int state; /* in use or not: NID_NEW or NID_ALLOC */ +}; + +static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *fnid; + + if (nm_i->fcnt <= 0) + return -1; + spin_lock(&nm_i->free_nid_list_lock); + fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); + *nid = fnid->nid; + spin_unlock(&nm_i->free_nid_list_lock); + return 0; +} + +/* + * inline functions + */ +static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); +} + +static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + pgoff_t block_off; + pgoff_t block_addr; + int seg_off; + + block_off = 
NAT_BLOCK_OFFSET(start); + seg_off = block_off >> sbi->log_blocks_per_seg; + + block_addr = (pgoff_t)(nm_i->nat_blkaddr + + (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + block_addr += sbi->blocks_per_seg; + + return block_addr; +} + +static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + block_addr -= nm_i->nat_blkaddr; + if ((block_addr >> sbi->log_blocks_per_seg) % 2) + block_addr -= sbi->blocks_per_seg; + else + block_addr += sbi->blocks_per_seg; + + return block_addr + nm_i->nat_blkaddr; +} + +static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) +{ + unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + f2fs_clear_bit(block_off, nm_i->nat_bitmap); + else + f2fs_set_bit(block_off, nm_i->nat_bitmap); +} + +static inline void fill_node_footer(struct page *page, nid_t nid, + nid_t ino, unsigned int ofs, bool reset) +{ + struct f2fs_node *rn = F2FS_NODE(page); + if (reset) + memset(rn, 0, sizeof(*rn)); + rn->footer.nid = cpu_to_le32(nid); + rn->footer.ino = cpu_to_le32(ino); + rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); +} + +static inline void copy_node_footer(struct page *dst, struct page *src) +{ + struct f2fs_node *src_rn = F2FS_NODE(src); + struct f2fs_node *dst_rn = F2FS_NODE(dst); + memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); +} + +static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_node *rn = F2FS_NODE(page); + + rn->footer.cp_ver = ckpt->checkpoint_ver; + rn->footer.next_blkaddr = cpu_to_le32(blkaddr); +} + +static inline nid_t ino_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.ino); +} + +static inline nid_t nid_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.nid); +} + +static inline unsigned int ofs_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + unsigned flag = le32_to_cpu(rn->footer.flag); + return flag >> OFFSET_BIT_SHIFT; +} + +static inline unsigned long long cpver_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le64_to_cpu(rn->footer.cp_ver); +} + +static inline block_t next_blkaddr_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.next_blkaddr); +} + +/* + * f2fs assigns the following node offsets described as (num). 
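+ * (For reference: with the default 4KB block size N = NIDS_PER_BLOCK is
+ * 1018, so e.g. the double indirect node lands at offset 5 + 2N = 2041.)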
+ * N = NIDS_PER_BLOCK + * + * Inode block (0) + * |- direct node (1) + * |- direct node (2) + * |- indirect node (3) + * | `- direct node (4 => 4 + N - 1) + * |- indirect node (4 + N) + * | `- direct node (5 + N => 5 + 2N - 1) + * `- double indirect node (5 + 2N) + * `- indirect node (6 + 2N) + * `- direct node (x(N + 1)) + */ +static inline bool IS_DNODE(struct page *node_page) +{ + unsigned int ofs = ofs_of_node(node_page); + + if (ofs == XATTR_NODE_OFFSET) + return false; + + if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || + ofs == 5 + 2 * NIDS_PER_BLOCK) + return false; + if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { + ofs -= 6 + 2 * NIDS_PER_BLOCK; + if (!((long int)ofs % (NIDS_PER_BLOCK + 1))) + return false; + } + return true; +} + +static inline void set_nid(struct page *p, int off, nid_t nid, bool i) +{ + struct f2fs_node *rn = F2FS_NODE(p); + + wait_on_page_writeback(p); + + if (i) + rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); + else + rn->in.nid[off] = cpu_to_le32(nid); + set_page_dirty(p); +} + +static inline nid_t get_nid(struct page *p, int off, bool i) +{ + struct f2fs_node *rn = F2FS_NODE(p); + + if (i) + return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); + return le32_to_cpu(rn->in.nid[off]); +} + +/* + * Coldness identification: + * - Mark cold files in f2fs_inode_info + * - Mark cold node blocks in their node footer + * - Mark cold data pages in page cache + */ +static inline int is_file(struct inode *inode, int type) +{ + return F2FS_I(inode)->i_advise & type; +} + +static inline void set_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise |= type; +} + +static inline void clear_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise &= ~type; +} + +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) + +static inline int is_cold_data(struct page *page) +{ + return PageChecked(page); +} + +static inline void set_cold_data(struct page *page) +{ + SetPageChecked(page); +} + +static inline void clear_cold_data(struct page *page) +{ + ClearPageChecked(page); +} + +static inline int is_node(struct page *page, int type) +{ + struct f2fs_node *rn = F2FS_NODE(page); + return le32_to_cpu(rn->footer.flag) & (1 << type); +} + +#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) +#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) +#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) + +static inline void set_cold_node(struct inode *inode, struct page *page) +{ + struct f2fs_node *rn = F2FS_NODE(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + + if (S_ISDIR(inode->i_mode)) + flag &= ~(0x1 << COLD_BIT_SHIFT); + else + flag |= (0x1 << COLD_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_mark(struct page *page, int mark, int type) +{ + struct f2fs_node *rn = F2FS_NODE(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << type); + else + flag &= ~(0x1 << type); + rn->footer.flag = cpu_to_le32(flag); +} +#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c new file mode 100644 
index 00000000000..773752780af --- /dev/null +++ b/fs/f2fs/recovery.c @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2014 XPerience(R) Project + * + * fs/f2fs/recovery.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "f2fs.h" +#include "node.h" +#include "segment.h" + +static struct kmem_cache *fsync_entry_slab; + +bool space_for_roll_forward(struct f2fs_sb_info *sbi) +{ + if (sbi->last_valid_block_count + sbi->alloc_valid_block_count + > sbi->user_block_count) + return false; + return true; +} + +static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, + nid_t ino) +{ + struct list_head *this; + struct fsync_inode_entry *entry; + + list_for_each(this, head) { + entry = list_entry(this, struct fsync_inode_entry, list); + if (entry->inode->i_ino == ino) + return entry; + } + return NULL; +} + +static int recover_dentry(struct page *ipage, struct inode *inode) +{ + struct f2fs_node *raw_node = F2FS_NODE(ipage); + struct f2fs_inode *raw_inode = &(raw_node->i); + nid_t pino = le32_to_cpu(raw_inode->i_pino); + struct f2fs_dir_entry *de; + struct qstr name; + struct page *page; + struct inode *dir, *einode; + int err = 0; + + dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); + if (!dir) { + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + f2fs_msg(inode->i_sb, KERN_INFO, + "%s: f2fs_iget failed: %ld", + __func__, PTR_ERR(dir)); + err = PTR_ERR(dir); + goto out; + } + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + add_dirty_dir_inode(dir); + } + + name.len = le32_to_cpu(raw_inode->i_namelen); + name.name = raw_inode->i_name; +retry: + de = f2fs_find_entry(dir, &name, &page); + if (de && inode->i_ino == le32_to_cpu(de->ino)) + goto out_unmap_put; + if (de) { + einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); + if (IS_ERR(einode)) { + WARN_ON(1); + if (PTR_ERR(einode) == -ENOENT) + err = -EEXIST; + goto out_unmap_put; + } + err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); + if (err) { + iput(einode); + goto out_unmap_put; + } + f2fs_delete_entry(de, page, einode); + iput(einode); + goto retry; + } + err = __f2fs_add_link(dir, &name, inode); + goto out; + +out_unmap_put: + kunmap(page); + f2fs_put_page(page, 0); +out: + f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode and its dentry: " + "ino = %x, name = %s, dir = %lx, err = %d", + ino_of_node(ipage), raw_inode->i_name, + IS_ERR(dir) ? 
0 : dir->i_ino, err); + return err; +} + +static int recover_inode(struct inode *inode, struct page *node_page) +{ + struct f2fs_node *raw_node = F2FS_NODE(node_page); + struct f2fs_inode *raw_inode = &(raw_node->i); + + if (!IS_INODE(node_page)) + return 0; + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + i_size_write(inode, le64_to_cpu(raw_inode->i_size)); + inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + + if (is_dent_dnode(node_page)) + return recover_dentry(node_page, inode); + + f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode: ino = %x, name = %s", + ino_of_node(node_page), raw_inode->i_name); + return 0; +} + +static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) +{ + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); + struct curseg_info *curseg; + struct page *page; + block_t blkaddr; + int err = 0; + + /* get node pages in the current segment */ + curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); + blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; + + /* read node page */ + page = alloc_page(GFP_F2FS_ZERO); + if (!page) + return -ENOMEM; + lock_page(page); + + while (1) { + struct fsync_inode_entry *entry; + + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) + goto out; + + lock_page(page); + + if (cp_ver != cpver_of_node(page)) + break; + + if (!is_fsync_dnode(page)) + goto next; + + entry = get_fsync_inode(head, ino_of_node(page)); + if (entry) { + if (IS_INODE(page) && is_dent_dnode(page)) + set_inode_flag(F2FS_I(entry->inode), + FI_INC_LINK); + } else { + if (IS_INODE(page) && is_dent_dnode(page)) { + err = recover_inode_page(sbi, page); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: recover_inode_page failed: %d", + __func__, err); + break; + } + } + + /* add this fsync inode to the list */ + entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); + if (!entry) { + err = -ENOMEM; + break; + } + + entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); + if (IS_ERR(entry->inode)) { + err = PTR_ERR(entry->inode); + f2fs_msg(sbi->sb, KERN_INFO, + "%s: f2fs_iget failed: %d", + __func__, err); + kmem_cache_free(fsync_entry_slab, entry); + break; + } + list_add_tail(&entry->list, head); + } + entry->blkaddr = blkaddr; + + err = recover_inode(entry->inode, page); + if (err && err != -ENOENT) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: recover_inode failed: %d", + __func__, err); + break; + } +next: + /* check next segment */ + blkaddr = next_blkaddr_of_node(page); + } + unlock_page(page); +out: + __free_pages(page, 0); + return err; +} + +static void destroy_fsync_dnodes(struct list_head *head) +{ + struct fsync_inode_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, head, list) { + iput(entry->inode); + list_del(&entry->list); + kmem_cache_free(fsync_entry_slab, entry); + } +} + +static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, + block_t blkaddr, struct dnode_of_data *dn) +{ + struct seg_entry *sentry; + unsigned int segno = GET_SEGNO(sbi, blkaddr); + unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & + (sbi->blocks_per_seg - 1); + struct f2fs_summary sum; + nid_t ino, nid; + void *kaddr; + struct inode *inode; + struct page *node_page; + unsigned int offset; + block_t 
bidx; + int i; + + sentry = get_seg_entry(sbi, segno); + if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) + return 0; + + /* Get the previous summary */ + for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + if (curseg->segno == segno) { + sum = curseg->sum_blk->entries[blkoff]; + break; + } + } + if (i > CURSEG_COLD_DATA) { + struct page *sum_page = get_sum_page(sbi, segno); + struct f2fs_summary_block *sum_node; + kaddr = page_address(sum_page); + sum_node = (struct f2fs_summary_block *)kaddr; + sum = sum_node->entries[blkoff]; + f2fs_put_page(sum_page, 1); + } + + /* Use the locked dnode page and inode */ + nid = le32_to_cpu(sum.nid); + if (dn->inode->i_ino == nid) { + struct dnode_of_data tdn = *dn; + tdn.nid = nid; + tdn.node_page = dn->inode_page; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + truncate_data_blocks_range(&tdn, 1); + return 0; + } else if (dn->nid == nid) { + struct dnode_of_data tdn = *dn; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + truncate_data_blocks_range(&tdn, 1); + return 0; + } + + /* Get the node page */ + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + offset = ofs_of_node(node_page); + ino = ino_of_node(node_page); + f2fs_put_page(node_page, 1); + + /* Skip nodes with circular references */ + if (ino == dn->inode->i_ino) { + f2fs_msg(sbi->sb, KERN_ERR, "%s: node %x has circular inode %x", + __func__, ino, nid); + f2fs_handle_error(sbi); + return -EDEADLK; + } + + /* Deallocate previous index in the node page */ + inode = f2fs_iget(sbi->sb, ino); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + bidx = start_bidx_of_node(offset, F2FS_I(inode)) + + le16_to_cpu(sum.ofs_in_node); + + truncate_hole(inode, bidx, bidx + 1); + iput(inode); + return 0; +} + +static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, + struct page *page, block_t blkaddr) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int start, end; + struct dnode_of_data dn; + struct f2fs_summary sum; + struct node_info ni; + int err = 0, recovered = 0; + int ilock; + + start = start_bidx_of_node(ofs_of_node(page), fi); + if (IS_INODE(page)) + end = start + ADDRS_PER_INODE(fi); + else + end = start + ADDRS_PER_BLOCK; + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + + err = get_dnode_of_data(&dn, start, ALLOC_NODE); + if (err) { + mutex_unlock_op(sbi, ilock); + f2fs_msg(sbi->sb, KERN_INFO, + "%s: get_dnode_of_data failed: %d", __func__, err); + return err; + } + + wait_on_page_writeback(dn.node_page); + + get_node_info(sbi, dn.nid, &ni); + BUG_ON(ni.ino != ino_of_node(page)); + BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); + + for (; start < end; start++) { + block_t src, dest; + + src = datablock_addr(dn.node_page, dn.ofs_in_node); + dest = datablock_addr(page, dn.ofs_in_node); + + if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { + if (src == NULL_ADDR) { + int err = reserve_new_block(&dn); + /* We should not get -ENOSPC */ + if (err) + f2fs_msg(sbi->sb, KERN_INFO, + "%s: reserve_new_block failed: %d", + __func__, err); + BUG_ON(err); + } + + /* Check the previous node page having this index */ + err = check_index_in_prev_nodes(sbi, dest, &dn); + if (err) + goto err; + + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); + + /* write dummy data page */ + recover_data_page(sbi, NULL, &sum, src, dest); + update_extent_cache(dest, &dn); + recovered++; + } + dn.ofs_in_node++; + } + + /* write node page in 
place */ + set_summary(&sum, dn.nid, 0, 0); + if (IS_INODE(dn.node_page)) + sync_inode_page(&dn); + + copy_node_footer(dn.node_page, page); + fill_node_footer(dn.node_page, dn.nid, ni.ino, + ofs_of_node(page), false); + set_page_dirty(dn.node_page); + + recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); +err: + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + f2fs_msg(sbi->sb, KERN_DEBUG, "recover_data: ino = %lx, " + "recovered_data = %d blocks, err = %d", + inode->i_ino, recovered, err); + return err; +} + +static int recover_data(struct f2fs_sb_info *sbi, + struct list_head *head, int type) +{ + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); + struct curseg_info *curseg; + struct page *page; + int err = 0; + block_t blkaddr; + + /* get node pages in the current segment */ + curseg = CURSEG_I(sbi, type); + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + + /* read node page */ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (!page) + return -ENOMEM; + + lock_page(page); + + while (1) { + struct fsync_inode_entry *entry; + + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: f2fs_readpage failed: %d", + __func__, err); + goto out; + } + + lock_page(page); + + if (cp_ver != cpver_of_node(page)) + break; + + entry = get_fsync_inode(head, ino_of_node(page)); + if (!entry) + goto next; + + err = do_recover_data(sbi, entry->inode, page, blkaddr); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: do_recover_data failed: %d", + __func__, err); + break; + } + + if (entry->blkaddr == blkaddr) { + iput(entry->inode); + list_del(&entry->list); + kmem_cache_free(fsync_entry_slab, entry); + } +next: + /* check next segment */ + blkaddr = next_blkaddr_of_node(page); + } + unlock_page(page); +out: + __free_pages(page, 0); + + if (!err) + allocate_new_segments(sbi); + return err; +} + +int recover_fsync_data(struct f2fs_sb_info *sbi) +{ + struct list_head inode_list; + int err; + + fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", + sizeof(struct fsync_inode_entry), NULL); + if (unlikely(!fsync_entry_slab)) + return -ENOMEM; + + INIT_LIST_HEAD(&inode_list); + + /* step #1: find fsynced inode numbers */ + sbi->por_doing = 1; + err = find_fsync_dnodes(sbi, &inode_list); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: find_fsync_dnodes failed: %d", __func__, err); + goto out; + } + + if (list_empty(&inode_list)) + goto out; + + /* step #2: recover data */ + err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); + if (!list_empty(&inode_list)) { + f2fs_handle_error(sbi); + err = -EIO; + } +out: + destroy_fsync_dnodes(&inode_list); + kmem_cache_destroy(fsync_entry_slab); + sbi->por_doing = 0; + if (!err) { + f2fs_msg(sbi->sb, KERN_INFO, "recovery complete"); + write_checkpoint(sbi, false); + } else + f2fs_msg(sbi->sb, KERN_ERR, "recovery did not fully complete"); + + return err; +} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c new file mode 100644 index 00000000000..5a88640ece0 --- /dev/null +++ b/fs/f2fs/segment.c @@ -0,0 +1,1789 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/segment.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "segment.h" +#include "node.h" +#include + +/* + * This function balances dirty node and dentry pages. + * In addition, it controls garbage collection. + */ +void f2fs_balance_fs(struct f2fs_sb_info *sbi) +{ + /* + * We should do GC or end up with checkpoint, if there are so many dirty + * dir/node pages without enough free segments. + */ + if (has_not_enough_free_secs(sbi, 0)) { + mutex_lock(&sbi->gc_mutex); + f2fs_gc(sbi); + } +} + +static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + /* need not be added */ + if (IS_CURSEG(sbi, segno)) + return; + + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; + + if (dirty_type == DIRTY) { + struct seg_entry *sentry = get_seg_entry(sbi, segno); + enum dirty_type t = DIRTY_HOT_DATA; + + dirty_type = sentry->type; + + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; + + /* Only one bitmap should be set */ + for (; t <= DIRTY_COLD_NODE; t++) { + if (t == dirty_type) + continue; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + } + } +} + +static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]--; + + if (dirty_type == DIRTY) { + enum dirty_type t = DIRTY_HOT_DATA; + + /* clear all the bitmaps */ + for (; t <= DIRTY_COLD_NODE; t++) + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + + if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + clear_bit(GET_SECNO(sbi, segno), + dirty_i->victim_secmap); + } +} + +/* + * Should not occur error such as -ENOMEM. + * Adding dirty entry into seglist is not critical operation. + * If a given segment is one of current working segments, it won't be added. + */ +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned short valid_blocks; + + if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) + return; + + mutex_lock(&dirty_i->seglist_lock); + + valid_blocks = get_valid_blocks(sbi, segno, 0); + + if (valid_blocks == 0) { + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } else if (valid_blocks < sbi->blocks_per_seg) { + __locate_dirty_segment(sbi, segno, DIRTY); + } else { + /* Recovery routine with SSR needs this */ + __remove_dirty_segment(sbi, segno, DIRTY); + } + + mutex_unlock(&dirty_i->seglist_lock); +} + +/* + * Should call clear_prefree_segments after checkpoint is done. 
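+ * (Blocks in a prefree segment may still be referenced by the previous
+ * checkpoint, so they must not be handed out again until the new
+ * checkpoint that frees them is safely on disk.)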
+ */ +static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = -1; + unsigned int total_segs = TOTAL_SEGS(sbi); + + mutex_lock(&dirty_i->seglist_lock); + while (1) { + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + segno + 1); + if (segno >= total_segs) + break; + __set_test_and_free(sbi, segno); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +void clear_prefree_segments(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = -1; + unsigned int total_segs = TOTAL_SEGS(sbi); + + mutex_lock(&dirty_i->seglist_lock); + while (1) { + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + segno + 1); + if (segno >= total_segs) + break; + + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) + dirty_i->nr_dirty[PRE]--; + + /* Let's use trim */ + if (test_opt(sbi, DISCARD)) + blkdev_issue_discard(sbi->sb->s_bdev, + START_BLOCK(sbi, segno) << + sbi->log_sectors_per_block, + 1 << (sbi->log_sectors_per_block + + sbi->log_blocks_per_seg), + GFP_NOFS, 0); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) + sit_i->dirty_sentries++; +} + +static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, + unsigned int segno, int modified) +{ + struct seg_entry *se = get_seg_entry(sbi, segno); + se->type = type; + if (modified) + __mark_sit_entry_dirty(sbi, segno); +} + +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + bool check_map = false; + + segno = GET_SEGNO(sbi, blkaddr); + + se = get_seg_entry(sbi, segno); + new_vblocks = se->valid_blocks + del; + offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); + + if (new_vblocks < 0 || new_vblocks > sbi->blocks_per_seg || + (new_vblocks >> (sizeof(unsigned short) << 3))) + if (f2fs_handle_error(sbi)) + check_map = true; + + se->mtime = get_mtime(sbi); + SIT_I(sbi)->max_mtime = se->mtime; + + /* Update valid block bitmap */ + if (del > 0) { + if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_handle_error(sbi)) + check_map = true; + } else { + if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (f2fs_handle_error(sbi)) + check_map = true; + } + + if (unlikely(check_map)) { + int i; + long int vblocks = 0; + + f2fs_msg(sbi->sb, KERN_ERR, + "cannot %svalidate block %u in segment %u with %hu valid blocks", + (del < 0) ? 
"in" : "", + offset, segno, se->valid_blocks); + + /* assume the count was stale to start */ + del = 0; + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, se->cur_valid_map)) + vblocks++; + if (vblocks != se->valid_blocks) { + f2fs_msg(sbi->sb, KERN_INFO, "correcting valid block " + "counts %d -> %ld", se->valid_blocks, vblocks); + /* make accounting corrections */ + del = vblocks - se->valid_blocks; + } + } + se->valid_blocks += del; + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks += del; + + __mark_sit_entry_dirty(sbi, segno); + + /* update total number of valid blocks to be written in ckpt area */ + SIT_I(sbi)->written_valid_blocks += del; + + if (sbi->segs_per_sec > 1) + get_sec_entry(sbi, segno)->valid_blocks += del; +} + +static void refresh_sit_entry(struct f2fs_sb_info *sbi, + block_t old_blkaddr, block_t new_blkaddr) +{ + update_sit_entry(sbi, new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); +} + +void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +{ + unsigned int segno = GET_SEGNO(sbi, addr); + struct sit_info *sit_i = SIT_I(sbi); + + BUG_ON(addr == NULL_ADDR); + if (addr == NEW_ADDR) + return; + + if (segno >= TOTAL_SEGS(sbi)) { + f2fs_msg(sbi->sb, KERN_ERR, "invalid segment number %u", segno); + if (f2fs_handle_error(sbi)) + return; + } + + /* add it into sit main buffer */ + mutex_lock(&sit_i->sentry_lock); + + update_sit_entry(sbi, addr, -1); + + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + mutex_unlock(&sit_i->sentry_lock); +} + +/* + * This function should be resided under the curseg_mutex lock + */ +static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, + struct f2fs_summary *sum) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + void *addr = curseg->sum_blk; + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); + memcpy(addr, sum, sizeof(struct f2fs_summary)); +} + +/* + * Calculate the number of current summary pages for writing + */ +int npages_for_summary_flush(struct f2fs_sb_info *sbi) +{ + int total_size_bytes = 0; + int valid_sum_count = 0; + int i, sum_space; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (sbi->ckpt->alloc_type[i] == SSR) + valid_sum_count += sbi->blocks_per_seg; + else + valid_sum_count += curseg_blkoff(sbi, i); + } + + total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) + + sizeof(struct nat_journal) + 2 + + sizeof(struct sit_journal) + 2; + sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; + if (total_size_bytes < sum_space) + return 1; + else if (total_size_bytes < 2 * sum_space) + return 2; + return 3; +} + +/* + * Caller should put this summary page + */ +struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +{ + return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); +} + +static void write_sum_page(struct f2fs_sb_info *sbi, + struct f2fs_summary_block *sum_blk, block_t blk_addr) +{ + struct page *page = grab_meta_page(sbi, blk_addr); + void *kaddr = page_address(page); + memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); + set_page_dirty(page); + f2fs_put_page(page, 1); +} + +static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno + 1; + struct free_segmap_info *free_i = FREE_I(sbi); + + if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); + return 0; +} + +/* + * Find a new segment from the free 
segments bitmap to right order + * This function should be returned with success, otherwise BUG + */ +static void get_new_segment(struct f2fs_sb_info *sbi, + unsigned int *newseg, bool new_sec, int dir) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int segno, secno, zoneno; + unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; + unsigned int hint = *newseg / sbi->segs_per_sec; + unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); + unsigned int left_start = hint; + bool init = true; + int go_left = 0; + int i; + + write_lock(&free_i->segmap_lock); + + if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { + segno = find_next_zero_bit(free_i->free_segmap, + TOTAL_SEGS(sbi), *newseg + 1); + if (segno - *newseg < sbi->segs_per_sec - + (*newseg % sbi->segs_per_sec)) + goto got_it; + } +find_other_zone: + secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); + if (secno >= TOTAL_SECS(sbi)) { + if (dir == ALLOC_RIGHT) { + secno = find_next_zero_bit(free_i->free_secmap, + TOTAL_SECS(sbi), 0); + BUG_ON(secno >= TOTAL_SECS(sbi)); + } else { + go_left = 1; + left_start = hint - 1; + } + } + if (go_left == 0) + goto skip_left; + + while (test_bit(left_start, free_i->free_secmap)) { + if (left_start > 0) { + left_start--; + continue; + } + left_start = find_next_zero_bit(free_i->free_secmap, + TOTAL_SECS(sbi), 0); + BUG_ON(left_start >= TOTAL_SECS(sbi)); + break; + } + secno = left_start; +skip_left: + hint = secno; + segno = secno * sbi->segs_per_sec; + zoneno = secno / sbi->secs_per_zone; + + /* give up on finding another zone */ + if (!init) + goto got_it; + if (sbi->secs_per_zone == 1) + goto got_it; + if (zoneno == old_zoneno) + goto got_it; + if (dir == ALLOC_LEFT) { + if (!go_left && zoneno + 1 >= total_zones) + goto got_it; + if (go_left && zoneno == 0) + goto got_it; + } + for (i = 0; i < NR_CURSEG_TYPE; i++) + if (CURSEG_I(sbi, i)->zone == zoneno) + break; + + if (i < NR_CURSEG_TYPE) { + /* zone is in user, try another */ + if (go_left) + hint = zoneno * sbi->secs_per_zone - 1; + else if (zoneno + 1 >= total_zones) + hint = 0; + else + hint = (zoneno + 1) * sbi->secs_per_zone; + init = false; + goto find_other_zone; + } +got_it: + /* set it as dirty segment in free segmap */ + BUG_ON(test_bit(segno, free_i->free_segmap)); + __set_inuse(sbi, segno); + *newseg = segno; + write_unlock(&free_i->segmap_lock); +} + +static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + struct summary_footer *sum_footer; + + curseg->segno = curseg->next_segno; + curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); + curseg->next_blkoff = 0; + curseg->next_segno = NULL_SEGNO; + + sum_footer = &(curseg->sum_blk->footer); + memset(sum_footer, 0, sizeof(struct summary_footer)); + if (IS_DATASEG(type)) + SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA); + if (IS_NODESEG(type)) + SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); + __set_sit_entry_type(sbi, type, curseg->segno, modified); +} + +/* + * Allocate a current working segment. + * This function always allocates a free segment in LFS manner. 
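+ * (LFS allocation takes a completely free segment and writes it
+ * sequentially, as opposed to SSR, which reuses the invalid blocks of a
+ * partially valid segment.)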
+ */ +static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno; + int dir = ALLOC_LEFT; + + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, segno)); + if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) + dir = ALLOC_RIGHT; + + if (test_opt(sbi, NOHEAP)) + dir = ALLOC_RIGHT; + + get_new_segment(sbi, &segno, new_sec, dir); + curseg->next_segno = segno; + reset_curseg(sbi, type, 1); + curseg->alloc_type = LFS; +} + +static void __next_free_blkoff(struct f2fs_sb_info *sbi, + struct curseg_info *seg, block_t start) +{ + struct seg_entry *se = get_seg_entry(sbi, seg->segno); + block_t ofs; + for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { + if (!f2fs_test_bit(ofs, se->ckpt_valid_map) + && !f2fs_test_bit(ofs, se->cur_valid_map)) + break; + } + seg->next_blkoff = ofs; +} + +/* + * If a segment is written by LFS manner, next block offset is just obtained + * by increasing the current block offset. However, if a segment is written by + * SSR manner, next block offset obtained by calling __next_free_blkoff + */ +static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, + struct curseg_info *seg) +{ + if (seg->alloc_type == SSR) + __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); + else + seg->next_blkoff++; +} + +/* + * This function always allocates a used segment (from dirty seglist) by SSR + * manner, so it should recover the existing segment information of valid blocks + */ +static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int new_segno = curseg->next_segno; + struct f2fs_summary_block *sum_node; + struct page *sum_page; + + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, curseg->segno)); + __set_test_and_inuse(sbi, new_segno); + + mutex_lock(&dirty_i->seglist_lock); + __remove_dirty_segment(sbi, new_segno, PRE); + __remove_dirty_segment(sbi, new_segno, DIRTY); + mutex_unlock(&dirty_i->seglist_lock); + + reset_curseg(sbi, type, 1); + curseg->alloc_type = SSR; + __next_free_blkoff(sbi, curseg, 0); + + if (reuse) { + sum_page = get_sum_page(sbi, new_segno); + sum_node = (struct f2fs_summary_block *)page_address(sum_page); + memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); + f2fs_put_page(sum_page, 1); + } +} + +static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + + if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0)) + return v_ops->get_victim(sbi, + &(curseg)->next_segno, BG_GC, type, SSR); + + /* For data segments, let's do SSR more intensively */ + for (; type >= CURSEG_HOT_DATA; type--) + if (v_ops->get_victim(sbi, &(curseg)->next_segno, + BG_GC, type, SSR)) + return 1; + return 0; +} + +/* + * flush out current segment and replace it with new segment + * This function should be returned with success, otherwise BUG + */ +static void allocate_segment_by_default(struct f2fs_sb_info *sbi, + int type, bool force) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + + if (force) + new_curseg(sbi, type, true); + else if (type == CURSEG_WARM_NODE) + new_curseg(sbi, type, false); + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + new_curseg(sbi, type, false); + else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type, true); + else + 
new_curseg(sbi, type, false); +#ifdef CONFIG_F2FS_STAT_FS + sbi->segment_count[curseg->alloc_type]++; +#endif +} + +void allocate_new_segments(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg; + unsigned int old_curseg; + int i; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + curseg = CURSEG_I(sbi, i); + old_curseg = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_curseg); + } +} + +static const struct segment_allocation default_salloc_ops = { + .allocate_segment = allocate_segment_by_default, +}; + +static void f2fs_end_io_write(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_private *p = bio->bi_private; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + if (!uptodate) { + SetPageError(page); + if (page->mapping) + set_bit(AS_EIO, &page->mapping->flags); + set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); + p->sbi->sb->s_flags |= MS_RDONLY; + } + end_page_writeback(page); + dec_page_count(p->sbi, F2FS_WRITEBACK); + } while (bvec >= bio->bi_io_vec); + + if (p->is_sync) + complete(p->wait); + kfree(p); + bio_put(bio); +} + +struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) +{ + struct bio *bio; + + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + bio->bi_bdev = bdev; + bio->bi_private = NULL; + + return bio; +} + +static void do_submit_bio(struct f2fs_sb_info *sbi, + enum page_type type, bool sync) +{ + int rw = sync ? WRITE_SYNC : WRITE; + enum page_type btype = type > META ? META : type; + + if (type >= META_FLUSH) + rw = WRITE_FLUSH_FUA; + + if (btype == META) + rw |= REQ_META; + + if (sbi->bio[btype]) { + struct bio_private *p = sbi->bio[btype]->bi_private; + p->sbi = sbi; + sbi->bio[btype]->bi_end_io = f2fs_end_io_write; + + trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); + + if (type == META_FLUSH) { + DECLARE_COMPLETION_ONSTACK(wait); + p->is_sync = true; + p->wait = &wait; + submit_bio(rw, sbi->bio[btype]); + wait_for_completion(&wait); + } else { + p->is_sync = false; + submit_bio(rw, sbi->bio[btype]); + } + sbi->bio[btype] = NULL; + } +} + +void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) +{ + down_write(&sbi->bio_sem); + do_submit_bio(sbi, type, sync); + up_write(&sbi->bio_sem); +} + +static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, enum page_type type) +{ + struct block_device *bdev = sbi->sb->s_bdev; + + verify_block_addr(sbi, blk_addr); + + down_write(&sbi->bio_sem); + + inc_page_count(sbi, F2FS_WRITEBACK); + + if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) + do_submit_bio(sbi, type, false); +alloc_new: + if (sbi->bio[type] == NULL) { + struct bio_private *priv; +retry: + priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); + if (!priv) { + cond_resched(); + goto retry; + } + + sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); + sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + sbi->bio[type]->bi_private = priv; + /* + * The end_io will be assigned at the sumbission phase. + * Until then, let bio_add_page() merge consecutive IOs as much + * as possible. 
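+ *
+ * Merging only happens while the target blocks stay physically
+ * consecutive: submit_write_page() keeps reusing sbi->bio[type] only
+ * when last_block_in_bio[type] == blk_addr - 1 and flushes the pending
+ * bio otherwise.  As a rough illustration (assuming 4KB blocks and
+ * 512-byte sectors), block N starts the bio at sector N << 3 via
+ * SECTOR_FROM_BLOCK() and every merged page extends it by 8 sectors.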
+ */ + } + + if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + do_submit_bio(sbi, type, false); + goto alloc_new; + } + + sbi->last_block_in_bio[type] = blk_addr; + + up_write(&sbi->bio_sem); + trace_f2fs_submit_write_page(page, blk_addr, type); +} + +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + if (PageWriteback(page)) { + f2fs_submit_bio(sbi, type, sync); + wait_on_page_writeback(page); + } +} + +static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + if (curseg->next_blkoff < sbi->blocks_per_seg) + return true; + return false; +} + +static int __get_segment_type_2(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) + return CURSEG_HOT_DATA; + else + return CURSEG_HOT_NODE; +} + +static int __get_segment_type_4(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) { + struct inode *inode = page->mapping->host; + + if (S_ISDIR(inode->i_mode)) + return CURSEG_HOT_DATA; + else + return CURSEG_COLD_DATA; + } else { + if (IS_DNODE(page) && !is_cold_node(page)) + return CURSEG_HOT_NODE; + else + return CURSEG_COLD_NODE; + } +} + +static int __get_segment_type_6(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) { + struct inode *inode = page->mapping->host; + + if (S_ISDIR(inode->i_mode)) + return CURSEG_HOT_DATA; + else if (is_cold_data(page) || file_is_cold(inode)) + return CURSEG_COLD_DATA; + else + return CURSEG_WARM_DATA; + } else { + if (IS_DNODE(page)) + return is_cold_node(page) ? CURSEG_WARM_NODE : + CURSEG_HOT_NODE; + else + return CURSEG_COLD_NODE; + } +} + +static int __get_segment_type(struct page *page, enum page_type p_type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + switch (sbi->active_logs) { + case 2: + return __get_segment_type_2(page, p_type); + case 4: + return __get_segment_type_4(page, p_type); + } + /* NR_CURSEG_TYPE(6) logs by default */ + BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); + return __get_segment_type_6(page, p_type); +} + +static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, enum page_type p_type) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg; + unsigned int old_cursegno; + int type; + + type = __get_segment_type(page, p_type); + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + + *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + old_cursegno = curseg->segno; + + /* + * __add_sum_entry should be resided under the curseg_mutex + * because, this function updates a summary entry in the + * current summary block. + */ + __add_sum_entry(sbi, type, sum); + + mutex_lock(&sit_i->sentry_lock); + __refresh_next_blkoff(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS + sbi->block_count[curseg->alloc_type]++; +#endif + + /* + * SIT information should be updated before segment allocation, + * since SSR needs latest valid block information. 
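+ * Concretely, refresh_sit_entry() below invalidates old_blkaddr and
+ * validates *new_blkaddr first, so that when the current log runs out
+ * of space, allocate_segment() -> get_ssr_segment() picks its SSR
+ * victim from up-to-date valid block counts.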
+ */ + refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + mutex_unlock(&sit_i->sentry_lock); + + if (p_type == NODE) + fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + + /* writeout dirty page into bdev */ + submit_write_page(sbi, page, *new_blkaddr, p_type); + + mutex_unlock(&curseg->curseg_mutex); +} + +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) +{ + set_page_writeback(page); + submit_write_page(sbi, page, page->index, META); +} + +void write_node_page(struct f2fs_sb_info *sbi, struct page *page, + unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) +{ + struct f2fs_summary sum; + set_summary(&sum, nid, 0, 0); + do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); +} + +void write_data_page(struct inode *inode, struct page *page, + struct dnode_of_data *dn, block_t old_blkaddr, + block_t *new_blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_summary sum; + struct node_info ni; + + BUG_ON(old_blkaddr == NULL_ADDR); + get_node_info(sbi, dn->nid, &ni); + set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + + do_write_page(sbi, page, old_blkaddr, + new_blkaddr, &sum, DATA); +} + +void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blk_addr) +{ + submit_write_page(sbi, page, old_blk_addr, DATA); +} + +void recover_data_page(struct f2fs_sb_info *sbi, + struct page *page, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg; + unsigned int segno, old_cursegno; + struct seg_entry *se; + int type; + + segno = GET_SEGNO(sbi, new_blkaddr); + se = get_seg_entry(sbi, segno); + type = se->type; + + if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (old_blkaddr == NULL_ADDR) + type = CURSEG_COLD_DATA; + else + type = CURSEG_WARM_DATA; + } + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + old_cursegno = curseg->segno; + + /* change the current segment */ + if (segno != curseg->segno) { + curseg->next_segno = segno; + change_curseg(sbi, type, true); + } + + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & + (sbi->blocks_per_seg - 1); + __add_sum_entry(sbi, type, sum); + + refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); +} + +void rewrite_node_page(struct f2fs_sb_info *sbi, + struct page *page, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + int type = CURSEG_WARM_NODE; + struct curseg_info *curseg; + unsigned int segno, old_cursegno; + block_t next_blkaddr = next_blkaddr_of_node(page); + unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); + + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + segno = GET_SEGNO(sbi, new_blkaddr); + old_cursegno = curseg->segno; + + /* change the current segment */ + if (segno != curseg->segno) { + curseg->next_segno = segno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & + (sbi->blocks_per_seg - 1); + __add_sum_entry(sbi, type, 
sum); + + /* change the current log to the next block addr in advance */ + if (next_segno != segno) { + curseg->next_segno = next_segno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & + (sbi->blocks_per_seg - 1); + + /* rewrite node page */ + set_page_writeback(page); + submit_write_page(sbi, page, new_blkaddr, NODE); + f2fs_submit_bio(sbi, NODE, true); + refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); +} + +static int read_compacted_summaries(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct curseg_info *seg_i; + unsigned char *kaddr; + struct page *page; + block_t start; + int i, j, offset; + + start = start_sum_block(sbi); + + page = get_meta_page(sbi, start++); + kaddr = (unsigned char *)page_address(page); + + /* Step 1: restore nat cache */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); + memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE); + + /* Step 2: restore sit cache */ + seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); + memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE, + SUM_JOURNAL_SIZE); + offset = 2 * SUM_JOURNAL_SIZE; + + /* Step 3: restore summary entries */ + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + unsigned short blk_off; + unsigned int segno; + + seg_i = CURSEG_I(sbi, i); + segno = le32_to_cpu(ckpt->cur_data_segno[i]); + blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); + seg_i->next_segno = segno; + reset_curseg(sbi, i, 0); + seg_i->alloc_type = ckpt->alloc_type[i]; + seg_i->next_blkoff = blk_off; + + if (seg_i->alloc_type == SSR) + blk_off = sbi->blocks_per_seg; + + for (j = 0; j < blk_off; j++) { + struct f2fs_summary *s; + s = (struct f2fs_summary *)(kaddr + offset); + seg_i->sum_blk->entries[j] = *s; + offset += SUMMARY_SIZE; + if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + SUM_FOOTER_SIZE) + continue; + + f2fs_put_page(page, 1); + page = NULL; + + page = get_meta_page(sbi, start++); + kaddr = (unsigned char *)page_address(page); + offset = 0; + } + } + f2fs_put_page(page, 1); + return 0; +} + +static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_summary_block *sum; + struct curseg_info *curseg; + struct page *new; + unsigned short blk_off; + unsigned int segno = 0; + block_t blk_addr = 0; + + /* get segment number and block addr */ + if (IS_DATASEG(type)) { + segno = le32_to_cpu(ckpt->cur_data_segno[type]); + blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - + CURSEG_HOT_DATA]); + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); + else + blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); + } else { + segno = le32_to_cpu(ckpt->cur_node_segno[type - + CURSEG_HOT_NODE]); + blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - + CURSEG_HOT_NODE]); + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, + type - CURSEG_HOT_NODE); + else + blk_addr = GET_SUM_BLOCK(sbi, segno); + } + + new = get_meta_page(sbi, blk_addr); + sum = (struct f2fs_summary_block *)page_address(new); + + if (IS_NODESEG(type)) { + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { + struct f2fs_summary *ns = &sum->entries[0]; + int i; + for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { + ns->version = 0; + ns->ofs_in_node = 0; + } + 
} else { + if (restore_node_summary(sbi, segno, sum)) { + f2fs_put_page(new, 1); + return -EINVAL; + } + } + } + + /* set uncompleted segment to curseg */ + curseg = CURSEG_I(sbi, type); + mutex_lock(&curseg->curseg_mutex); + memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE); + curseg->next_segno = segno; + reset_curseg(sbi, type, 0); + curseg->alloc_type = ckpt->alloc_type[type]; + curseg->next_blkoff = blk_off; + mutex_unlock(&curseg->curseg_mutex); + f2fs_put_page(new, 1); + return 0; +} + +static int restore_curseg_summaries(struct f2fs_sb_info *sbi) +{ + int type = CURSEG_HOT_DATA; + + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + /* restore for compacted data summary */ + if (read_compacted_summaries(sbi)) + return -EINVAL; + type = CURSEG_HOT_NODE; + } + + for (; type <= CURSEG_COLD_NODE; type++) + if (read_normal_summaries(sbi, type)) + return -EINVAL; + return 0; +} + +static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct page *page; + unsigned char *kaddr; + struct f2fs_summary *summary; + struct curseg_info *seg_i; + int written_size = 0; + int i, j; + + page = grab_meta_page(sbi, blkaddr++); + kaddr = (unsigned char *)page_address(page); + + /* Step 1: write nat cache */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); + memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE); + written_size += SUM_JOURNAL_SIZE; + + /* Step 2: write sit cache */ + seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); + memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits, + SUM_JOURNAL_SIZE); + written_size += SUM_JOURNAL_SIZE; + + set_page_dirty(page); + + /* Step 3: write summary entries */ + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + unsigned short blkoff; + seg_i = CURSEG_I(sbi, i); + if (sbi->ckpt->alloc_type[i] == SSR) + blkoff = sbi->blocks_per_seg; + else + blkoff = curseg_blkoff(sbi, i); + + for (j = 0; j < blkoff; j++) { + if (!page) { + page = grab_meta_page(sbi, blkaddr++); + kaddr = (unsigned char *)page_address(page); + written_size = 0; + } + summary = (struct f2fs_summary *)(kaddr + written_size); + *summary = seg_i->sum_blk->entries[j]; + written_size += SUMMARY_SIZE; + set_page_dirty(page); + + if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + SUM_FOOTER_SIZE) + continue; + + f2fs_put_page(page, 1); + page = NULL; + } + } + if (page) + f2fs_put_page(page, 1); +} + +static void write_normal_summaries(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + int i, end; + if (IS_DATASEG(type)) + end = type + NR_CURSEG_DATA_TYPE; + else + end = type + NR_CURSEG_NODE_TYPE; + + for (i = type; i < end; i++) { + struct curseg_info *sum = CURSEG_I(sbi, i); + mutex_lock(&sum->curseg_mutex); + write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type)); + mutex_unlock(&sum->curseg_mutex); + } +} + +void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +{ + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) + write_compacted_summaries(sbi, start_blk); + else + write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); +} + +void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +{ + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) + write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); +} + +int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, + unsigned int val, int alloc) +{ + int i; + + if (type == NAT_JOURNAL) { + for (i = 0; i < nats_in_cursum(sum); i++) { + if (le32_to_cpu(nid_in_journal(sum, i)) == val) + return i; + } + if (alloc && nats_in_cursum(sum) < 
NAT_JOURNAL_ENTRIES) + return update_nats_in_cursum(sum, 1); + } else if (type == SIT_JOURNAL) { + for (i = 0; i < sits_in_cursum(sum); i++) + if (le32_to_cpu(segno_in_journal(sum, i)) == val) + return i; + if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) + return update_sits_in_cursum(sum, 1); + } + return -1; +} + +static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, segno); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return get_meta_page(sbi, blk_addr); +} + +static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct page *src_page, *dst_page; + pgoff_t src_off, dst_off; + void *src_addr, *dst_addr; + + src_off = current_sit_addr(sbi, start); + dst_off = next_sit_addr(sbi, src_off); + + /* get current sit block page without lock */ + src_page = get_meta_page(sbi, src_off); + dst_page = grab_meta_page(sbi, dst_off); + BUG_ON(PageDirty(src_page)); + + src_addr = page_address(src_page); + dst_addr = page_address(dst_page); + memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + + set_page_dirty(dst_page); + f2fs_put_page(src_page, 1); + + set_to_next_sit(sit_i, start); + + return dst_page; +} + +static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i; + + /* + * If the journal area in the current summary is full of sit entries, + * all the sit entries will be flushed. Otherwise the sit entries + * are not able to replace with newly hot sit entries. + */ + if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { + for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { + unsigned int segno; + segno = le32_to_cpu(segno_in_journal(sum, i)); + __mark_sit_entry_dirty(sbi, segno); + } + update_sits_in_cursum(sum, -sits_in_cursum(sum)); + return 1; + } + return 0; +} + +/* + * CP calls this function, which flushes SIT entries including sit_journal, + * and moves prefree segs to free segs. + */ +void flush_sit_entries(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned long *bitmap = sit_i->dirty_sentries_bitmap; + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + unsigned long nsegs = TOTAL_SEGS(sbi); + struct page *page = NULL; + struct f2fs_sit_block *raw_sit = NULL; + unsigned int start = 0, end = 0; + unsigned int segno = -1; + bool flushed; + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + /* + * "flushed" indicates whether sit entries in journal are flushed + * to the SIT area or not. 
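+ * When the journal was full, flush_sits_in_journal() has already marked
+ * every journalled segment dirty and emptied the journal, so the loop
+ * below writes each dirty entry straight into its SIT block.  Otherwise
+ * each entry is first offered a journal slot via
+ * lookup_journal_in_cursum() and only spills into a SIT block page when
+ * no slot is left.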
+ */ + flushed = flush_sits_in_journal(sbi); + + while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { + struct seg_entry *se = get_seg_entry(sbi, segno); + int sit_offset, offset; + + sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); + + if (flushed) + goto to_sit_page; + + offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); + if (offset >= 0) { + segno_in_journal(sum, offset) = cpu_to_le32(segno); + seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); + goto flush_done; + } +to_sit_page: + if (!page || (start > segno) || (segno > end)) { + if (page) { + f2fs_put_page(page, 1); + page = NULL; + } + + start = START_SEGNO(sit_i, segno); + end = start + SIT_ENTRY_PER_BLOCK - 1; + + /* read sit block that will be updated */ + page = get_next_sit_page(sbi, start); + raw_sit = page_address(page); + } + + /* udpate entry in SIT block */ + seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); +flush_done: + __clear_bit(segno, bitmap); + sit_i->dirty_sentries--; + } + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); + + /* writeout last modified SIT block */ + f2fs_put_page(page, 1); + + set_prefree_as_free_segments(sbi); +} + +static int build_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct sit_info *sit_i; + unsigned int sit_segs, start; + char *src_bitmap, *dst_bitmap; + unsigned int bitmap_size; + + /* allocate memory for SIT information */ + sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); + if (!sit_i) + return -ENOMEM; + + SM_I(sbi)->sit_info = sit_i; + + sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); + if (!sit_i->sentries) + return -ENOMEM; + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!sit_i->dirty_sentries_bitmap) + return -ENOMEM; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + sit_i->sentries[start].cur_valid_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + sit_i->sentries[start].ckpt_valid_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map + || !sit_i->sentries[start].ckpt_valid_map) + return -ENOMEM; + } + + if (sbi->segs_per_sec > 1) { + sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * + sizeof(struct sec_entry)); + if (!sit_i->sec_entries) + return -ENOMEM; + } + + /* get information related with SIT */ + sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; + + /* setup SIT bitmap from ckeckpoint pack */ + bitmap_size = __bitmap_size(sbi, SIT_BITMAP); + src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); + + dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!dst_bitmap) + return -ENOMEM; + + /* init SIT information */ + sit_i->s_ops = &default_salloc_ops; + + sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); + sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; + sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count); + sit_i->sit_bitmap = dst_bitmap; + sit_i->bitmap_size = bitmap_size; + sit_i->dirty_sentries = 0; + sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; + sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); + sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; + mutex_init(&sit_i->sentry_lock); + return 0; +} + +static int build_free_segmap(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + struct free_segmap_info *free_i; + unsigned int bitmap_size, sec_bitmap_size; + + /* allocate memory for 
free segmap information */ + free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); + if (!free_i) + return -ENOMEM; + + SM_I(sbi)->free_info = free_i; + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); + if (!free_i->free_segmap) + return -ENOMEM; + + sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); + if (!free_i->free_secmap) + return -ENOMEM; + + /* set all segments as dirty temporarily */ + memset(free_i->free_segmap, 0xff, bitmap_size); + memset(free_i->free_secmap, 0xff, sec_bitmap_size); + + /* init free segmap information */ + free_i->start_segno = + (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); + free_i->free_segments = 0; + free_i->free_sections = 0; + rwlock_init(&free_i->segmap_lock); + return 0; +} + +static int build_curseg(struct f2fs_sb_info *sbi) +{ + struct curseg_info *array; + int i; + + array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); + if (!array) + return -ENOMEM; + + SM_I(sbi)->curseg_array = array; + + for (i = 0; i < NR_CURSEG_TYPE; i++) { + mutex_init(&array[i].curseg_mutex); + array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + if (!array[i].sum_blk) + return -ENOMEM; + array[i].segno = NULL_SEGNO; + array[i].next_blkoff = 0; + } + return restore_curseg_summaries(sbi); +} + +static void build_sit_entries(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + unsigned int start; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *se = &sit_i->sentries[start]; + struct f2fs_sit_block *sit_blk; + struct f2fs_sit_entry sit; + struct page *page; + int i; + + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < sits_in_cursum(sum); i++) { + if (le32_to_cpu(segno_in_journal(sum, i)) == start) { + sit = sit_in_journal(sum, i); + mutex_unlock(&curseg->curseg_mutex); + goto got_it; + } + } + mutex_unlock(&curseg->curseg_mutex); + page = get_current_sit_page(sbi, start); + sit_blk = (struct f2fs_sit_block *)page_address(page); + sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; + f2fs_put_page(page, 1); +got_it: + check_block_count(sbi, start, &sit); + seg_info_from_raw_sit(se, &sit); + if (sbi->segs_per_sec > 1) { + struct sec_entry *e = get_sec_entry(sbi, start); + e->valid_blocks += se->valid_blocks; + } + } +} + +static void init_free_segmap(struct f2fs_sb_info *sbi) +{ + unsigned int start; + int type; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *sentry = get_seg_entry(sbi, start); + if (!sentry->valid_blocks) + __set_free(sbi, start); + } + + /* set use the current segments */ + for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { + struct curseg_info *curseg_t = CURSEG_I(sbi, type); + __set_test_and_inuse(sbi, curseg_t->segno); + } +} + +static void init_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); + unsigned short valid_blocks; + + while (1) { + /* find dirty segment based on free segmap */ + segno = find_next_inuse(free_i, total_segs, offset); + if (segno >= total_segs) + break; + offset = segno + 1; + valid_blocks = get_valid_blocks(sbi, segno, 0); + if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) + continue; + 
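+ /* in use but only partially valid: track the segment as DIRTY */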
mutex_lock(&dirty_i->seglist_lock); + __locate_dirty_segment(sbi, segno, DIRTY); + mutex_unlock(&dirty_i->seglist_lock); + } +} + +static int init_victim_secmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + + dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->victim_secmap) + return -ENOMEM; + return 0; +} + +static int build_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i; + unsigned int bitmap_size, i; + + /* allocate memory for dirty segments list information */ + dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); + if (!dirty_i) + return -ENOMEM; + + SM_I(sbi)->dirty_info = dirty_i; + mutex_init(&dirty_i->seglist_lock); + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + + for (i = 0; i < NR_DIRTY_TYPE; i++) { + dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->dirty_segmap[i]) + return -ENOMEM; + } + + init_dirty_segmap(sbi); + return init_victim_secmap(sbi); +} + +/* + * Update min, max modified time for cost-benefit GC algorithm + */ +static void init_min_max_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno; + + mutex_lock(&sit_i->sentry_lock); + + sit_i->min_mtime = LLONG_MAX; + + for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + unsigned int i; + unsigned long long mtime = 0; + + for (i = 0; i < sbi->segs_per_sec; i++) + mtime += get_seg_entry(sbi, segno + i)->mtime; + + mtime = div_u64(mtime, sbi->segs_per_sec); + + if (sit_i->min_mtime > mtime) + sit_i->min_mtime = mtime; + } + sit_i->max_mtime = get_mtime(sbi); + mutex_unlock(&sit_i->sentry_lock); +} + +int build_segment_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_sm_info *sm_info; + int err; + + sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); + if (!sm_info) + return -ENOMEM; + + /* init sm info */ + sbi->sm_info = sm_info; + INIT_LIST_HEAD(&sm_info->wblist_head); + spin_lock_init(&sm_info->wblist_lock); + sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); + sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); + sm_info->segment_count = le32_to_cpu(raw_super->segment_count); + sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); + sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); + sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); + sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); + + err = build_sit_info(sbi); + if (err) + return err; + err = build_free_segmap(sbi); + if (err) + return err; + err = build_curseg(sbi); + if (err) + return err; + + /* reinit free segmap based on SIT */ + build_sit_entries(sbi); + + init_free_segmap(sbi); + err = build_dirty_segmap(sbi); + if (err) + return err; + + init_min_max_mtime(sbi); + return 0; +} + +static void discard_dirty_segmap(struct f2fs_sb_info *sbi, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + mutex_lock(&dirty_i->seglist_lock); + kfree(dirty_i->dirty_segmap[dirty_type]); + dirty_i->nr_dirty[dirty_type] = 0; + mutex_unlock(&dirty_i->seglist_lock); +} + +static void destroy_victim_secmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + kfree(dirty_i->victim_secmap); +} + +static void destroy_dirty_segmap(struct f2fs_sb_info 
*sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + int i; + + if (!dirty_i) + return; + + /* discard pre-free/dirty segments list */ + for (i = 0; i < NR_DIRTY_TYPE; i++) + discard_dirty_segmap(sbi, i); + + destroy_victim_secmap(sbi); + SM_I(sbi)->dirty_info = NULL; + kfree(dirty_i); +} + +static void destroy_curseg(struct f2fs_sb_info *sbi) +{ + struct curseg_info *array = SM_I(sbi)->curseg_array; + int i; + + if (!array) + return; + SM_I(sbi)->curseg_array = NULL; + for (i = 0; i < NR_CURSEG_TYPE; i++) + kfree(array[i].sum_blk); + kfree(array); +} + +static void destroy_free_segmap(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = SM_I(sbi)->free_info; + if (!free_i) + return; + SM_I(sbi)->free_info = NULL; + kfree(free_i->free_segmap); + kfree(free_i->free_secmap); + kfree(free_i); +} + +static void destroy_sit_info(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int start; + + if (!sit_i) + return; + + if (sit_i->sentries) { + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + kfree(sit_i->sentries[start].cur_valid_map); + kfree(sit_i->sentries[start].ckpt_valid_map); + } + } + vfree(sit_i->sentries); + vfree(sit_i->sec_entries); + kfree(sit_i->dirty_sentries_bitmap); + + SM_I(sbi)->sit_info = NULL; + kfree(sit_i->sit_bitmap); + kfree(sit_i); +} + +void destroy_segment_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + destroy_dirty_segmap(sbi); + destroy_curseg(sbi); + destroy_free_segmap(sbi); + destroy_sit_info(sbi); + sbi->sm_info = NULL; + kfree(sm_info); +} diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h new file mode 100644 index 00000000000..69a5c79f67b --- /dev/null +++ b/fs/f2fs/segment.h @@ -0,0 +1,639 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/segment.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include + +/* constant macro */ +#define NULL_SEGNO ((unsigned int)(~0)) +#define NULL_SECNO ((unsigned int)(~0)) + +/* L: Logical segment # in volume, R: Relative segment # in main area */ +#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) +#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) + +#define IS_DATASEG(t) \ + ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ + (t == CURSEG_WARM_DATA)) + +#define IS_NODESEG(t) \ + ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ + (t == CURSEG_WARM_NODE)) + +#define IS_CURSEG(sbi, seg) \ + ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + +#define IS_CURSEC(sbi, secno) \ + ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + sbi->segs_per_sec)) \ + +#define START_BLOCK(sbi, segno) \ + (SM_I(sbi)->seg0_blkaddr + \ + (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) +#define NEXT_FREE_BLKADDR(sbi, curseg) \ + (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + +#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) + +#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ + ((blk_addr) - SM_I(sbi)->seg0_blkaddr) +#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) +#define GET_SEGNO(sbi, blk_addr) \ + (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) +#define GET_SECNO(sbi, segno) \ + ((segno) / sbi->segs_per_sec) +#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ + ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) + +#define GET_SUM_BLOCK(sbi, segno) \ + ((sbi->sm_info->ssa_blkaddr) + segno) + +#define GET_SUM_TYPE(footer) ((footer)->entry_type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) + +#define SIT_ENTRY_OFFSET(sit_i, segno) \ + (segno % sit_i->sents_per_block) +#define SIT_BLOCK_OFFSET(sit_i, segno) \ + (segno / SIT_ENTRY_PER_BLOCK) +#define START_SEGNO(sit_i, segno) \ + (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define f2fs_bitmap_size(nr) \ + (BITS_TO_LONGS(nr) * sizeof(unsigned long)) +#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) +#define TOTAL_SECS(sbi) (sbi->total_sections) + +#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ + (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) +#define SECTOR_TO_BLOCK(sbi, sectors) \ + (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) + +/* during checkpoint, bio_private is used to synchronize the last bio */ +struct bio_private { + struct f2fs_sb_info *sbi; + bool is_sync; + void *wait; +}; + +/* + * indicate a block allocation direction: RIGHT and LEFT. + * RIGHT means allocating new sections towards the end of volume. + * LEFT means the opposite direction. 
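+ *
+ * new_curseg() starts from ALLOC_LEFT and switches to ALLOC_RIGHT for
+ * the warm/cold data logs, or for every log when the "no_heap" mount
+ * option is set.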
+ */ +enum { + ALLOC_RIGHT = 0, + ALLOC_LEFT +}; + +/* + * In the victim_sel_policy->alloc_mode, there are two block allocation modes. + * LFS writes data sequentially with cleaning operations. + * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. + */ +enum { + LFS = 0, + SSR +}; + +/* + * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes. + * GC_CB is based on cost-benefit algorithm. + * GC_GREEDY is based on greedy algorithm. + */ +enum { + GC_CB = 0, + GC_GREEDY +}; + +/* + * BG_GC means the background cleaning job. + * FG_GC means the on-demand cleaning job. + */ +enum { + BG_GC = 0, + FG_GC +}; + +/* for a function parameter to select a victim segment */ +struct victim_sel_policy { + int alloc_mode; /* LFS or SSR */ + int gc_mode; /* GC_CB or GC_GREEDY */ + unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int offset; /* last scanned bitmap offset */ + unsigned int ofs_unit; /* bitmap search unit */ + unsigned int min_cost; /* minimum cost */ + unsigned int min_segno; /* segment # having min. cost */ +}; + +struct seg_entry { + unsigned short valid_blocks; /* # of valid blocks */ + unsigned char *cur_valid_map; /* validity bitmap of blocks */ + /* + * # of valid blocks and the validity bitmap stored in the the last + * checkpoint pack. This information is used by the SSR mode. + */ + unsigned short ckpt_valid_blocks; + unsigned char *ckpt_valid_map; + unsigned char type; /* segment type like CURSEG_XXX_TYPE */ + unsigned long long mtime; /* modification time of the segment */ +}; + +struct sec_entry { + unsigned int valid_blocks; /* # of valid blocks in a section */ +}; + +struct segment_allocation { + void (*allocate_segment)(struct f2fs_sb_info *, int, bool); +}; + +struct sit_info { + const struct segment_allocation *s_ops; + + block_t sit_base_addr; /* start block address of SIT area */ + block_t sit_blocks; /* # of blocks used by SIT area */ + block_t written_valid_blocks; /* # of valid blocks in main area */ + char *sit_bitmap; /* SIT bitmap pointer */ + unsigned int bitmap_size; /* SIT bitmap size */ + + unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ + unsigned int dirty_sentries; /* # of dirty sentries */ + unsigned int sents_per_block; /* # of SIT entries per block */ + struct mutex sentry_lock; /* to protect SIT cache */ + struct seg_entry *sentries; /* SIT segment-level cache */ + struct sec_entry *sec_entries; /* SIT section-level cache */ + + /* for cost-benefit algorithm in cleaning procedure */ + unsigned long long elapsed_time; /* elapsed time after mount */ + unsigned long long mounted_time; /* mount time */ + unsigned long long min_mtime; /* min. modification time */ + unsigned long long max_mtime; /* max. 
modification time */ +}; + +struct free_segmap_info { + unsigned int start_segno; /* start segment number logically */ + unsigned int free_segments; /* # of free segments */ + unsigned int free_sections; /* # of free sections */ + rwlock_t segmap_lock; /* free segmap lock */ + unsigned long *free_segmap; /* free segment bitmap */ + unsigned long *free_secmap; /* free section bitmap */ +}; + +/* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */ +enum dirty_type { + DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ + DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ + DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ + DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ + DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ + DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ + DIRTY, /* to count # of dirty segments */ + PRE, /* to count # of entirely obsolete segments */ + NR_DIRTY_TYPE +}; + +struct dirty_seglist_info { + const struct victim_selection *v_ops; /* victim selction operation */ + unsigned long *dirty_segmap[NR_DIRTY_TYPE]; + struct mutex seglist_lock; /* lock for segment bitmaps */ + int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ + unsigned long *victim_secmap; /* background GC victims */ +}; + +/* victim selection function for cleaning and SSR */ +struct victim_selection { + int (*get_victim)(struct f2fs_sb_info *, unsigned int *, + int, int, char); +}; + +/* for active log information */ +struct curseg_info { + struct mutex curseg_mutex; /* lock for consistency */ + struct f2fs_summary_block *sum_blk; /* cached summary block */ + unsigned char alloc_type; /* current allocation type */ + unsigned int segno; /* current segment number */ + unsigned short next_blkoff; /* next block offset to write */ + unsigned int zone; /* current zone number */ + unsigned int next_segno; /* preallocated segment */ +}; + +/* + * inline functions + */ +static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) +{ + return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); +} + +static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sentries[segno]; +} + +static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; +} + +static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno, int section) +{ + /* + * In order to get # of valid blocks in a section instantly from many + * segments, f2fs manages two counting structures separately. 
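+ * seg_entry tracks valid blocks per segment, while sec_entry aggregates
+ * them per section; sec_entries is only allocated and filled (see
+ * build_sit_info() and build_sit_entries()) when segs_per_sec > 1.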
+ */ + if (section > 1) + return get_sec_entry(sbi, segno)->valid_blocks; + else + return get_seg_entry(sbi, segno)->valid_blocks; +} + +static inline void seg_info_from_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + se->valid_blocks = GET_SIT_VBLOCKS(rs); + se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); + memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->type = GET_SIT_TYPE(rs); + se->mtime = le64_to_cpu(rs->mtime); +} + +static inline void seg_info_to_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | + se->valid_blocks; + rs->vblocks = cpu_to_le16(raw_vblocks); + memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->ckpt_valid_blocks = se->valid_blocks; + rs->mtime = cpu_to_le64(se->mtime); +} + +static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, + unsigned int max, unsigned int segno) +{ + unsigned int ret; + read_lock(&free_i->segmap_lock); + ret = find_next_bit(free_i->free_segmap, max, segno); + read_unlock(&free_i->segmap_lock); + return ret; +} + +static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + clear_bit(segno, free_i->free_segmap); + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + clear_bit(secno, free_i->free_secmap); + free_i->free_sections++; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + set_bit(segno, free_i->free_segmap); + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; +} + +static inline void __set_test_and_free(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + if (test_and_clear_bit(segno, free_i->free_segmap)) { + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), + start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + if (test_and_clear_bit(secno, free_i->free_secmap)) + free_i->free_sections++; + } + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + write_lock(&free_i->segmap_lock); + if (!test_and_set_bit(segno, free_i->free_segmap)) { + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, + void *dst_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); +} + +static inline block_t written_block_count(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i 
= SIT_I(sbi); + block_t vblocks; + + mutex_lock(&sit_i->sentry_lock); + vblocks = sit_i->written_valid_blocks; + mutex_unlock(&sit_i->sentry_lock); + + return vblocks; +} + +static inline unsigned int free_segments(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_segs; + + read_lock(&free_i->segmap_lock); + free_segs = free_i->free_segments; + read_unlock(&free_i->segmap_lock); + + return free_segs; +} + +static inline int reserved_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->reserved_segments; +} + +static inline unsigned int free_sections(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_secs; + + read_lock(&free_i->segmap_lock); + free_secs = free_i->free_sections; + read_unlock(&free_i->segmap_lock); + + return free_secs; +} + +static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[PRE]; +} + +static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; +} + +static inline int overprovision_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->ovp_segments; +} + +static inline int overprovision_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; +} + +static inline int reserved_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; +} + +static inline bool need_SSR(struct f2fs_sb_info *sbi) +{ + return (free_sections(sbi) < overprovision_sections(sbi)); +} + +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + + if (sbi->por_doing) + return false; + + return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi))); +} + +static inline int utilization(struct f2fs_sb_info *sbi) +{ + return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); +} + +/* + * Sometimes f2fs may be better to drop out-of-place update policy. + * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write + * data in the original place likewise other traditional file systems. + * But, currently set 100 in percentage, which means it is disabled. + * See below need_inplace_update(). 
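+ *
+ * For illustration only: utilization() is valid_user_blocks * 100 /
+ * user_block_count, so a volume holding 850,000 valid blocks out of a
+ * 1,000,000-block user area reports 85.  With MIN_IPU_UTIL fixed at 100
+ * the threshold can never be exceeded; lowering it (say, to 70) would
+ * let need_inplace_update() return true once SSR kicks in and
+ * utilization passes 70%.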
+ */ +#define MIN_IPU_UTIL 100 +static inline bool need_inplace_update(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode)) + return false; + if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + return true; + return false; +} + +static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->segno; +} + +static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->alloc_type; +} + +static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->next_blkoff; +} + +static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) +{ + unsigned int end_segno = SM_I(sbi)->segment_count - 1; + BUG_ON(segno > end_segno); +} + +/* + * This function is used for only debugging. + * NOTE: In future, we have to remove this function. + */ +static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; + block_t start_addr = sm_info->seg0_blkaddr; + block_t end_addr = start_addr + total_blks - 1; + BUG_ON(blk_addr < start_addr); + BUG_ON(blk_addr > end_addr); +} + +/* + * Summary block is always treated as invalid block + */ +static inline void check_block_count(struct f2fs_sb_info *sbi, + int segno, struct f2fs_sit_entry *raw_sit) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + unsigned int end_segno = sm_info->segment_count - 1; + int valid_blocks = 0; + int i; + + /* check segment usage */ + BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); + + /* check boundary of a given segment number */ + BUG_ON(segno > end_segno); + + /* check bitmap with valid block count */ + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, raw_sit->valid_map)) + valid_blocks++; + BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); +} + +static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, start); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return blk_addr; +} + +static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_addr -= sit_i->sit_base_addr; + if (block_addr < sit_i->sit_blocks) + block_addr += sit_i->sit_blocks; + else + block_addr -= sit_i->sit_blocks; + + return block_addr + sit_i->sit_base_addr; +} + +static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) +{ + unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); + + if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) + f2fs_clear_bit(block_off, sit_i->sit_bitmap); + else + f2fs_set_bit(block_off, sit_i->sit_bitmap); +} + +static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - + sit_i->mounted_time; +} + +static inline void set_summary(struct f2fs_summary *sum, nid_t nid, + unsigned int ofs_in_node, unsigned char version) +{ + sum->nid = cpu_to_le32(nid); + sum->ofs_in_node = 
cpu_to_le16(ofs_in_node); + sum->version = version; +} + +static inline block_t start_sum_block(struct f2fs_sb_info *sbi) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) + - (base + 1) + type; +} + +static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) +{ + if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + return true; + return false; +} + +static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c new file mode 100644 index 00000000000..7153f8445cf --- /dev/null +++ b/fs/f2fs/super.c @@ -0,0 +1,1156 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/super.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "xattr.h" +#include "gc.h" + +#define CREATE_TRACE_POINTS +#include + +static struct proc_dir_entry *f2fs_proc_root; +static struct kmem_cache *f2fs_inode_cachep; +static struct kset *f2fs_kset; + +enum { + Opt_gc_background, + Opt_disable_roll_forward, + Opt_discard, + Opt_noheap, + Opt_nouser_xattr, + Opt_noacl, + Opt_active_logs, + Opt_disable_ext_identify, + Opt_inline_xattr, + Opt_android_emu, + Opt_err_continue, + Opt_err_panic, + Opt_err_recover, + Opt_err, +}; + +static match_table_t f2fs_tokens = { + {Opt_gc_background, "background_gc=%s"}, + {Opt_disable_roll_forward, "disable_roll_forward"}, + {Opt_discard, "discard"}, + {Opt_noheap, "no_heap"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_noacl, "noacl"}, + {Opt_active_logs, "active_logs=%u"}, + {Opt_disable_ext_identify, "disable_ext_identify"}, + {Opt_inline_xattr, "inline_xattr"}, + {Opt_android_emu, "android_emu=%s"}, + {Opt_err_continue, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_recover, "errors=recover"}, + {Opt_err, NULL}, +}; + +/* Sysfs support for f2fs */ +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int offset; +}; + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned int *ui; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; + 
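+ /* store the parsed value into the gc_thread tunable selected by a->offset */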
*ui = t; + return count; +} + +static ssize_t f2fs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .offset = offsetof(struct f2fs_gc_kthread, _elname), \ +} + +#define F2FS_RW_ATTR(name, elname) \ + F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) + +F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(gc_idle, gc_idle); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + va_end(args); +} + +static void init_once(void *foo) +{ + struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; + + inode_init_once(&fi->vfs_inode); +} + +static int parse_android_emu(struct f2fs_sb_info *sbi, char *args) +{ + char *sep = args; + char *sepres; + int ret; + + if (!sep) + return -EINVAL; + + sepres = strsep(&sep, ":"); + if (!sep) + return -EINVAL; + ret = kstrtou32(sepres, 0, &sbi->android_emu_uid); + if (ret) + return ret; + + sepres = strsep(&sep, ":"); + if (!sep) + return -EINVAL; + ret = kstrtou32(sepres, 0, &sbi->android_emu_gid); + if (ret) + return ret; + + sepres = strsep(&sep, ":"); + ret = kstrtou16(sepres, 8, &sbi->android_emu_mode); + if (ret) + return ret; + + if (sep && strstr(sep, "nocase")) + sbi->android_emu_flags = F2FS_ANDROID_EMU_NOCASE; + + return 0; +} + +static int parse_options(struct super_block *sb, char *options) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p, *name; + int arg = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. 
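+ * match_token() only fills args[] for tokens that carry a %s/%u
+ * argument, so a NULL args[0].from afterwards means the option came
+ * without a value.  An illustrative (not exhaustive) option string:
+ * "background_gc=on,active_logs=6,errors=recover,
+ * android_emu=1023:1023:0775:nocase".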
+ */ + args[0].to = args[0].from = NULL; + token = match_token(p, f2fs_tokens, args); + + switch (token) { + case Opt_gc_background: + name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strncmp(name, "on", 2)) + set_opt(sbi, BG_GC); + else if (!strncmp(name, "off", 3)) + clear_opt(sbi, BG_GC); + else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_discard: + set_opt(sbi, DISCARD); + break; + case Opt_noheap: + set_opt(sbi, NOHEAP); + break; +#ifdef CONFIG_F2FS_FS_XATTR + case Opt_nouser_xattr: + clear_opt(sbi, XATTR_USER); + break; + case Opt_inline_xattr: + set_opt(sbi, INLINE_XATTR); + break; +#else + case Opt_nouser_xattr: + f2fs_msg(sb, KERN_INFO, + "nouser_xattr options not supported"); + break; + case Opt_inline_xattr: + f2fs_msg(sb, KERN_INFO, + "inline_xattr options not supported"); + break; +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + case Opt_noacl: + clear_opt(sbi, POSIX_ACL); + break; +#else + case Opt_noacl: + f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + break; +#endif + case Opt_active_logs: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) + return -EINVAL; + sbi->active_logs = arg; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + case Opt_err_continue: + clear_opt(sbi, ERRORS_RECOVER); + clear_opt(sbi, ERRORS_PANIC); + break; + case Opt_err_panic: + set_opt(sbi, ERRORS_PANIC); + clear_opt(sbi, ERRORS_RECOVER); + break; + case Opt_err_recover: + set_opt(sbi, ERRORS_RECOVER); + clear_opt(sbi, ERRORS_PANIC); + break; + case Opt_android_emu: + if (args->from) { + int ret; + char *perms = match_strdup(args); + + ret = parse_android_emu(sbi, perms); + kfree(perms); + + if (ret) + return -EINVAL; + + set_opt(sbi, ANDROID_EMU); + } else + return -EINVAL; + break; + + default: + f2fs_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" or missing value", + p); + return -EINVAL; + } + } + return 0; +} + +static struct inode *f2fs_alloc_inode(struct super_block *sb) +{ + struct f2fs_inode_info *fi; + + fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); + if (!fi) + return NULL; + + init_once((void *) fi); + + /* Initialize f2fs-specific inode info */ + fi->vfs_inode.i_version = 1; + atomic_set(&fi->dirty_dents, 0); + fi->i_current_depth = 1; + fi->i_advise = 0; + rwlock_init(&fi->ext.ext_lock); + + set_inode_flag(fi, FI_NEW_INODE); + + if (test_opt(F2FS_SB(sb), INLINE_XATTR)) + set_inode_flag(fi, FI_INLINE_XATTR); + + return &fi->vfs_inode; +} + +static int f2fs_drop_inode(struct inode *inode) +{ + /* + * This is to avoid a deadlock condition like below. + * writeback_single_inode(inode) + * - f2fs_write_data_page + * - f2fs_gc -> iput -> evict + * - inode_wait_for_writeback(inode) + */ + if (!inode_unhashed(inode) && inode->i_state & I_SYNC) + return 0; + return generic_drop_inode(inode); +} + +/* + * f2fs_dirty_inode() is called from __mark_inode_dirty() + * + * We should call set_dirty_inode to write the dirty inode through write_inode. 
+ */ +static void f2fs_dirty_inode(struct inode *inode, int flags) +{ + set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); +} + +static void f2fs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode)); +} + +static void f2fs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, f2fs_i_callback); +} + +static void f2fs_put_super(struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + kobject_del(&sbi->s_kobj); + + f2fs_destroy_stats(sbi); + stop_gc_thread(sbi); + + write_checkpoint(sbi, true); + + iput(sbi->node_inode); + iput(sbi->meta_inode); + + /* destroy f2fs internal modules */ + destroy_node_manager(sbi); + destroy_segment_manager(sbi); + + kfree(sbi->ckpt); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + + sb->s_fs_info = NULL; + brelse(sbi->raw_super_buf); + kfree(sbi); +} + +int f2fs_sync_fs(struct super_block *sb, int sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + trace_f2fs_sync_fs(sb, sync); + + if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) + return 0; + + if (sync) { + mutex_lock(&sbi->gc_mutex); + write_checkpoint(sbi, false); + mutex_unlock(&sbi->gc_mutex); + } else { + f2fs_balance_fs(sbi); + } + + return 0; +} + +static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + block_t total_count, user_block_count, start_count, ovp_count; + + total_count = le64_to_cpu(sbi->raw_super->block_count); + user_block_count = sbi->user_block_count; + start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr); + ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + buf->f_type = F2FS_SUPER_MAGIC; + buf->f_bsize = sbi->blocksize; + + buf->f_blocks = total_count - start_count; + buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; + buf->f_bavail = user_block_count - valid_user_blocks(sbi); + + buf->f_files = sbi->total_node_count; + buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); + + buf->f_namelen = F2FS_NAME_LEN; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + + return 0; +} + +static int f2fs_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct f2fs_sb_info *sbi = F2FS_SB(vfs->mnt_sb); + + if (!(vfs->mnt_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + seq_printf(seq, ",background_gc=%s", "on"); + else + seq_printf(seq, ",background_gc=%s", "off"); + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) + seq_puts(seq, ",disable_roll_forward"); + if (test_opt(sbi, DISCARD)) + seq_puts(seq, ",discard"); + if (test_opt(sbi, NOHEAP)) + seq_puts(seq, ",no_heap_alloc"); +#ifdef CONFIG_F2FS_FS_XATTR + if (test_opt(sbi, XATTR_USER)) + seq_puts(seq, ",user_xattr"); + else + seq_puts(seq, ",nouser_xattr"); + if (test_opt(sbi, INLINE_XATTR)) + seq_puts(seq, ",inline_xattr"); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + if (test_opt(sbi, POSIX_ACL)) + seq_puts(seq, ",acl"); + else + seq_puts(seq, ",noacl"); +#endif + if (test_opt(sbi, ERRORS_PANIC)) + seq_puts(seq, ",errors=panic"); + else if (test_opt(sbi, ERRORS_RECOVER)) + seq_puts(seq, ",errors=recover"); + else + seq_puts(seq, ",errors=continue"); + if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) + seq_puts(seq, ",disable_ext_identify"); + + if 
(test_opt(sbi, ANDROID_EMU)) + seq_printf(seq, ",android_emu=%u:%u:%ho%s", + sbi->android_emu_uid, + sbi->android_emu_gid, + sbi->android_emu_mode, + (sbi->android_emu_flags & + F2FS_ANDROID_EMU_NOCASE) ? + ":nocase" : ""); + + seq_printf(seq, ",active_logs=%u", sbi->active_logs); + + return 0; +} + +static int segment_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + int i; + + for (i = 0; i < total_segs; i++) { + seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); + if (i != 0 && (i % 10) == 0) + seq_puts(seq, "\n"); + else + seq_puts(seq, " "); + } + return 0; +} + +static int segment_info_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, segment_info_seq_show, + PROC_I(inode)->pde->data); +} + +static const struct file_operations f2fs_seq_segment_info_fops = { + .owner = THIS_MODULE, + .open = segment_info_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int f2fs_remount(struct super_block *sb, int *flags, char *data) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_mount_info org_mount_opt; + int err, active_logs; + + /* + * Save the old mount options in case we + * need to restore them. + */ + org_mount_opt = sbi->mount_opt; + active_logs = sbi->active_logs; + + /* parse mount options */ + err = parse_options(sb, data); + if (err) + goto restore_opts; + + /* + * Previous and new state of filesystem is RO, + * so no point in checking GC conditions. + */ + if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + goto skip; + + /* + * We stop the GC thread if FS is mounted as RO + * or if background_gc = off is passed in mount + * option. Also sync the filesystem. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if (sbi->gc_thread) { + stop_gc_thread(sbi); + f2fs_sync_fs(sb, 1); + } + } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + err = start_gc_thread(sbi); + if (err) + goto restore_opts; + } +skip: + /* Update the POSIXACL Flag */ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + return 0; + +restore_opts: + sbi->mount_opt = org_mount_opt; + sbi->active_logs = active_logs; + return err; +} + +static struct super_operations f2fs_sops = { + .alloc_inode = f2fs_alloc_inode, + .drop_inode = f2fs_drop_inode, + .destroy_inode = f2fs_destroy_inode, + .write_inode = f2fs_write_inode, + .dirty_inode = f2fs_dirty_inode, + .show_options = f2fs_show_options, + .evict_inode = f2fs_evict_inode, + .put_super = f2fs_put_super, + .sync_fs = f2fs_sync_fs, + .statfs = f2fs_statfs, + .remount_fs = f2fs_remount, +}; + +static struct inode *f2fs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + + if (ino < F2FS_ROOT_INO(sbi)) + return ERR_PTR(-ESTALE); + + /* + * f2fs_iget isn't quite right if the inode is currently unallocated! + * However f2fs_iget currently does appropriate checks to handle stale + * inodes so everything is OK. + */ + inode = f2fs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { + /* we didn't find the right inode.. 
*/ + iput(inode); + return ERR_PTR(-ESTALE); + } + return inode; +} + +static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + f2fs_nfs_get_inode); +} + +static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + f2fs_nfs_get_inode); +} + +static const struct export_operations f2fs_export_ops = { + .fh_to_dentry = f2fs_fh_to_dentry, + .fh_to_parent = f2fs_fh_to_parent, + .get_parent = f2fs_get_parent, +}; + +static loff_t max_file_size(unsigned bits) +{ + loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); + loff_t leaf_count = ADDRS_PER_BLOCK; + + /* two direct node blocks */ + result += (leaf_count * 2); + + /* two indirect node blocks */ + leaf_count *= NIDS_PER_BLOCK; + result += (leaf_count * 2); + + /* one double indirect node block */ + leaf_count *= NIDS_PER_BLOCK; + result += leaf_count; + + result <<= bits; + return result; +} + +static int sanity_check_raw_super(struct super_block *sb, + struct f2fs_super_block *raw_super) +{ + unsigned int blocksize; + + if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { + f2fs_msg(sb, KERN_INFO, + "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + return 1; + } + + /* Currently, support only 4KB page cache size */ + if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) { + f2fs_msg(sb, KERN_INFO, + "Invalid page_cache_size (%lu), supports only 4KB\n", + PAGE_CACHE_SIZE); + return 1; + } + + /* Currently, support only 4KB block size */ + blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); + if (blocksize != F2FS_BLKSIZE) { + f2fs_msg(sb, KERN_INFO, + "Invalid blocksize (%u), supports only 4KB\n", + blocksize); + return 1; + } + + if (le32_to_cpu(raw_super->log_sectorsize) != + F2FS_LOG_SECTOR_SIZE) { + f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); + return 1; + } + if (le32_to_cpu(raw_super->log_sectors_per_block) != + F2FS_LOG_SECTORS_PER_BLOCK) { + f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); + return 1; + } + return 0; +} + +static int sanity_check_ckpt(struct f2fs_sb_info *sbi) +{ + unsigned int total, fsmeta; + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + total = le32_to_cpu(raw_super->segment_count); + fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); + fsmeta += le32_to_cpu(raw_super->segment_count_sit); + fsmeta += le32_to_cpu(raw_super->segment_count_nat); + fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); + fsmeta += le32_to_cpu(raw_super->segment_count_ssa); + + if (fsmeta >= total) + return 1; + + if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + return 1; + } + return 0; +} + +static void init_sb_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = sbi->raw_super; + int i; + + sbi->log_sectors_per_block = + le32_to_cpu(raw_super->log_sectors_per_block); + sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize); + sbi->blocksize = 1 << sbi->log_blocksize; + sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg; + sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); + sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); + sbi->total_sections = le32_to_cpu(raw_super->section_count); + sbi->total_node_count = + 
(le32_to_cpu(raw_super->segment_count_nat) / 2) + * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; + sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); + sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); + sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); + sbi->cur_victim_sec = NULL_SECNO; + + for (i = 0; i < NR_COUNT_TYPE; i++) + atomic_set(&sbi->nr_pages[i], 0); +} + +static int validate_superblock(struct super_block *sb, + struct f2fs_super_block **raw_super, + struct buffer_head **raw_super_buf, sector_t block) +{ + const char *super = (block == 0 ? "first" : "second"); + + /* read f2fs raw super block */ + *raw_super_buf = sb_bread(sb, block); + if (!*raw_super_buf) { + f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", + super); + return -EIO; + } + + *raw_super = (struct f2fs_super_block *) + ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); + + /* sanity checking of raw super */ + if (!sanity_check_raw_super(sb, *raw_super)) + return 0; + + f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " + "in %s superblock", super); + return -EINVAL; +} + +static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct f2fs_sb_info *sbi; + struct f2fs_super_block *raw_super; + struct buffer_head *raw_super_buf; + struct inode *root; + long err = -EINVAL; + int i; + const char *descr = ""; + + f2fs_msg(sb, KERN_INFO, "mounting.."); + /* allocate memory for f2fs-specific super block info */ + sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + /* set a block size */ + if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { + f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); + goto free_sbi; + } + + err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); + if (err) { + brelse(raw_super_buf); + /* check secondary superblock when primary failed */ + err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); + if (err) + goto free_sb_buf; + } + sb->s_fs_info = sbi; + /* init some FS parameters */ + sbi->active_logs = NR_CURSEG_TYPE; + + set_opt(sbi, BG_GC); + +#ifdef CONFIG_F2FS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif + /* parse mount options */ + err = parse_options(sb, (char *)data); + if (err) + goto free_sb_buf; + + sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); + get_random_bytes(&sbi->s_next_generation, sizeof(u32)); + + sb->s_op = &f2fs_sops; + sb->s_xattr = f2fs_xattr_handlers; + sb->s_export_op = &f2fs_export_ops; + sb->s_magic = F2FS_SUPER_MAGIC; + sb->s_time_gran = 1; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? 
MS_POSIXACL : 0); + memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + + /* init f2fs-specific super block info */ + sbi->sb = sb; + sbi->raw_super = raw_super; + sbi->raw_super_buf = raw_super_buf; + mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); + mutex_init(&sbi->cp_mutex); + for (i = 0; i < NR_GLOBAL_LOCKS; i++) + mutex_init(&sbi->fs_lock[i]); + mutex_init(&sbi->node_write); + sbi->por_doing = 0; + spin_lock_init(&sbi->stat_lock); + init_rwsem(&sbi->bio_sem); + init_sb_info(sbi); + + /* get an inode for meta space */ + sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); + if (IS_ERR(sbi->meta_inode)) { + f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); + err = PTR_ERR(sbi->meta_inode); + goto free_sb_buf; + } + +get_cp: + err = get_valid_checkpoint(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); + goto free_meta_inode; + } + + /* sanity checking of checkpoint */ + err = -EINVAL; + if (sanity_check_ckpt(sbi)) { + f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint"); + goto free_cp; + } + + sbi->total_valid_node_count = + le32_to_cpu(sbi->ckpt->valid_node_count); + sbi->total_valid_inode_count = + le32_to_cpu(sbi->ckpt->valid_inode_count); + sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); + sbi->total_valid_block_count = + le64_to_cpu(sbi->ckpt->valid_block_count); + sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->alloc_valid_block_count = 0; + INIT_LIST_HEAD(&sbi->dir_inode_list); + spin_lock_init(&sbi->dir_inode_lock); + + init_orphan_info(sbi); + + /* setup f2fs internal modules */ + err = build_segment_manager(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to initialize F2FS segment manager"); + goto free_sm; + } + err = build_node_manager(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to initialize F2FS node manager"); + goto free_nm; + } + + build_gc_manager(sbi); + + /* get an inode for node space */ + sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); + if (IS_ERR(sbi->node_inode)) { + f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); + err = PTR_ERR(sbi->node_inode); + goto free_nm; + } + + /* if there are nt orphan nodes free them */ + err = -EINVAL; + if (recover_orphan_inodes(sbi)) + goto free_node_inode; + + /* read root inode and dentry */ + root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); + if (IS_ERR(root)) { + f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); + err = PTR_ERR(root); + goto free_node_inode; + } + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) + goto free_root_inode; + + sb->s_root = d_alloc_root(root); /* allocate root dentry */ + if (!sb->s_root) { + err = -ENOMEM; + goto free_root_inode; + } + + /* recover fsynced data */ + if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + err = recover_fsync_data(sbi); + if (err) { + if (f2fs_handle_error(sbi)) { + set_opt(sbi, DISABLE_ROLL_FORWARD); + kfree(sbi->ckpt); + f2fs_msg(sb, KERN_ERR, + "reloading last checkpoint"); + goto get_cp; + } + f2fs_msg(sb, KERN_ERR, + "cannot recover all fsync data errno=%ld", err); + /* checkpoint what we have */ + write_checkpoint(sbi, false); + } + } + + /* + * If filesystem is not mounted as read-only then + * do start the gc_thread. 
+ */ + if (!(sb->s_flags & MS_RDONLY)) { + /* After POR, we can run background GC thread.*/ + err = start_gc_thread(sbi); + if (err) + goto fail; + } + + err = f2fs_build_stats(sbi); + if (err) + goto fail; + + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + + if (test_opt(sbi, DISCARD)) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + if (!blk_queue_discard(q)) + f2fs_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but " + "the device does not support discard"); + } + + if (test_opt(sbi, ANDROID_EMU)) + descr = " with android sdcard emulation"; + f2fs_msg(sb, KERN_INFO, "mounted filesystem%s", descr); + + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto fail; + + return 0; +fail: + stop_gc_thread(sbi); +free_root_inode: + iput(root); +free_node_inode: + iput(sbi->node_inode); +free_nm: + destroy_node_manager(sbi); +free_sm: + destroy_segment_manager(sbi); +free_cp: + kfree(sbi->ckpt); +free_meta_inode: + make_bad_inode(sbi->meta_inode); + iput(sbi->meta_inode); +free_sb_buf: + brelse(raw_super_buf); +free_sbi: + kfree(sbi); + f2fs_msg(sb, KERN_ERR, "mount failed"); + return err; +} + +static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); +} + +static struct file_system_type f2fs_fs_type = { + .owner = THIS_MODULE, + .name = "f2fs", + .mount = f2fs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_inodecache(void) +{ + f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", + sizeof(struct f2fs_inode_info), NULL); + if (f2fs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); + kmem_cache_destroy(f2fs_inode_cachep); +} + +static int __init init_f2fs_fs(void) +{ + int err; + + err = init_inodecache(); + if (err) + goto fail; + err = create_node_manager_caches(); + if (err) + goto free_inodecache; + err = create_gc_caches(); + if (err) + goto free_node_manager_caches; + err = create_checkpoint_caches(); + if (err) + goto free_gc_caches; + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) { + err = -ENOMEM; + goto free_checkpoint_caches; + } + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_kset; + f2fs_create_root_stats(); + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + return 0; + +free_kset: + kset_unregister(f2fs_kset); +free_checkpoint_caches: + destroy_checkpoint_caches(); +free_gc_caches: + destroy_gc_caches(); +free_node_manager_caches: + destroy_node_manager_caches(); +free_inodecache: + destroy_inodecache(); +fail: + return err; +} + +static void __exit exit_f2fs_fs(void) +{ + remove_proc_entry("fs/f2fs", NULL); + f2fs_destroy_root_stats(); + unregister_filesystem(&f2fs_fs_type); + destroy_checkpoint_caches(); + destroy_gc_caches(); + destroy_node_manager_caches(); + destroy_inodecache(); + kset_unregister(f2fs_kset); +} + +module_init(init_f2fs_fs) +module_exit(exit_f2fs_fs) + +MODULE_AUTHOR("Samsung Electronics's Praesto Team"); +MODULE_DESCRIPTION("Flash Friendly File System"); +MODULE_LICENSE("GPL"); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c new file mode 100644 index 00000000000..973b3c57e42 --- /dev/null +++ b/fs/f2fs/xattr.c @@ -0,0 +1,602 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/xattr.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/xattr.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher + * + * Fix by Harrison Xing . + * Extended attributes for symlinks and special files added per + * suggestion of Luka Renko . + * xattr consolidation Copyright (c) 2004 James Morris , + * Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include "f2fs.h" +#include "xattr.h" + +static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + int total_len, prefix_len = 0; + const char *prefix = NULL; + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + prefix = XATTR_USER_PREFIX; + prefix_len = XATTR_USER_PREFIX_LEN; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + prefix = XATTR_TRUSTED_PREFIX; + prefix_len = XATTR_TRUSTED_PREFIX_LEN; + break; + case F2FS_XATTR_INDEX_SECURITY: + prefix = XATTR_SECURITY_PREFIX; + prefix_len = XATTR_SECURITY_PREFIX_LEN; + break; + default: + return -EINVAL; + } + + total_len = prefix_len + name_len + 1; + if (list && total_len <= list_size) { + memcpy(list, prefix, prefix_len); + memcpy(list + prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + case F2FS_XATTR_INDEX_SECURITY: + break; + default: + return -EINVAL; + } + if (strcmp(name, "") == 0) + return -EINVAL; + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); +} + +static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + case F2FS_XATTR_INDEX_SECURITY: + break; + default: + return -EINVAL; + } + if (strcmp(name, "") == 0) + return -EINVAL; + + return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); +} + +static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; + size_t size; + + if (type != F2FS_XATTR_INDEX_ADVISE) + return 0; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct inode *inode = dentry->d_inode; + + if (!name || strcmp(name, "") != 0) + return -EINVAL; + + if (buffer) + *((char *)buffer) = F2FS_I(inode)->i_advise; + return sizeof(char); +} + +static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct inode *inode = dentry->d_inode; + + if (!name || strcmp(name, "") != 0) + return -EINVAL; + if (!inode_owner_or_capable(inode)) + return -EPERM; + if (value == NULL) + return -EINVAL; + + F2FS_I(inode)->i_advise = *(char *)value; + return 0; +} + +#ifdef CONFIG_F2FS_FS_SECURITY +static int __f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + struct page *ipage); +static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + 
void *page) +{ + const struct xattr *xattr; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + xattr->name, xattr->value, + xattr->value_len, (struct page *)page); + if (err < 0) + break; + } + return err; +} + +int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return security_new_inode_init_security(inode, dir, qstr, + &f2fs_initxattrs, ipage); +} +#endif + +const struct xattr_handler f2fs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = F2FS_XATTR_INDEX_USER, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +const struct xattr_handler f2fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = F2FS_XATTR_INDEX_TRUSTED, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +const struct xattr_handler f2fs_xattr_advise_handler = { + .prefix = F2FS_SYSTEM_ADVISE_PREFIX, + .flags = F2FS_XATTR_INDEX_ADVISE, + .list = f2fs_xattr_advise_list, + .get = f2fs_xattr_advise_get, + .set = f2fs_xattr_advise_set, +}; + +const struct xattr_handler f2fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = F2FS_XATTR_INDEX_SECURITY, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +static const struct xattr_handler *f2fs_xattr_handler_map[] = { + [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, +#ifdef CONFIG_F2FS_FS_POSIX_ACL + [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, + [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, +#endif + [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, +#endif + [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, +}; + +const struct xattr_handler *f2fs_xattr_handlers[] = { + &f2fs_xattr_user_handler, +#ifdef CONFIG_F2FS_FS_POSIX_ACL + &f2fs_xattr_acl_access_handler, + &f2fs_xattr_acl_default_handler, +#endif + &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + &f2fs_xattr_security_handler, +#endif + &f2fs_xattr_advise_handler, + NULL, +}; + +static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) +{ + const struct xattr_handler *handler = NULL; + + if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) + handler = f2fs_xattr_handler_map[name_index]; + return handler; +} + +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, + size_t name_len, const char *name) +{ + struct f2fs_xattr_entry *entry; + + list_for_each_xattr(entry, base_addr) { + if (entry->e_name_index != name_index) + continue; + if (entry->e_name_len != name_len) + continue; + if (!memcmp(entry->e_name, name, name_len)) + break; + } + return entry; +} + +static void *read_all_xattrs(struct inode *inode, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_xattr_header *header; + size_t size = PAGE_SIZE, inline_size = 0; + void *txattr_addr; + + inline_size = inline_xattr_size(inode); + + txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); + if (!txattr_addr) + return NULL; + + /* read from inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, 
inode->i_ino); + if (IS_ERR(page)) + goto fail; + inline_addr = inline_xattr_addr(page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + } + + /* read from xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) + goto fail; + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); + f2fs_put_page(xpage, 1); + } + + header = XATTR_HDR(txattr_addr); + + /* never been allocated xattrs */ + if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { + header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); + header->h_refcount = cpu_to_le32(1); + } + return txattr_addr; +fail: + kzfree(txattr_addr); + return NULL; +} + +static inline int write_all_xattrs(struct inode *inode, __u32 hsize, + void *txattr_addr, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + size_t inline_size = 0; + void *xattr_addr; + struct page *xpage; + nid_t new_nid = 0; + int err; + + inline_size = inline_xattr_size(inode); + + if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) + if (!alloc_nid(sbi, &new_nid)) + return -ENOSPC; + + /* write to inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(page); + } + inline_addr = inline_xattr_addr(page); + } + memcpy(inline_addr, txattr_addr, inline_size); + f2fs_put_page(page, 1); + + /* no need to use xattr node block */ + if (hsize <= inline_size) { + err = truncate_xattr_node(inode, ipage); + alloc_nid_failed(sbi, new_nid); + return err; + } + } + + /* write to xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + BUG_ON(new_nid); + } else { + struct dnode_of_data dn; + set_new_dnode(&dn, inode, NULL, NULL, new_nid); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + alloc_nid_done(sbi, new_nid); + } + + xattr_addr = page_address(xpage); + memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - + sizeof(struct node_footer)); + set_page_dirty(xpage); + f2fs_put_page(xpage, 1); + + /* need to checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + return 0; +} + +int f2fs_getxattr(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct f2fs_xattr_entry *entry; + void *base_addr; + int error = 0; + size_t value_len, name_len; + + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; + + entry = __find_xattr(base_addr, name_index, name_len, name); + if (IS_XATTR_LAST_ENTRY(entry)) { + error = -ENODATA; + goto cleanup; + } + + value_len = le16_to_cpu(entry->e_value_size); + + if (buffer && value_len > buffer_size) { + error = -ERANGE; + goto cleanup; + } + + if (buffer) { + char *pval = entry->e_name + entry->e_name_len; + memcpy(buffer, pval, value_len); + } + error = value_len; + +cleanup: + kzfree(base_addr); + return error; +} + +ssize_t f2fs_listxattr(struct dentry *dentry, char 
*buffer, size_t buffer_size) +{ + struct inode *inode = dentry->d_inode; + struct f2fs_xattr_entry *entry; + void *base_addr; + int error = 0; + size_t rest = buffer_size; + + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; + + list_for_each_xattr(entry, base_addr) { + const struct xattr_handler *handler = + f2fs_xattr_handler(entry->e_name_index); + size_t size; + + if (!handler) + continue; + + size = handler->list(dentry, buffer, rest, entry->e_name, + entry->e_name_len, handler->flags); + if (buffer && size > rest) { + error = -ERANGE; + goto cleanup; + } + + if (buffer) + buffer += size; + rest -= size; + } + error = buffer_size - rest; +cleanup: + kzfree(base_addr); + return error; +} + +static int __f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + struct page *ipage) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_xattr_entry *here, *last; + void *base_addr; + int found, newsize; + size_t name_len; + __u32 new_hsize; + int error = -ENOMEM; + + if (name == NULL) + return -EINVAL; + + if (value == NULL) + value_len = 0; + + name_len = strlen(name); + + if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) + return -ERANGE; + + base_addr = read_all_xattrs(inode, ipage); + if (!base_addr) + goto exit; + + /* find entry with wanted name. */ + here = __find_xattr(base_addr, name_index, name_len, name); + + found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; + last = here; + + while (!IS_XATTR_LAST_ENTRY(last)) + last = XATTR_NEXT_ENTRY(last); + + newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + + name_len + value_len); + + /* 1. Check space */ + if (value) { + int free; + /* + * If value is NULL, it is remove operation. + * In case of update operation, we caculate free. + */ + free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); + if (found) + free = free - ENTRY_SIZE(here); + + if (free < newsize) { + error = -ENOSPC; + goto exit; + } + } + + /* 2. Remove old entry */ + if (found) { + /* + * If entry is found, remove old entry. + * If not found, remove operation is not needed. + */ + struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here); + int oldsize = ENTRY_SIZE(here); + + memmove(here, next, (char *)last - (char *)next); + last = (struct f2fs_xattr_entry *)((char *)last - oldsize); + memset(last, 0, oldsize); + } + + new_hsize = (char *)last - (char *)base_addr; + + /* 3. Write new entry */ + if (value) { + char *pval; + /* + * Before we come here, old entry is removed. + * We just write new entry. 
+ */ + memset(last, 0, newsize); + last->e_name_index = name_index; + last->e_name_len = name_len; + memcpy(last->e_name, name, name_len); + pval = last->e_name + name_len; + memcpy(pval, value, value_len); + last->e_value_size = cpu_to_le16(value_len); + new_hsize += newsize; + } + + error = write_all_xattrs(inode, new_hsize, base_addr, ipage); + if (error) + goto exit; + + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + inode->i_mode = fi->i_acl_mode; + inode->i_ctime = CURRENT_TIME; + clear_inode_flag(fi, FI_ACL_MODE); + } + + if (ipage) + update_inode(inode, ipage); + else + update_inode_page(inode); +exit: + kzfree(base_addr); + return error; +} + +int f2fs_setxattr(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + int err; + + f2fs_balance_fs(sbi); + + ilock = mutex_lock_op(sbi); + + err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); + + mutex_unlock_op(sbi, ilock); + + return err; +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h new file mode 100644 index 00000000000..ae894d17cf7 --- /dev/null +++ b/fs/f2fs/xattr.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/xattr.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/xattr.h + * + * On-disk format of extended attributes for the ext2 filesystem. + * + * (C) 2001 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __F2FS_XATTR_H__ +#define __F2FS_XATTR_H__ + +#include +#include + +/* Magic value in attribute blocks */ +#define F2FS_XATTR_MAGIC 0xF2F52011 + +/* Maximum number of references to one attribute block */ +#define F2FS_XATTR_REFCOUNT_MAX 1024 + +/* Name indexes */ +#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise" +#define F2FS_XATTR_INDEX_USER 1 +#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define F2FS_XATTR_INDEX_TRUSTED 4 +#define F2FS_XATTR_INDEX_LUSTRE 5 +#define F2FS_XATTR_INDEX_SECURITY 6 +#define F2FS_XATTR_INDEX_ADVISE 7 + +struct f2fs_xattr_header { + __le32 h_magic; /* magic number for identification */ + __le32 h_refcount; /* reference count */ + __u32 h_reserved[4]; /* zero right now */ +}; + +struct f2fs_xattr_entry { + __u8 e_name_index; + __u8 e_name_len; + __le16 e_value_size; /* size of attribute value */ + char e_name[0]; /* attribute name */ +}; + +#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) +#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr)) +#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) +#define XATTR_ROUND (3) + +#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) + +#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ + entry->e_name_len + le16_to_cpu(entry->e_value_size))) + +#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ + ENTRY_SIZE(entry))) + +#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) + +#define list_for_each_xattr(entry, addr) \ + for (entry = XATTR_FIRST_ENTRY(addr);\ + !IS_XATTR_LAST_ENTRY(entry);\ + entry = XATTR_NEXT_ENTRY(entry)) + +#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ + sizeof(struct node_footer) - sizeof(__u32)) + +#define MAX_VALUE_LEN(i) 
(MIN_OFFSET(i) - \ + sizeof(struct f2fs_xattr_header) - \ + sizeof(struct f2fs_xattr_entry)) + +/* + * On-disk structure of f2fs_xattr + * We use inline xattrs space + 1 block for xattr. + * + * +--------------------+ + * | f2fs_xattr_header | + * | | + * +--------------------+ + * | f2fs_xattr_entry | + * | .e_name_index = 1 | + * | .e_name_len = 3 | + * | .e_value_size = 14 | + * | .e_name = "foo" | + * | "value_of_xattr" |<- value_offs = e_name + e_name_len + * +--------------------+ + * | f2fs_xattr_entry | + * | .e_name_index = 4 | + * | .e_name = "bar" | + * +--------------------+ + * | | + * | Free | + * | | + * +--------------------+<- MIN_OFFSET + * | node_footer | + * | (nid, ino, offset) | + * +--------------------+ + * + **/ + +#ifdef CONFIG_F2FS_FS_XATTR +extern const struct xattr_handler f2fs_xattr_user_handler; +extern const struct xattr_handler f2fs_xattr_trusted_handler; +extern const struct xattr_handler f2fs_xattr_acl_access_handler; +extern const struct xattr_handler f2fs_xattr_acl_default_handler; +extern const struct xattr_handler f2fs_xattr_advise_handler; +extern const struct xattr_handler f2fs_xattr_security_handler; + +extern const struct xattr_handler *f2fs_xattr_handlers[]; + +extern int f2fs_setxattr(struct inode *, int, const char *, + const void *, size_t, struct page *); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +#else + +#define f2fs_xattr_handlers NULL +static inline int f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len) +{ + return -EOPNOTSUPP; +} +static inline int f2fs_getxattr(struct inode *inode, int name_index, + const char *name, void *buffer, size_t buffer_size) +{ + return -EOPNOTSUPP; +} +static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size) +{ + return -EOPNOTSUPP; +} +#endif + +#ifdef CONFIG_F2FS_FS_SECURITY +extern int f2fs_init_security(struct inode *, struct inode *, + const struct qstr *, struct page *); +#else +static inline int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return 0; +} +#endif +#endif /* __F2FS_XATTR_H__ */ diff --git a/fs/ioprio.c b/fs/ioprio.c index 7da2a06508e..95a6c2b04e0 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -30,7 +30,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) { - int err; + int err, i; struct io_context *ioc; const struct cred *cred = current_cred(), *tcred; @@ -60,12 +60,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio) err = -ENOMEM; break; } + /* let other ioc users see the new values */ + smp_wmb(); task->io_context = ioc; } while (1); if (!err) { ioc->ioprio = ioprio; - ioc->ioprio_changed = 1; + /* make sure schedulers see the new ioprio value */ + wmb(); + for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) + set_bit(i, ioc->ioprio_changed); } task_unlock(task); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index 1afa4dd4cae..8d95888b22c 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -13,15 +13,17 @@ static int loadavg_proc_show(struct seq_file *m, void *v) { unsigned long avnrun[3]; + unsigned long time_avnrun = avg_nr_running(); get_avenrun(avnrun, FIXED_1/200, 0); - seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n", + seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d %lu.%02lu\n", LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]), LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), 
LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), nr_running(), nr_threads, - task_active_pid_ns(current)->last_pid); + task_active_pid_ns(current)->last_pid, + LOAD_INT(time_avnrun), LOAD_FRAC(time_avnrun)); return 0; } diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 4b758ad5c83..e2ef91dcf3d 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -10,6 +10,7 @@ #include #include #include +#include #ifndef arch_irq_stat_cpu #define arch_irq_stat_cpu(cpu) 0 @@ -21,6 +22,41 @@ #define arch_idle_time(cpu) 0 #endif +static cputime64_t get_idle_time(int cpu) +{ + u64 idle_time = -1ULL; + cputime64_t idle; + + if (cpu_online(cpu)) + idle_time = get_cpu_idle_time_us(cpu, NULL); + + if (idle_time == -1ULL) { + /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ + idle = kstat_cpu(cpu).cpustat.idle; + idle = cputime64_add(idle, arch_idle_time(cpu)); + } else + idle = nsecs_to_jiffies64(1000 * idle_time); + + return idle; +} + +static cputime64_t get_iowait_time(int cpu) +{ + u64 iowait_time = -1ULL; + cputime64_t iowait; + + if (cpu_online(cpu)) + iowait_time = get_cpu_iowait_time_us(cpu, NULL); + + if (iowait_time == -1ULL) + /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ + iowait = kstat_cpu(cpu).cpustat.iowait; + else + iowait = nsecs_to_jiffies64(1000 * iowait_time); + + return iowait; +} + static int show_stat(struct seq_file *p, void *v) { int i, j; @@ -42,9 +78,8 @@ static int show_stat(struct seq_file *p, void *v) user = cputime64_add(user, kstat_cpu(i).cpustat.user); nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); system = cputime64_add(system, kstat_cpu(i).cpustat.system); - idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); - idle = cputime64_add(idle, arch_idle_time(i)); - iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); + idle = cputime64_add(idle, get_idle_time(i)); + iowait = cputime64_add(iowait, get_iowait_time(i)); irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); @@ -80,14 +115,12 @@ static int show_stat(struct seq_file *p, void *v) #else for_each_online_cpu(i) { #endif - /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ user = kstat_cpu(i).cpustat.user; nice = kstat_cpu(i).cpustat.nice; system = kstat_cpu(i).cpustat.system; - idle = kstat_cpu(i).cpustat.idle; - idle = cputime64_add(idle, arch_idle_time(i)); - iowait = kstat_cpu(i).cpustat.iowait; + idle = get_idle_time(i); + iowait = get_iowait_time(i); irq = kstat_cpu(i).cpustat.irq; softirq = kstat_cpu(i).cpustat.softirq; steal = kstat_cpu(i).cpustat.steal; diff --git a/fs/select.c b/fs/select.c index d33418fdc85..049c8ce78ef 100644 --- a/fs/select.c +++ b/fs/select.c @@ -69,7 +69,6 @@ static long __estimate_accuracy(struct timespec *tv) long select_estimate_accuracy(struct timespec *tv) { - unsigned long ret; struct timespec now; /* @@ -81,10 +80,8 @@ long select_estimate_accuracy(struct timespec *tv) ktime_get_ts(&now); now = timespec_sub(*tv, now); - ret = __estimate_accuracy(&now); - if (ret < current->timer_slack_ns) - return current->timer_slack_ns; - return ret; + return min_t(long, __estimate_accuracy(&now), + task_get_effective_timer_slack(current)); } diff --git a/fs/sync.c b/fs/sync.c index c98a7477edf..27626fa6d09 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -18,6 +18,11 @@ #include #include "internal.h" +#ifdef CONFIG_DYNAMIC_FSYNC +extern bool early_suspend_active; +extern bool dyn_fsync_active; +#endif + #define VALID_FLAGS 
(SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) @@ -87,7 +92,7 @@ static void sync_one_sb(struct super_block *sb, void *arg) * Sync all the data for all the filesystems (called by sys_sync() and * emergency sync) */ -static void sync_filesystems(int wait) +void sync_filesystems(int wait) { iterate_supers(sync_one_sb, &wait); } @@ -165,9 +170,17 @@ SYSCALL_DEFINE1(syncfs, int, fd) */ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { +#ifdef CONFIG_DYNAMIC_FSYNC + if (likely(dyn_fsync_active && !early_suspend_active)) + return 0; + else { +#endif if (!file->f_op || !file->f_op->fsync) return -EINVAL; return file->f_op->fsync(file, start, end, datasync); +#ifdef CONFIG_DYNAMIC_FSYNC + } +#endif } EXPORT_SYMBOL(vfs_fsync_range); @@ -189,22 +202,33 @@ static int do_fsync(unsigned int fd, int datasync) { struct file *file; int ret = -EBADF; + int fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (file) { ret = vfs_fsync(file, datasync); - fput(file); + fput_light(file, fput_needed); } return ret; } SYSCALL_DEFINE1(fsync, unsigned int, fd) { +#ifdef CONFIG_DYNAMIC_FSYNC + if (likely(dyn_fsync_active && !early_suspend_active)) + return 0; + else +#endif return do_fsync(fd, 0); } SYSCALL_DEFINE1(fdatasync, unsigned int, fd) { +#if 0 + if (likely(dyn_fsync_active && !early_suspend_active)) + return 0; + else +#endif return do_fsync(fd, 1); } @@ -275,6 +299,12 @@ EXPORT_SYMBOL(generic_write_sync); SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, unsigned int flags) { +#ifdef CONFIG_DYNAMIC_FSYNC + if (likely(dyn_fsync_active && !early_suspend_active)) + return 0; + else { +#endif + int ret; struct file *file; struct address_space *mapping; @@ -354,6 +384,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, fput_light(file, fput_needed); out: return ret; +#ifdef CONFIG_DYNAMIC_FSYNC + } +#endif } #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes, @@ -370,6 +403,11 @@ SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range); SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, loff_t offset, loff_t nbytes) { +#ifdef CONFIG_DYNAMIC_FSYNC + if (likely(dyn_fsync_active && !early_suspend_active)) + return 0; + else +#endif return sys_sync_file_range(fd, offset, nbytes, flags); } #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index e3f091a81c7..1b3c622aa19 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec void *old_secdata; size_t old_secdata_len; - iattrs = sd->s_iattr; - if (!iattrs) - iattrs = sysfs_init_inode_attrs(sd); - if (!iattrs) - return -ENOMEM; + if (!sd->s_iattr) { + sd->s_iattr = sysfs_init_inode_attrs(sd); + if (!sd->s_iattr) + return -ENOMEM; + } + iattrs = sd->s_iattr; old_secdata = iattrs->ia_secdata; old_secdata_len = iattrs->ia_secdata_len; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5e30b45d3d6..4a2662facdd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -33,7 +33,7 @@ struct sg_io_hdr; struct bsg_job; #define BLKDEV_MIN_RQ 4 -#define BLKDEV_MAX_RQ 128 /* Default maximum */ +#define BLKDEV_MAX_RQ 512 /* Non-Default maximum (128) */ struct request; typedef void (rq_end_io_fn)(struct request *, int); @@ -277,6 +277,7 @@ struct request_queue { struct request_list rq; request_fn_proc *request_fn; + 
request_fn_proc *urgent_request_fn; make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unprep_rq_fn *unprep_rq_fn; @@ -352,6 +353,8 @@ struct request_queue { struct list_head timeout_list; struct queue_limits limits; + bool notified_urgent; + bool dispatched_urgent; /* * sg stuff @@ -662,6 +665,8 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); +extern int blk_reinsert_request(struct request_queue *q, struct request *rq); +extern bool blk_reinsert_req_sup(struct request_queue *q); extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); @@ -806,6 +811,7 @@ extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern struct request_queue *blk_init_allocated_queue(struct request_queue *, request_fn_proc *, spinlock_t *); +extern void blk_urgent_request(struct request_queue *q, request_fn_proc *fn); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ac663c18776..f2ca8cf3a88 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -64,3 +64,15 @@ SUBSYS(perf) #endif /* */ + +#ifdef CONFIG_CGROUP_TIMER_SLACK +SUBSYS(timer_slack) +#endif + +/* */ + +#ifdef CONFIG_CGROUP_BFQIO +SUBSYS(bfqio) +#endif + +/* */ diff --git a/include/linux/compaction.h b/include/linux/compaction.h index cc9f7a42864..233998aab97 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -84,6 +84,11 @@ static inline bool compaction_deferred(struct zone *zone) return 1; } +static inline int compact_nodes(bool sync) +{ + return COMPACT_CONTINUE; +} + #endif /* CONFIG_COMPACTION */ #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 54d948ec49a..d732be52629 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -66,8 +66,9 @@ enum { /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, - /* prepare workqueues for other notifiers */ - CPU_PRI_WORKQUEUE = 5, + /* bring up workqueues before normal notifiers and down after */ + CPU_PRI_WORKQUEUE_UP = 5, + CPU_PRI_WORKQUEUE_DOWN = -5, }; #define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 03a7c6f5786..673d2e58cee 100755 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -203,6 +203,9 @@ extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy, int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); +int lock_policy_rwsem_write(int cpu); +void unlock_policy_rwsem_write(int cpu); + /********************************************************************* * CPUFREQ DRIVER INTERFACE * @@ -364,12 +367,18 @@ extern struct cpufreq_governor cpufreq_gov_userspace; #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) extern struct cpufreq_governor cpufreq_gov_ondemand; #define CPUFREQ_DEFAULT_GOVERNOR 
(&cpufreq_gov_ondemand) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND) +extern struct cpufreq_governor cpufreq_gov_touchdemand; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_touchdemand) #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) extern struct cpufreq_governor cpufreq_gov_conservative; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE) extern struct cpufreq_governor cpufreq_gov_interactive; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_interactive) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ) +extern struct cpufreq_governor cpufreq_gov_pegasusq; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_pegasusq) #endif diff --git a/include/linux/cpuquiet.h b/include/linux/cpuquiet.h new file mode 100644 index 00000000000..5558c015bb5 --- /dev/null +++ b/include/linux/cpuquiet.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#ifndef _LINUX_CPUONLINE_H +#define _LINUX_CPUONLINE_H + +#include +#include + +#define CPUQUIET_NAME_LEN 16 + +struct cpuquiet_governor { + char name[CPUQUIET_NAME_LEN]; + struct list_head governor_list; + int (*start) (void); + void (*stop) (void); + int (*store_active) (unsigned int cpu, bool active); + void (*device_free_notification) (void); + void (*device_busy_notification) (void); + struct module *owner; +}; + +struct cpuquiet_driver { + char name[CPUQUIET_NAME_LEN]; + int (*quiesence_cpu) (unsigned int cpunumber); + int (*wake_cpu) (unsigned int cpunumber); +}; + +extern int cpuquiet_register_governor(struct cpuquiet_governor *gov); +extern void cpuquiet_unregister_governor(struct cpuquiet_governor *gov); +extern int cpuquiet_quiesence_cpu(unsigned int cpunumber); +extern int cpuquiet_wake_cpu(unsigned int cpunumber); +extern int cpuquiet_register_driver(struct cpuquiet_driver *drv); +extern void cpuquiet_unregister_driver(struct cpuquiet_driver *drv); +extern int cpuquiet_add_group(struct attribute_group *attrs); +extern void cpuquiet_remove_group(struct attribute_group *attrs); +extern void cpuquiet_device_busy(void); +extern void cpuquiet_device_free(void); +int cpuquiet_kobject_init(struct kobject *kobj, struct kobj_type *type, + char *name); +extern unsigned int nr_cluster_ids; + +/* Sysfs support */ +struct cpuquiet_attribute { + struct attribute attr; + ssize_t (*show)(struct cpuquiet_attribute *attr, char *buf); + ssize_t (*store)(struct cpuquiet_attribute *attr, const char *buf, + size_t count); + /* Optional. 
Called after store is called */ + void (*store_callback)(struct cpuquiet_attribute *attr); + void *param; +}; + +#define CPQ_ATTRIBUTE(_name, _mode, _type, _callback) \ + static struct cpuquiet_attribute _name ## _attr = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = show_ ## _type ## _attribute, \ + .store = store_ ## _type ## _attribute, \ + .store_callback = _callback, \ + .param = &_name, \ +} + +#define CPQ_BASIC_ATTRIBUTE(_name, _mode, _type) \ + CPQ_ATTRIBUTE(_name, _mode, _type, NULL) + +#define CPQ_ATTRIBUTE_CUSTOM(_name, _mode, _show, _store) \ + static struct cpuquiet_attribute _name ## _attr = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store \ + .store_callback = NULL, \ + .param = &_name, \ +} + + +extern ssize_t show_int_attribute(struct cpuquiet_attribute *cattr, char *buf); +extern ssize_t store_int_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count); +extern ssize_t show_bool_attribute(struct cpuquiet_attribute *cattr, char *buf); +extern ssize_t store_bool_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count); +extern ssize_t store_uint_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count); +extern ssize_t show_uint_attribute(struct cpuquiet_attribute *cattr, char *buf); +extern ssize_t store_ulong_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count); +extern ssize_t show_ulong_attribute(struct cpuquiet_attribute *cattr, + char *buf); +extern ssize_t cpuquiet_auto_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf); +extern ssize_t cpuquiet_auto_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, + size_t count); +#endif diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 713c7c62443..9327888b4b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -239,6 +239,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern struct dentry * d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h new file mode 100644 index 00000000000..d5b68bf3ec9 --- /dev/null +++ b/include/linux/decompress/unlz4.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZ4_H +#define DECOMPRESS_UNLZ4_H + +int unlz4(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d800d514218..858993b1f83 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -20,6 +20,8 @@ typedef void (elevator_bio_merged_fn) (struct request_queue *, typedef int (elevator_dispatch_fn) (struct request_queue *, int); typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); +typedef int (elevator_reinsert_req_fn) (struct request_queue *, struct request *); +typedef bool (elevator_is_urgent_fn) (struct request_queue *); typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct 
request *); typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); typedef int (elevator_may_queue_fn) (struct request_queue *, int); @@ -42,6 +44,9 @@ struct elevator_ops elevator_dispatch_fn *elevator_dispatch_fn; elevator_add_req_fn *elevator_add_req_fn; + elevator_reinsert_req_fn *elevator_reinsert_req_fn; + elevator_is_urgent_fn *elevator_is_urgent_fn; + elevator_activate_req_fn *elevator_activate_req_fn; elevator_deactivate_req_fn *elevator_deactivate_req_fn; @@ -109,6 +114,7 @@ extern void elv_merged_request(struct request_queue *, struct request *, int); extern void elv_bio_merged(struct request_queue *q, struct request *, struct bio *); extern void elv_requeue_request(struct request_queue *, struct request *); +extern int elv_reinsert_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); diff --git a/include/linux/errno.h b/include/linux/errno.h index 46685832ed9..2e2b696e224 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -16,7 +16,8 @@ #define ERESTARTNOHAND 514 /* restart if no handler.. */ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ - +#define EPROBE_DEFER 517 /* Driver requests probe retry */ +#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ #define ENOTSYNC 522 /* Update synchronization mismatch */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h new file mode 100644 index 00000000000..8003f8e092d --- /dev/null +++ b/include/linux/f2fs_fs.h @@ -0,0 +1,426 @@ +/** + * include/linux/f2fs_fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_FS_H +#define _LINUX_F2FS_FS_H + +#include +#include + +#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ +#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ +#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ +#define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ + +#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ +#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ + +#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) +#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) +#define F2FS_META_INO(sbi) (sbi->meta_ino_num) + +/* This flag is used by node and meta inodes, and by recovery */ +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) + +/* + * For further optimization on multi-head logs, on-disk layout supports maximum + * 16 logs by default. The number, 16, is expected to cover all the cases + * enoughly. The implementaion currently uses no more than 6 logs. + * Half the logs are used for nodes, and the other half are used for data. 
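+ *
+ * Editor's illustration (not part of this patch): with the defaults below,
+ * MAX_ACTIVE_NODE_LOGS + MAX_ACTIVE_DATA_LOGS == MAX_ACTIVE_LOGS, and the
+ * checkpoint keeps one current segment per log, e.g.
+ *
+ *	segno = le32_to_cpu(ckpt->cur_node_segno[0]);
+ *
+ * where ckpt is a struct f2fs_checkpoint read from disk and index 0 selects
+ * the first node log.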
+ */ +#define MAX_ACTIVE_LOGS 16 +#define MAX_ACTIVE_NODE_LOGS 8 +#define MAX_ACTIVE_DATA_LOGS 8 + +/* + * For superblock + */ +struct f2fs_super_block { + __le32 magic; /* Magic Number */ + __le16 major_ver; /* Major Version */ + __le16 minor_ver; /* Minor Version */ + __le32 log_sectorsize; /* log2 sector size in bytes */ + __le32 log_sectors_per_block; /* log2 # of sectors per block */ + __le32 log_blocksize; /* log2 block size in bytes */ + __le32 log_blocks_per_seg; /* log2 # of blocks per segment */ + __le32 segs_per_sec; /* # of segments per section */ + __le32 secs_per_zone; /* # of sections per zone */ + __le32 checksum_offset; /* checksum offset inside super block */ + __le64 block_count; /* total # of user blocks */ + __le32 section_count; /* total # of sections */ + __le32 segment_count; /* total # of segments */ + __le32 segment_count_ckpt; /* # of segments for checkpoint */ + __le32 segment_count_sit; /* # of segments for SIT */ + __le32 segment_count_nat; /* # of segments for NAT */ + __le32 segment_count_ssa; /* # of segments for SSA */ + __le32 segment_count_main; /* # of segments for main area */ + __le32 segment0_blkaddr; /* start block address of segment 0 */ + __le32 cp_blkaddr; /* start block address of checkpoint */ + __le32 sit_blkaddr; /* start block address of SIT */ + __le32 nat_blkaddr; /* start block address of NAT */ + __le32 ssa_blkaddr; /* start block address of SSA */ + __le32 main_blkaddr; /* start block address of main area */ + __le32 root_ino; /* root inode number */ + __le32 node_ino; /* node inode number */ + __le32 meta_ino; /* meta inode number */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __le16 volume_name[512]; /* volume name */ + __le32 extension_count; /* # of extensions below */ + __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ +} __packed; + +/* + * For checkpoint + */ +#define CP_ERROR_FLAG 0x00000008 +#define CP_COMPACT_SUM_FLAG 0x00000004 +#define CP_ORPHAN_PRESENT_FLAG 0x00000002 +#define CP_UMOUNT_FLAG 0x00000001 + +struct f2fs_checkpoint { + __le64 checkpoint_ver; /* checkpoint block version number */ + __le64 user_block_count; /* # of user blocks */ + __le64 valid_block_count; /* # of valid blocks in main area */ + __le32 rsvd_segment_count; /* # of reserved segments for gc */ + __le32 overprov_segment_count; /* # of overprovision segments */ + __le32 free_segment_count; /* # of free segments in main area */ + + /* information of current node segments */ + __le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS]; + __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS]; + /* information of current data segments */ + __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS]; + __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS]; + __le32 ckpt_flags; /* Flags : umount and journal_present */ + __le32 cp_pack_total_block_count; /* total # of one cp pack */ + __le32 cp_pack_start_sum; /* start block number of data summary */ + __le32 valid_node_count; /* Total number of valid nodes */ + __le32 valid_inode_count; /* Total number of valid inodes */ + __le32 next_free_nid; /* Next free node number */ + __le32 sit_ver_bitmap_bytesize; /* Default value 64 */ + __le32 nat_ver_bitmap_bytesize; /* Default value 256 */ + __le32 checksum_offset; /* checksum offset inside cp block */ + __le64 elapsed_time; /* mounted time */ + /* allocation type of current segment */ + unsigned char alloc_type[MAX_ACTIVE_LOGS]; + + /* SIT and NAT version bitmap */ + unsigned char sit_nat_version_bitmap[1]; +} __packed; + +/* + * For orphan inode management + */ +#define 
F2FS_ORPHANS_PER_BLOCK 1020 + +struct f2fs_orphan_block { + __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ + __le32 reserved; /* reserved */ + __le16 blk_addr; /* block index in current CP */ + __le16 blk_count; /* Number of orphan inode blocks in CP */ + __le32 entry_count; /* Total number of orphan nodes in current CP */ + __le32 check_sum; /* CRC32 for orphan inode block */ +} __packed; + +/* + * For NODE structure + */ +struct f2fs_extent { + __le32 fofs; /* start file offset of the extent */ + __le32 blk_addr; /* start block address of the extent */ + __le32 len; /* lengh of the extent */ +} __packed; + +#define F2FS_NAME_LEN 255 +#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define ADDRS_PER_INODE(fi) addrs_per_inode(fi) +#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ + +#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) +#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) +#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) +#define NODE_IND2_BLOCK (DEF_ADDRS_PER_INODE + 4) +#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) + +#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ + +struct f2fs_inode { + __le16 i_mode; /* file mode */ + __u8 i_advise; /* file hints */ + __u8 i_inline; /* file inline flags */ + __le32 i_uid; /* user ID */ + __le32 i_gid; /* group ID */ + __le32 i_links; /* links count */ + __le64 i_size; /* file size in bytes */ + __le64 i_blocks; /* file size in blocks */ + __le64 i_atime; /* access time */ + __le64 i_ctime; /* change time */ + __le64 i_mtime; /* modification time */ + __le32 i_atime_nsec; /* access time in nano scale */ + __le32 i_ctime_nsec; /* change time in nano scale */ + __le32 i_mtime_nsec; /* modification time in nano scale */ + __le32 i_generation; /* file version (for NFS) */ + __le32 i_current_depth; /* only for directory depth */ + __le32 i_xattr_nid; /* nid to save xattr */ + __le32 i_flags; /* file attributes */ + __le32 i_pino; /* parent inode number */ + __le32 i_namelen; /* file name length */ + __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ + __u8 i_reserved2; /* for backward compatibility */ + + struct f2fs_extent i_ext; /* caching a largest extent */ + + __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ + + __le32 i_nid[5]; /* direct(2), indirect(2), + double_indirect(1) node id */ +} __packed; + +struct direct_node { + __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */ +} __packed; + +struct indirect_node { + __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */ +} __packed; + +enum { + COLD_BIT_SHIFT = 0, + FSYNC_BIT_SHIFT, + DENT_BIT_SHIFT, + OFFSET_BIT_SHIFT +}; + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode nunmber */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +struct f2fs_node { + /* can be one of three types: inode, direct, and indirect types */ + union { + struct f2fs_inode i; + struct direct_node dn; + struct indirect_node in; + }; + struct node_footer footer; +} __packed; + +/* + * For NAT entries + */ +#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) + +struct f2fs_nat_entry { + __u8 version; /* latest version of cached nat entry */ + __le32 ino; /* inode number */ + __le32 block_addr; /* block 
address */ +} __packed; + +struct f2fs_nat_block { + struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For SIT entries + * + * Each segment is 2MB in size by default so that a bitmap for validity of + * there-in blocks should occupy 64 bytes, 512 bits. + * Not allow to change this. + */ +#define SIT_VBLOCK_MAP_SIZE 64 +#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) + +/* + * Note that f2fs_sit_entry->vblocks has the following bit-field information. + * [15:10] : allocation type such as CURSEG_XXXX_TYPE + * [9:0] : valid block count + */ +#define SIT_VBLOCKS_SHIFT 10 +#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1) +#define GET_SIT_VBLOCKS(raw_sit) \ + (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK) +#define GET_SIT_TYPE(raw_sit) \ + ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \ + >> SIT_VBLOCKS_SHIFT) + +struct f2fs_sit_entry { + __le16 vblocks; /* reference above */ + __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */ + __le64 mtime; /* segment age for cleaning */ +} __packed; + +struct f2fs_sit_block { + struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For segment summary + * + * One summary block contains exactly 512 summary entries, which represents + * exactly 2MB segment by default. Not allow to change the basic units. + * + * NOTE: For initializing fields, you must use set_summary + * + * - If data page, nid represents dnode's nid + * - If node page, nid represents the node page's nid. + * + * The ofs_in_node is used by only data page. It represents offset + * from node's page's beginning to get a data block address. + * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) + */ +#define ENTRIES_IN_SUM 512 +#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ +#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ +#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) + +/* a summary entry for a 4KB-sized block in a segment */ +struct f2fs_summary { + __le32 nid; /* parent node id */ + union { + __u8 reserved[3]; + struct { + __u8 version; /* node version number */ + __le16 ofs_in_node; /* block index in parent node */ + } __packed; + }; +} __packed; + +/* summary block type, node or data, is stored to the summary_footer */ +#define SUM_TYPE_NODE (1) +#define SUM_TYPE_DATA (0) + +struct summary_footer { + unsigned char entry_type; /* SUM_TYPE_XXX */ + __u32 check_sum; /* summary checksum */ +} __packed; + +#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ + SUM_ENTRY_SIZE) +#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct nat_journal_entry)) +#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct nat_journal_entry)) +#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct sit_journal_entry)) +#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct sit_journal_entry)) +/* + * frequently updated NAT/SIT entries can be stored in the spare area in + * summary blocks + */ +enum { + NAT_JOURNAL = 0, + SIT_JOURNAL +}; + +struct nat_journal_entry { + __le32 nid; + struct f2fs_nat_entry ne; +} __packed; + +struct nat_journal { + struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; + __u8 reserved[NAT_JOURNAL_RESERVED]; +} __packed; + +struct sit_journal_entry { + __le32 segno; + struct f2fs_sit_entry se; +} __packed; + +struct sit_journal { + struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; + __u8 reserved[SIT_JOURNAL_RESERVED]; +} __packed; + +/* 4KB-sized 
summary block structure */ +struct f2fs_summary_block { + struct f2fs_summary entries[ENTRIES_IN_SUM]; + union { + __le16 n_nats; + __le16 n_sits; + }; + /* spare area is used by NAT or SIT journals */ + union { + struct nat_journal nat_j; + struct sit_journal sit_j; + }; + struct summary_footer footer; +} __packed; + +/* + * For directory operations + */ +#define F2FS_DOT_HASH 0 +#define F2FS_DDOT_HASH F2FS_DOT_HASH +#define F2FS_MAX_HASH (~((0x3ULL) << 62)) +#define F2FS_HASH_COL_BIT ((0x1ULL) << 63) + +typedef __le32 f2fs_hash_t; + +/* One directory entry slot covers 8bytes-long file name */ +#define F2FS_SLOT_LEN 8 +#define F2FS_SLOT_LEN_BITS 3 + +#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK 214 + +/* MAX level for dir lookup */ +#define MAX_DIR_HASH_DEPTH 63 + +#define SIZE_OF_DIR_ENTRY 11 /* by byte */ +#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ + BITS_PER_BYTE) +#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ + F2FS_SLOT_LEN) * \ + NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) + +/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ +struct f2fs_dir_entry { + __le32 hash_code; /* hash code of file name */ + __le32 ino; /* inode number */ + __le16 name_len; /* lengh of file name */ + __u8 file_type; /* file type */ +} __packed; + +/* 4KB-sized directory entry block */ +struct f2fs_dentry_block { + /* validity bitmap for directory entries in each block */ + __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; + __u8 reserved[SIZE_OF_RESERVED]; + struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK]; + __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; +} __packed; + +/* file types used in inode_info->flags */ +enum { + F2FS_FT_UNKNOWN, + F2FS_FT_REG_FILE, + F2FS_FT_DIR, + F2FS_FT_CHRDEV, + F2FS_FT_BLKDEV, + F2FS_FT_FIFO, + F2FS_FT_SOCK, + F2FS_FT_SYMLINK, + F2FS_FT_MAX +}; + +#endif /* _LINUX_F2FS_FS_H */ diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 51da65b68b8..9dcdb6251cb 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -49,6 +49,8 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, + FRA_UID_START, /* UID range */ + FRA_UID_END, __FRA_MAX }; diff --git a/include/linux/fs.h b/include/linux/fs.h index cf7bc25928c..2e9d230ff95 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1743,6 +1743,19 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } +/** + * set_nlink - directly set an inode's link count + * @inode: inode + * @nlink: new nlink (should be non-zero) + * + * This is a low-level filesystem helper to replace any + * direct filesystem manipulation of i_nlink. 
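+ *
+ * Illustrative use (editor's sketch, not part of this patch): a filesystem
+ * filling an in-core inode from its on-disk copy would call
+ *
+ *	set_nlink(inode, le32_to_cpu(raw_inode->i_links));
+ *
+ * rather than assigning inode->i_nlink directly; raw_inode->i_links is a
+ * hypothetical on-disk field here.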
+ */ +static inline void set_nlink(struct inode *inode, unsigned int nlink) +{ + inode->i_nlink = nlink; +} + /** * inc_nlink - directly increment an inode's link count * @inode: inode @@ -2077,6 +2090,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) } #endif extern int sync_filesystem(struct super_block *); +extern void sync_filesystems(int wait); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d14e058aaee..8499db0657f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -117,8 +117,17 @@ extern struct group_info init_groups; extern struct cred init_cred; +extern struct task_group root_task_group; + +#ifdef CONFIG_CGROUP_SCHED +# define INIT_CGROUP_SCHED(tsk) \ + .sched_task_group = &root_task_group, +#else +# define INIT_CGROUP_SCHED(tsk) +#endif + #ifdef CONFIG_PERF_EVENTS -# define INIT_PERF_EVENTS(tsk) \ +# define INIT_PERF_EVENTS(tsk) \ .perf_event_mutex = \ __MUTEX_INITIALIZER(tsk.perf_event_mutex), \ .perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list), @@ -126,6 +135,8 @@ extern struct cred init_cred; # define INIT_PERF_EVENTS(tsk) #endif +#define INIT_TASK_COMM "swapper" + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -153,6 +164,7 @@ extern struct cred init_cred; }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ + INIT_CGROUP_SCHED(tsk) \ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ @@ -162,7 +174,7 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .comm = "swapper", \ + .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &init_fs, \ .files = &init_files, \ diff --git a/include/linux/input.h b/include/linux/input.h index 48857fddf9a..cf61dc05f57 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1139,6 +1139,18 @@ struct ff_effect { #include #include +/** + * struct input_value - input value representation + * @type: type of value (EV_KEY, EV_ABS, etc) + * @code: the value code + * @value: the value + */ +struct input_value { + __u16 type; + __u16 code; + __s32 value; +}; + /** * struct input_dev - represents an input device * @name: name of the device @@ -1215,7 +1227,6 @@ struct ff_effect { * last user closes the device * @going_away: marks devices that are in a middle of unregistering and * causes input_open_device*() fail with -ENODEV. - * @sync: set to %true when there were no new events since last EV_SYN * @dev: driver model's view of this device * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held @@ -1283,12 +1294,14 @@ struct input_dev { unsigned int users; bool going_away; - bool sync; - struct device dev; struct list_head h_list; struct list_head node; + + unsigned int num_vals; + unsigned int max_vals; + struct input_value *vals; }; #define to_input_dev(d) container_of(d, struct input_dev, dev) @@ -1349,6 +1362,9 @@ struct input_handle; * @event: event handler. This method is being called by input core with * interrupts disabled and dev->event_lock spinlock held and so * it may not sleep + * @events: event sequence handler. 
This method is being called by + * input core with interrupts disabled and dev->event_lock + * spinlock held and so it may not sleep * @filter: similar to @event; separates normal event handlers from * "filters". * @match: called after comparing device's id with handler's id_table @@ -1385,6 +1401,8 @@ struct input_handler { void *private; void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value); + void (*events)(struct input_handle *handle, + const struct input_value *vals, unsigned int count); bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value); bool (*match)(struct input_handler *handler, struct input_dev *dev); int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id); diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 5037a0ad231..6f63e1f2209 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,10 +1,10 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H +#include #include #include -struct cfq_queue; struct cfq_ttime { unsigned long last_end_request; @@ -16,12 +16,19 @@ struct cfq_ttime { struct cfq_io_context { void *key; - struct cfq_queue *cfqq[2]; + void *cfqq[2]; struct io_context *ioc; struct cfq_ttime ttime; + unsigned int wr_time_left; + unsigned int saved_idle_window; + unsigned int saved_IO_bound; + + unsigned int cooperations; + unsigned int failed_cooperations; + struct list_head queue_list; struct hlist_node cic_list; @@ -31,6 +38,16 @@ struct cfq_io_context { struct rcu_head rcu_head; }; +/* + * Indexes into the ioprio_changed bitmap. A bit set indicates that + * the corresponding I/O scheduler needs to see a ioprio update. + */ +enum { + IOC_CFQ_IOPRIO_CHANGED, + IOC_BFQ_IOPRIO_CHANGED, + IOC_IOPRIO_CHANGED_BITS +}; + /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. 
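 *
 * Editor's sketch (not part of this patch): with ioprio_changed now a bitmap,
 * each I/O scheduler reacts only to its own bit, e.g.
 *
 *	if (test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, ioc->ioprio_changed))
 *		handle_ioprio_change(ioc);	(hypothetical per-scheduler hook)
 *
 * so CFQ and BFQ can each pick up the same ioprio update independently.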
@@ -43,7 +60,7 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - unsigned short ioprio_changed; + DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) unsigned short cgroup_changed; @@ -57,6 +74,8 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; + struct radix_tree_root bfq_radix_root; + struct hlist_head bfq_cic_list; void __rcu *ioc_data; }; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0c997767429..15395001dc5 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -161,6 +161,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 accept_ra_rt_table; __s32 proxy_ndp; __s32 accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -213,6 +214,7 @@ enum { DEVCONF_DISABLE_IPV6, DEVCONF_ACCEPT_DAD, DEVCONF_FORCE_TLLAO, + DEVCONF_ACCEPT_RA_RT_TABLE, DEVCONF_MAX }; diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 4fa09d4d0b7..6a9e8f5399e 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -1,20 +1,23 @@ #ifndef _LINUX_IRQ_WORK_H #define _LINUX_IRQ_WORK_H +#include + struct irq_work { - struct irq_work *next; + unsigned long flags; + struct llist_node llnode; void (*func)(struct irq_work *); }; static inline -void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *)) +void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) { - entry->next = NULL; - entry->func = func; + work->flags = 0; + work->func = func; } -bool irq_work_queue(struct irq_work *entry); +bool irq_work_queue(struct irq_work *work); void irq_work_run(void); -void irq_work_sync(struct irq_work *entry); +void irq_work_sync(struct irq_work *work); #endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c2478a342cd..e0f1cee6616 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -101,6 +101,10 @@ struct kimage { #define KEXEC_TYPE_CRASH 1 unsigned int preserve_context : 1; +#ifdef CONFIG_KEXEC_HARDBOOT + unsigned int hardboot : 1; +#endif + #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; #endif @@ -165,6 +169,11 @@ extern struct kimage *kexec_crash_image; #define KEXEC_ON_CRASH 0x00000001 #define KEXEC_PRESERVE_CONTEXT 0x00000002 + +#ifdef CONFIG_KEXEC_HARDBOOT +#define KEXEC_HARDBOOT 0x00000004 +#endif + #define KEXEC_ARCH_MASK 0xffff0000 /* These values match the ELF architecture values. @@ -183,10 +192,14 @@ extern struct kimage *kexec_crash_image; #define KEXEC_ARCH_MIPS ( 8 << 16) /* List of defined/legal kexec flags */ -#ifndef CONFIG_KEXEC_JUMP -#define KEXEC_FLAGS KEXEC_ON_CRASH -#else +#if defined(CONFIG_KEXEC_JUMP) && defined(CONFIG_KEXEC_HARDBOOT) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_HARDBOOT) +#elif defined(CONFIG_KEXEC_JUMP) #define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) +#elif defined(CONFIG_KEXEC_HARDBOOT) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_HARDBOOT) +#else +#define KEXEC_FLAGS (KEXEC_ON_CRASH) #endif #define VMCOREINFO_BYTES (4096) diff --git a/include/linux/llist.h b/include/linux/llist.h index aa0c8b5b3cd..801b44b07aa 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -35,10 +35,30 @@ * * The basic atomic operation of this list is cmpxchg on long. On * architectures that don't have NMI-safe cmpxchg implementation, the - * list can NOT be used in NMI handler. 
So code uses the list in NMI - * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * list can NOT be used in NMI handlers. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include +#include +#include + struct llist_head { struct llist_node *first; }; @@ -113,14 +133,55 @@ static inline void init_llist_head(struct llist_head *list) * test whether the list is empty without deleting something from the * list. */ -static inline int llist_empty(const struct llist_head *head) +static inline bool llist_empty(const struct llist_head *head) { return ACCESS_ONCE(head->first) == NULL; } -void llist_add(struct llist_node *new, struct llist_head *head); -void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, - struct llist_head *head); -struct llist_node *llist_del_first(struct llist_head *head); -struct llist_node *llist_del_all(struct llist_head *head); +static inline struct llist_node *llist_next(struct llist_node *node) +{ + return node->next; +} + +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + * + * Returns true if the list was empty prior to adding this entry. + */ +static inline bool llist_add(struct llist_node *new, struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + + entry = head->first; + for (;;) { + old_entry = entry; + new->next = entry; + entry = cmpxchg(&head->first, old_entry, new); + if (entry == old_entry) + break; + } + + return old_entry == NULL; +} + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. 
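+ *
+ * Illustrative producer/consumer pairing (editor's sketch, not part of this
+ * patch; wake_consumer() and do_process() are hypothetical):
+ *
+ *	if (llist_add(&item->llnode, &list))
+ *		wake_consumer();	(list was empty, so kick the consumer)
+ *
+ *	for (node = llist_del_all(&list); node; node = llist_next(node))
+ *		do_process(node);
+ *
+ * Note the nodes come back newest-first, as described above.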
+ */ +static inline struct llist_node *llist_del_all(struct llist_head *head) +{ + return xchg(&head->first, NULL); +} + +extern bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head); +extern struct llist_node *llist_del_first(struct llist_head *head); + #endif /* LLIST_H */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index ef820a3c378..b6a56e37284 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -548,7 +548,7 @@ do { \ #endif #ifdef CONFIG_PROVE_RCU -extern void lockdep_rcu_dereference(const char *file, const int line); +void lockdep_rcu_suspicious(const char *file, const int line, const char *s); #endif #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 00000000000..312f44939c7 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,84 @@ +#ifndef __LZ4_H__ +#define __LZ4_H__ +/* + * LZ4 Kernel Interface + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *)) +#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *)) + +/* + * LZ4_COMPRESSBOUND() + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +#define LZ4_COMPRESSBOUND(isize) (isize + ((isize)/255) + 16) + +/* + * lz4_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + + /* + * lz4hc_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + +/* + * lz4_decompress() + * src : source address of the compressed data + * src_len : is the input size, whcih is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. 
+ * a bit faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); + +/* + * lz4_decompress_unknownoutputsize() + * src : source address of the compressed data + * src_len : is the input size, therefore the compressed size + * dest : output buffer address of the decompressed data + * dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after + * decompress done + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + */ +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len); +#endif diff --git a/include/linux/lzo.h b/include/linux/lzo.h index d793497ec1c..a0848d9377e 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -4,28 +4,28 @@ * LZO Public Kernel Interface * A mini subset of the LZO real-time data compression library * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ -#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) -#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS +#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) -/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ int lzo1x_1_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem); + unsigned char *dst, size_t *dst_len, void *wrkmem); /* safe decompression with overrun testing */ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len); + unsigned char *dst, size_t *dst_len); /* * Return values (< 0 = Error) @@ -40,5 +40,6 @@ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, #define LZO_E_EOF_NOT_FOUND (-7) #define LZO_E_INPUT_NOT_CONSUMED (-8) #define LZO_E_NOT_YET_IMPLEMENTED (-9) +#define LZO_E_INVALID_ARGUMENT (-10) #endif diff --git a/include/linux/magic.h b/include/linux/magic.h index 1e5df2af8d8..2616b546e83 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -24,6 +24,7 @@ #define EXT4_SUPER_MAGIC 0xEF53 #define BTRFS_SUPER_MAGIC 0x9123683E #define NILFS_SUPER_MAGIC 0x3434 +#define F2FS_SUPER_MAGIC 0xF2F52010 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 diff --git a/include/linux/memcopy.h b/include/linux/memcopy.h new file mode 100644 index 00000000000..9c65ac847f5 --- /dev/null +++ b/include/linux/memcopy.h @@ -0,0 +1,226 @@ +/* + * memcopy.h -- definitions for memory copy functions. Generic C version. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * The code is derived from the GNU C Library. + * Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc. + */ +#ifndef _LINUX_MEMCOPY_H_ +#define _LINUX_MEMCOPY_H_ + +/* + * The strategy of the memory functions is: + * + * 1. Copy bytes until the destination pointer is aligned. + * + * 2. Copy words in unrolled loops. If the source and destination + * are not aligned in the same way, use word memory operations, + * but shift and merge two read words before writing. + * + * 3. Copy the few remaining bytes. + * + * This is fast on processors that have at least 10 registers for + * allocation by GCC, and that can access memory at reg+const in one + * instruction. + */ + +#include +#include +#include + +/* + * The macros defined in this file are: + * + * BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy) + * + * BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy) + * + * WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy) + * + * WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy) + * + * MERGE(old_word, sh_1, new_word, sh_2) + * + * MEM_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy) + * + * MEM_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy) + */ + +#define OP_T_THRESHOLD 16 + +/* + * Type to use for aligned memory operations. + * This should normally be the biggest type supported by a single load + * and store. + */ +#define op_t unsigned long int +#define OPSIZ (sizeof(op_t)) + +/* Type to use for unaligned operations. */ +typedef unsigned char byte; + +#ifndef MERGE +# ifdef __LITTLE_ENDIAN +# define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) +# elif defined(__BIG_ENDIAN) +# define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2))) +# else +# error "Macro MERGE() hasn't defined!" +# endif +#endif + +/* + * Copy exactly NBYTES bytes from SRC_BP to DST_BP, + * without any assumptions about alignment of the pointers. + */ +#ifndef BYTE_COPY_FWD +#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ +do { \ + size_t __nbytes = (nbytes); \ + while (__nbytes > 0) { \ + byte __x = ((byte *) src_bp)[0]; \ + src_bp += 1; \ + __nbytes -= 1; \ + ((byte *) dst_bp)[0] = __x; \ + dst_bp += 1; \ + } \ +} while (0) +#endif + +/* + * Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, + * beginning at the bytes right before the pointers and continuing towards + * smaller addresses. Don't assume anything about alignment of the + * pointers. + */ +#ifndef BYTE_COPY_BWD +#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \ +do { \ + size_t __nbytes = (nbytes); \ + while (__nbytes > 0) { \ + byte __x; \ + src_ep -= 1; \ + __x = ((byte *) src_ep)[0]; \ + dst_ep -= 1; \ + __nbytes -= 1; \ + ((byte *) dst_ep)[0] = __x; \ + } \ +} while (0) +#endif +/* + * Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with + * the assumption that DST_BP is aligned on an OPSIZ multiple. If + * not all bytes could be easily copied, store remaining number of bytes + * in NBYTES_LEFT, otherwise store 0. 
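+ *
+ * Illustrative call sequence (editor's note, mirroring mem_copy_fwd() below):
+ *
+ *	count -= (-dstp) % OPSIZ;
+ *	BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);	(align dstp first)
+ *	WORD_COPY_FWD(dstp, srcp, count, count);	(tail length left in count)
+ *	BYTE_COPY_FWD(dstp, srcp, count);		(copy the remaining bytes)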
+ */ +extern void _wordcopy_fwd_aligned(long int, long int, size_t); +extern void _wordcopy_fwd_dest_aligned(long int, long int, size_t); +#ifndef WORD_COPY_FWD +#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ +do { \ + if (src_bp % OPSIZ == 0) \ + _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \ + else \ + _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ);\ + \ + src_bp += (nbytes) & -OPSIZ; \ + dst_bp += (nbytes) & -OPSIZ; \ + (nbytes_left) = (nbytes) % OPSIZ; \ +} while (0) +#endif + +/* + * Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, + * beginning at the words (of type op_t) right before the pointers and + * continuing towards smaller addresses. May take advantage of that + * DST_END_PTR is aligned on an OPSIZ multiple. If not all bytes could be + * easily copied, store remaining number of bytes in NBYTES_REMAINING, + * otherwise store 0. + */ +extern void _wordcopy_bwd_aligned(long int, long int, size_t); +extern void _wordcopy_bwd_dest_aligned(long int, long int, size_t); +#ifndef WORD_COPY_BWD +#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \ +do { \ + if (src_ep % OPSIZ == 0) \ + _wordcopy_bwd_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \ + else \ + _wordcopy_bwd_dest_aligned (dst_ep, src_ep, (nbytes) / OPSIZ);\ + \ + src_ep -= (nbytes) & -OPSIZ; \ + dst_ep -= (nbytes) & -OPSIZ; \ + (nbytes_left) = (nbytes) % OPSIZ; \ +} while (0) +#endif + +/* Copy memory from the beginning to the end */ +#ifndef MEM_COPY_FWD +static __always_inline void mem_copy_fwd(unsigned long dstp, + unsigned long srcp, + size_t count) +{ + /* If there not too few bytes to copy, use word copy. */ + if (count >= OP_T_THRESHOLD) { + /* Copy just a few bytes to make dstp aligned. */ + count -= (-dstp) % OPSIZ; + BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ); + + /* + * Copy from srcp to dstp taking advantage of the known + * alignment of dstp. Number if bytes remaining is put in + * the third argument. + */ + WORD_COPY_FWD(dstp, srcp, count, count); + + /* Fall out and copy the tail. */ + } + + /* There are just a few bytes to copy. Use byte memory operations. */ + BYTE_COPY_FWD(dstp, srcp, count); +} +#endif + +/* Copy memory from the end to the beginning. */ +#ifndef MEM_COPY_BWD +static __always_inline void mem_copy_bwd(unsigned long dstp, + unsigned long srcp, + size_t count) +{ + srcp += count; + dstp += count; + + /* If there not too few bytes to copy, use word copy. */ + if (count >= OP_T_THRESHOLD) { + /* Copy just a few bytes to make dstp aligned. */ + count -= dstp % OPSIZ; + BYTE_COPY_BWD(dstp, srcp, dstp % OPSIZ); + + /* + * Copy from srcp to dstp taking advantage of the known + * alignment of dstp. Number if bytes remaining is put in + * the third argument. + */ + WORD_COPY_BWD(dstp, srcp, count, count); + + /* Fall out and copy the tail. */ + } + + /* There are just a few bytes to copy. Use byte memory operations. 
*/ + BYTE_COPY_BWD (dstp, srcp, count); +} +#endif + +#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index d1d9840093f..f6ce763ac32 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1434,8 +1434,8 @@ int write_one_page(struct page *page, int wait); void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ -#define VM_MAX_READAHEAD 128 /* kbytes */ -#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ +#define VM_MAX_READAHEAD 256 /* kbytes */ +#define VM_MIN_READAHEAD 32 /* kbytes (includes current page) */ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); diff --git a/include/linux/mpu.h b/include/linux/mpu.h index 5105fb201a7..4391226152c 100644 --- a/include/linux/mpu.h +++ b/include/linux/mpu.h @@ -86,6 +86,7 @@ enum ext_slave_id { * @sec_slave_id: id of the secondary slave device * @secondary_i2c_address: secondary device's i2c address * @secondary_orientation: secondary device's orientation matrix + * @key: key for MPL library. * * Contains platform specific information on how to configure the MPU3050 to * work on this platform. The orientation matricies are 3x3 rotation matricies diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e90a673be67..d7fc4ee692e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -107,6 +107,7 @@ enum pageflags { #ifdef CONFIG_TRANSPARENT_HUGEPAGE PG_compound_lock, #endif + PG_readahead, /* page in a readahead window */ __NR_PAGEFLAGS, /* Filesystems */ diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index be63f01630a..f09793d0eba 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -29,7 +29,7 @@ enum { #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 #define PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE 0 -#define PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE LONG_MAX +#define PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE 4 #define PM_QOS_CPU_FREQ_MIN_DEFAULT_VALUE 0 #define PM_QOS_CPU_FREQ_MAX_DEFAULT_VALUE LONG_MAX diff --git a/include/linux/prctl.h b/include/linux/prctl.h index a3baeb2c216..b2ad45b73f4 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -102,4 +102,16 @@ #define PR_MCE_KILL_GET 34 +/* + * Get effective timerslack value for the process. + * It can be higher than PR_GET_TIMERSLACK. 
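+ *
+ * Illustrative userspace use (editor's sketch, not part of this patch):
+ *
+ *	slack_ns = prctl(PR_GET_EFFECTIVE_TIMERSLACK, 0, 0, 0, 0);
+ *	prctl(PR_SET_TIMERSLACK_PID, 50000, pid, 0, 0);	(50us slack for pid)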
+ */ +#define PR_GET_EFFECTIVE_TIMERSLACK 35 + +/* Sets the timerslack for arbitrary threads + * arg2 slack value, 0 means "use default" + * arg3 pid of the thread whose timer slack needs to be set + */ +#define PR_SET_TIMERSLACK_PID 41 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8f4f881a0ad..8e7470d8b67 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -297,19 +297,31 @@ extern int rcu_my_thread_group_empty(void); /** * rcu_lockdep_assert - emit lockdep splat if specified condition not met * @c: condition to check + * @s: informative message */ -#define rcu_lockdep_assert(c) \ +#define rcu_lockdep_assert(c, s) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ __warned = true; \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ + lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) +#define rcu_sleep_check() \ + do { \ + rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ + "Illegal context switch in RCU-bh" \ + " read-side critical section"); \ + rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ + "Illegal context switch in RCU-sched"\ + " read-side critical section"); \ + } while (0) + #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c) do { } while (0) +#define rcu_lockdep_assert(c, s) do { } while (0) +#define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -338,14 +350,16 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ + " usage"); \ rcu_dereference_sparse(p, space); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ + " usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -359,7 +373,9 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_index_check(p, c) \ ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, \ + "suspicious rcu_dereference_index_check()" \ + " usage"); \ smp_read_barrier_depends(); \ (_________p1); \ }) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e872ead88b..97a95808012 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -283,6 +283,8 @@ enum rtattr_type_t { RTA_MP_ALGO, /* no longer used */ RTA_TABLE, RTA_MARK, + RTA_MFC_STATS, /* not used - backported from the future */ + RTA_UID, __RTA_MAX }; diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index 34701241b67..d5b13bc07a0 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -22,7 +22,7 @@ */ struct rw_semaphore { __s32 activity; - spinlock_t wait_lock; + raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 6a6741440cb..63d40655439 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -25,7 +25,7 @@ struct rw_semaphore; /* All arch specific implementations share the same struct */ struct rw_semaphore { long count; - spinlock_t wait_lock; + raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef 
CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -56,9 +56,11 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) # define __RWSEM_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, \ + __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ + LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5bb4dd2e4c5..370b48247dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -90,6 +90,7 @@ struct sched_param { #include #include #include +#include #include @@ -122,7 +123,7 @@ extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1<thread_info->cpu access. No-op on UP. @@ -2632,7 +2659,7 @@ extern int sched_group_set_rt_period(struct task_group *tg, extern long sched_group_rt_period(struct task_group *tg); extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); #endif -#endif +#endif /* CONFIG_CGROUP_SCHED */ extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); @@ -2714,6 +2741,16 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#ifdef CONFIG_CGROUP_TIMER_SLACK +extern unsigned long task_get_effective_timer_slack(struct task_struct *tsk); +#else +static inline unsigned long task_get_effective_timer_slack( + struct task_struct *tsk) +{ + return tsk->timer_slack_ns; +} +#endif + #endif /* __KERNEL__ */ #endif diff --git a/include/linux/security.h b/include/linux/security.h index ebd2a53a3d0..60209254b64 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -6,6 +6,7 @@ * Copyright (C) 2001 Networks Associates Technology, Inc * Copyright (C) 2001 James Morris * Copyright (C) 2001 Silicon Graphics, Inc. 
(Trust Technology Group) + * Copyright (c) 2014 XPerience(R) Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +37,7 @@ #include #include #include +#include #include /* Maximum number of letters for an LSM name string */ @@ -147,6 +149,10 @@ extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +/* security_inode_init_security callback function to write xattrs */ +typedef int (*initxattrs) (struct inode *inode, + const struct xattr *xattr_array, void *fs_data); + #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -1375,6 +1381,11 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) struct security_operations { char name[SECURITY_NAME_MAX + 1]; + int (*binder_set_context_mgr) (struct task_struct *mgr); + int (*binder_transaction) (struct task_struct *from, struct task_struct *to); + int (*binder_transfer_binder) (struct task_struct *from, struct task_struct *to); + int (*binder_transfer_file) (struct task_struct *from, struct task_struct *to, struct file *file); + int (*ptrace_access_check) (struct task_struct *child, unsigned int mode); int (*ptrace_traceme) (struct task_struct *parent); int (*capget) (struct task_struct *target, @@ -1657,6 +1668,10 @@ extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); /* Security operations */ +int security_binder_set_context_mgr(struct task_struct *mgr); +int security_binder_transaction(struct task_struct *from, struct task_struct *to); +int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to); +int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -1706,6 +1721,9 @@ void security_inode_free(struct inode *inode); int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, char **name, void **value, size_t *len); +int security_new_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + initxattrs initxattrs, void *fs_data); int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); @@ -1836,6 +1854,26 @@ static inline int security_init(void) return 0; } +static inline int security_binder_set_context_mgr(struct task_struct *mgr) +{ + return 0; +} + +static inline int security_binder_transaction(struct task_struct *from, struct task_struct *to) +{ + return 0; +} + +static inline int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to) +{ + return 0; +} + +static inline int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) +{ + return 0; +} + static inline int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { @@ -2038,7 +2076,16 @@ static inline int security_inode_init_security(struct inode *inode, void **value, size_t *len) { - return -EOPNOTSUPP; + return 0; +} + +static inline int security_new_inode_init_security(struct inode *inode, + struct inode *dir, + const struct qstr *qstr, + initxattrs initxattrs, + void *fs_data) 
+{ + return 0; } static inline int security_inode_create(struct inode *dir, diff --git a/include/linux/smb347-charger.h b/include/linux/smb347-charger.h index ac7ccbdc087..83c97c96e48 100644 --- a/include/linux/smb347-charger.h +++ b/include/linux/smb347-charger.h @@ -74,6 +74,7 @@ struct smb347_charger { struct wake_lock wake_lock_dockin; struct mutex cable_lock; struct mutex dockin_lock; + struct mutex pinctrl_lock; void *charger_cb_data; enum charging_states state; enum charger_type chrg_type; diff --git a/include/linux/sweep2wake.h b/include/linux/sweep2wake.h new file mode 100644 index 00000000000..a1e2f5c3e16 --- /dev/null +++ b/include/linux/sweep2wake.h @@ -0,0 +1,23 @@ +/* +* include/linux/sweep2wake.h +* +* Copyright (c) 2013, Aaron Segaert (flar2) asegaert at gmail.com +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +* more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + + +extern void sweep2wake_setdev(struct input_dev * input_device); + diff --git a/include/linux/topology.h b/include/linux/topology.h index fc839bfa793..e26db031303 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -201,6 +201,10 @@ int arch_update_cpu_topology(void); .balance_interval = 64, \ } +#ifndef SD_NODES_PER_DOMAIN +#define SD_NODES_PER_DOMAIN 16 +#endif + #ifdef CONFIG_SCHED_BOOK #ifndef SD_BOOK_INIT #error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h new file mode 100644 index 00000000000..7ce50f0debc --- /dev/null +++ b/include/linux/wakeup_reason.h @@ -0,0 +1,23 @@ +/* + * include/linux/wakeup_reason.h + * + * Logs the reason which caused the kernel to resume + * from the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_WAKEUP_REASON_H +#define _LINUX_WAKEUP_REASON_H + +void log_wakeup_reason(int irq); + +#endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0d556deb497..e228ca9e1b5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -289,12 +289,16 @@ enum { * * system_freezable_wq is equivalent to system_wq except that it's * freezable. + * + * system_nrt_freezable_wq is equivalent to system_nrt_wq except that + * it's freezable. 
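+ *
+ * Illustrative use (editor's sketch, not part of this patch): work that needs
+ * the non-reentrancy of system_nrt_wq but must also be frozen across suspend
+ * would be queued with
+ *
+ *	queue_work(system_nrt_freezable_wq, &my_work);
+ *
+ * where my_work is a hypothetical struct work_struct.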
*/ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; +extern struct workqueue_struct *system_nrt_freezable_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index aed54c50aa6..7a378662ddf 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -67,6 +67,12 @@ struct xattr_handler { size_t size, int flags, int handler_flags); }; +struct xattr { + char *name; + void *value; + size_t value_len; +}; + ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index cbc6bb0a683..f48830293d8 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -166,6 +166,8 @@ extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table); + /* Device notifier */ extern int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 075f1e3a0fe..52e77a366bf 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -23,6 +23,8 @@ struct fib_rule { struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + uid_t uid_start; + uid_t uid_end; struct rcu_head rcu; struct net * fr_net; }; @@ -79,7 +81,9 @@ struct fib_rules_ops { [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_UID_START] = { .type = NLA_U32 }, \ + [FRA_UID_END] = { .type = NLA_U32 } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 57f15a7f1cd..1beab06a4ce 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -23,6 +23,7 @@ struct flowi_common { #define FLOWI_FLAG_PRECOW_METRICS 0x02 #define FLOWI_FLAG_CAN_SLEEP 0x04 __u32 flowic_secid; + uid_t flowic_uid; }; union flowi_uli { @@ -59,6 +60,7 @@ struct flowi4 { #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid +#define flowi4_uid __fl_common.flowic_uid __be32 daddr; __be32 saddr; union flowi_uli uli; @@ -75,7 +77,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be32 sport) + __be16 dport, __be32 sport, + uid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = 0; @@ -85,6 +88,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -102,6 +106,7 @@ struct flowi6 { #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct 
in6_addr saddr; __be32 flowlabel; @@ -145,6 +150,7 @@ struct flowi { #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b897d6e6d0a..da4d79faee3 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -87,6 +87,7 @@ struct inet_request_sock { no_srccheck: 1; kmemcheck_bitfield_end(flags); struct ip_options_rcu *opt; + u32 ir_mark; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) @@ -94,6 +95,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb) +{ + if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) { + return skb->mark; + } else { + return sk->sk_mark; + } +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/ip.h b/include/net/ip.h index aa76c7a4d9c..d7c988fe292 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -165,6 +165,7 @@ struct ip_reply_arg { int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; + uid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -236,6 +237,9 @@ extern void ipfrag_init(void); extern void ip_static_sysctl_init(void); +#define IP4_REPLY_MARK(net, mark) \ + ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3b5ac1fbff3..d32527d2715 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -109,6 +109,9 @@ struct frag_hdr { #define IP6_MF 0x0001 +#define IP6_REPLY_MARK(net, mark) \ + ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) + #include /* sysctls */ @@ -243,6 +246,14 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) atomic_dec(&fl->users); } +extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); + +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len); + +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6); + extern int ip6_ra_control(struct sock *sk, int sel); extern int ipv6_parse_hopopts(struct sk_buff *skb); @@ -285,6 +296,18 @@ static inline int ipv6_addr_src_scope(const struct in6_addr *addr) return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } +static inline bool __ipv6_addr_needs_scope_id(int type) +{ + return type & IPV6_ADDR_LINKLOCAL || + (type & IPV6_ADDR_MULTICAST && + (type & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL))); +} + +static inline __u32 ipv6_iface_scope_id(const struct in6_addr *addr, int iface) +{ + return __ipv6_addr_needs_scope_id(__ipv6_addr_type(addr)) ? 
iface : 0; +} + static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) { return memcmp(a1, a2, sizeof(struct in6_addr)); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d786b4fc02a..76ebd40d524 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -55,6 +55,8 @@ struct netns_ipv4 { int current_rt_cache_rebuild_count; unsigned int sysctl_ping_group_range[2]; + int sysctl_fwmark_reflect; + int sysctl_tcp_fwmark_accept; atomic_t rt_genid; atomic_t dev_addr_genid; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 81abfcb2eb4..20b76abcb15 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -25,6 +25,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int icmpv6_time; + int fwmark_reflect; }; struct netns_ipv6 { diff --git a/include/net/ping.h b/include/net/ping.h index 682b5ae9af5..c103135efe2 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -13,6 +13,7 @@ #ifndef _PING_H #define _PING_H +#include #include /* PING_HTABLE_SIZE must be power of 2 */ @@ -28,6 +29,18 @@ */ #define GID_T_MAX (((gid_t)~0U) >> 1) +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +struct pingv6_ops { + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); + int (*datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); + int (*icmpv6_err_convert)(u8 type, u8 code, int *err); + void (*ipv6_icmp_error)(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload); + int (*ipv6_chk_addr)(struct net *net, const struct in6_addr *addr, + struct net_device *dev, int strict); +}; + struct ping_table { struct hlist_nulls_head hash[PING_HTABLE_SIZE]; rwlock_t lock; @@ -39,10 +52,40 @@ struct ping_iter_state { }; extern struct proto ping_prot; +extern struct ping_table ping_table; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +extern struct pingv6_ops pingv6_ops; +#endif +struct pingfakehdr { + struct icmphdr icmph; + struct iovec *iov; + sa_family_t family; + __wsum wcheck; +}; -extern void ping_rcv(struct sk_buff *); -extern void ping_err(struct sk_buff *, u32 info); +int ping_get_port(struct sock *sk, unsigned short ident); +void ping_hash(struct sock *sk); +void ping_unhash(struct sock *sk); + +int ping_init_sock(struct sock *sk); +void ping_close(struct sock *sk, long timeout); +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len); +void ping_err(struct sk_buff *skb, int offset, u32 info); +void ping_v4_err(struct sk_buff *skb, u32 info); +int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, + struct sk_buff *); + +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len); +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len); +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len); +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len); +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +void ping_rcv(struct sk_buff *skb); #ifdef CONFIG_PROC_FS extern int __init ping_proc_init(void); @@ -50,6 +93,7 @@ extern void ping_proc_exit(void); #endif void __init ping_init(void); - +int __init pingv6_init(void); +void pingv6_exit(void); #endif /* _PING_H */ diff --git a/include/net/route.h b/include/net/route.h index db7b3432f07..5e9519ed80e 
100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -60,6 +60,7 @@ struct rtable { int rt_iif; int rt_oif; __u32 rt_mark; + uid_t rt_uid; /* Info on neighbour */ __be32 rt_gateway; @@ -146,7 +147,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sk ? sock_i_uid(sk) : 0); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -250,7 +251,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_CAN_SLEEP; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sock_i_uid(sk)); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/net/tcp.h b/include/net/tcp.h index 4fcd77af405..97cee56cde3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -252,6 +252,7 @@ extern int sysctl_tcp_max_ssthresh; extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; +extern int sysctl_tcp_default_init_rwnd; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 498433dd067..48b42ea9c2f 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -11,6 +11,7 @@ extern struct proto rawv6_prot; extern struct proto udpv6_prot; extern struct proto udplitev6_prot; extern struct proto tcpv6_prot; +extern struct proto pingv6_prot; struct flowi6; @@ -21,6 +22,8 @@ extern int ipv6_frag_init(void); extern void ipv6_frag_exit(void); /* transport protocols */ +extern int pingv6_init(void); +extern void pingv6_exit(void); extern int rawv6_init(void); extern void rawv6_exit(void); extern int udpv6_init(void); diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h index bcef5053be8..951e6ca12da 100644 --- a/include/trace/events/cpufreq_interactive.h +++ b/include/trace/events/cpufreq_interactive.h @@ -8,7 +8,7 @@ DECLARE_EVENT_CLASS(set, TP_PROTO(u32 cpu_id, unsigned long targfreq, - unsigned long actualfreq), + unsigned long actualfreq), TP_ARGS(cpu_id, targfreq, actualfreq), TP_STRUCT__entry( @@ -28,13 +28,7 @@ DECLARE_EVENT_CLASS(set, __entry->actualfreq) ); -DEFINE_EVENT(set, cpufreq_interactive_up, - TP_PROTO(u32 cpu_id, unsigned long targfreq, - unsigned long actualfreq), - TP_ARGS(cpu_id, targfreq, actualfreq) -); - -DEFINE_EVENT(set, cpufreq_interactive_down, +DEFINE_EVENT(set, cpufreq_interactive_setspeed, TP_PROTO(u32 cpu_id, unsigned long targfreq, unsigned long actualfreq), TP_ARGS(cpu_id, targfreq, actualfreq) @@ -42,68 +36,74 @@ DEFINE_EVENT(set, cpufreq_interactive_down, DECLARE_EVENT_CLASS(loadeval, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq), + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg), TP_STRUCT__entry( __field(unsigned long, cpu_id ) __field(unsigned long, load ) - __field(unsigned long, curfreq ) - __field(unsigned long, targfreq ) + __field(unsigned long, curtarg ) + __field(unsigned long, curactual ) + __field(unsigned long, newtarg ) 
), TP_fast_assign( __entry->cpu_id = cpu_id; __entry->load = load; - __entry->curfreq = curfreq; - __entry->targfreq = targfreq; + __entry->curtarg = curtarg; + __entry->curactual = curactual; + __entry->newtarg = newtarg; ), - TP_printk("cpu=%lu load=%lu cur=%lu targ=%lu", - __entry->cpu_id, __entry->load, __entry->curfreq, - __entry->targfreq) + TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu", + __entry->cpu_id, __entry->load, __entry->curtarg, + __entry->curactual, __entry->newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_target, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_already, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_notyet, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); TRACE_EVENT(cpufreq_interactive_boost, - TP_PROTO(unsigned long freq), - TP_ARGS(freq), + TP_PROTO(const char *s), + TP_ARGS(s), TP_STRUCT__entry( - __field(unsigned long, freq) + __string(s, s) ), TP_fast_assign( - __entry->freq = freq; + __assign_str(s, s); ), - TP_printk("freq=%lu", __entry->freq) + TP_printk("%s", __get_str(s)) ); TRACE_EVENT(cpufreq_interactive_unboost, - TP_PROTO(unsigned long freq), - TP_ARGS(freq), + TP_PROTO(const char *s), + TP_ARGS(s), TP_STRUCT__entry( - __field(unsigned long, freq) + __string(s, s) ), TP_fast_assign( - __entry->freq = freq; + __assign_str(s, s); ), - TP_printk("freq=%lu", __entry->freq) + TP_printk("%s", __get_str(s)) ); #endif /* _TRACE_CPUFREQ_INTERACTIVE_H */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h new file mode 100644 index 00000000000..52ae54828ed --- /dev/null +++ b/include/trace/events/f2fs.h @@ -0,0 +1,682 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM f2fs + +#if !defined(_TRACE_F2FS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_F2FS_H + +#include + +#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) +#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino + +#define show_block_type(type) \ + __print_symbolic(type, \ + { NODE, "NODE" }, \ + { DATA, "DATA" }, \ + { META, "META" }, \ + { META_FLUSH, "META_FLUSH" }) + +#define show_bio_type(type) \ + __print_symbolic(type, \ + { READ, "READ" }, \ + { READA, "READAHEAD" }, \ + { READ_SYNC, "READ_SYNC" }, \ + { WRITE, "WRITE" }, \ + { WRITE_SYNC, "WRITE_SYNC" }, \ + { WRITE_FLUSH, "WRITE_FLUSH" }, \ + { WRITE_FUA, "WRITE_FUA" }) + +#define show_data_type(type) \ + __print_symbolic(type, \ + { CURSEG_HOT_DATA, "Hot DATA" }, \ + { CURSEG_WARM_DATA, "Warm DATA" }, \ + { CURSEG_COLD_DATA, "Cold DATA" }, \ + { CURSEG_HOT_NODE, "Hot NODE" }, \ + { CURSEG_WARM_NODE, "Warm NODE" }, \ + { CURSEG_COLD_NODE, "Cold NODE" }, \ + { NO_CHECK_TYPE, "No TYPE" }) + +#define show_gc_type(type) \ + __print_symbolic(type, \ + { FG_GC, "Foreground GC" }, \ + { BG_GC, "Background GC" }) + 
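[Editor's sketch, not part of this header: the events declared further down are fired from f2fs through the generated trace_<name>() calls, and helpers such as show_block_type() render the numeric type symbolically in the trace output. The call site below is hypothetical and assumes the usual f2fs types are in scope.]

	#include <trace/events/f2fs.h>

	static void example_submit(struct page *page, block_t blk_addr)
	{
		/* shows up as e.g. "ino = 42, NODE, index = 3, blkaddr = 0x100" */
		trace_f2fs_submit_write_page(page, blk_addr, NODE);
	}
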
+#define show_alloc_mode(type) \ + __print_symbolic(type, \ + { LFS, "LFS-mode" }, \ + { SSR, "SSR-mode" }) + +#define show_victim_policy(type) \ + __print_symbolic(type, \ + { GC_GREEDY, "Greedy" }, \ + { GC_CB, "Cost-Benefit" }) + +struct victim_sel_policy; + +DECLARE_EVENT_CLASS(f2fs__inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(ino_t, pino) + __field(umode_t, mode) + __field(loff_t, size) + __field(unsigned int, nlink) + __field(blkcnt_t, blocks) + __field(__u8, advise) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pino = F2FS_I(inode)->i_pino; + __entry->mode = inode->i_mode; + __entry->nlink = inode->i_nlink; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->advise = F2FS_I(inode)->i_advise; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pino = %lu, i_mode = 0x%hx, " + "i_size = %lld, i_nlink = %u, i_blocks = %llu, i_advise = 0x%x", + show_dev_ino(__entry), + (unsigned long)__entry->pino, + __entry->mode, + __entry->size, + (unsigned int)__entry->nlink, + (unsigned long long)__entry->blocks, + (unsigned char)__entry->advise) +); + +DECLARE_EVENT_CLASS(f2fs__inode_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, ret = %d", + show_dev_ino(__entry), + __entry->ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_sync_file_exit, + + TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), + + TP_ARGS(inode, need_cp, datasync, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(bool, need_cp) + __field(int, datasync) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->need_cp = need_cp; + __entry->datasync = datasync; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " + "datasync = %d, ret = %d", + show_dev_ino(__entry), + __entry->need_cp ? "needed" : "not needed", + __entry->datasync, + __entry->ret) +); + +TRACE_EVENT(f2fs_sync_fs, + + TP_PROTO(struct super_block *sb, int wait), + + TP_ARGS(sb, wait), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, dirty) + __field(int, wait) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->dirty = F2FS_SB(sb)->s_dirty; + __entry->wait = wait; + ), + + TP_printk("dev = (%d,%d), superblock is %s, wait = %d", + show_dev(__entry), + __entry->dirty ? 
"dirty" : "not dirty", + __entry->wait) +); + +DEFINE_EVENT(f2fs__inode, f2fs_iget, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_iget_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_evict_inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_new_inode, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +TRACE_EVENT(f2fs_unlink_enter, + + TP_PROTO(struct inode *dir, struct dentry *dentry), + + TP_ARGS(dir, dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(const char *, name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->size = dir->i_size; + __entry->blocks = dir->i_blocks; + __entry->name = dentry->d_name.name; + ), + + TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, " + "i_blocks = %llu, name = %s", + show_dev_ino(__entry), + __entry->size, + (unsigned long long)__entry->blocks, + __entry->name) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_truncate, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_truncate_data_blocks_range, + + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs, int free), + + TP_ARGS(inode, nid, ofs, free), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid) + __field(unsigned int, ofs) + __field(int, free) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid = nid; + __entry->ofs = ofs; + __entry->free = free; + ), + + TP_printk("dev = (%d,%d), ino = %lu, nid = %u, offset = %u, freed = %d", + show_dev_ino(__entry), + (unsigned int)__entry->nid, + __entry->ofs, + __entry->free) +); + +DECLARE_EVENT_CLASS(f2fs__truncate_op, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(u64, from) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->from = from; + ), + + TP_printk("dev = (%d,%d), ino = %lu, i_size = %lld, i_blocks = %llu, " + "start file offset = %llu", + show_dev_ino(__entry), + __entry->size, + (unsigned long long)__entry->blocks, + (unsigned long long)__entry->from) +); + +DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_blocks_enter, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_blocks_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_inode_blocks_enter, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_inode_blocks_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DECLARE_EVENT_CLASS(f2fs__truncate_node, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid) + __field(block_t, blk_addr) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + 
__entry->ino = inode->i_ino; + __entry->nid = nid; + __entry->blk_addr = blk_addr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, nid = %u, block_address = 0x%llx", + show_dev_ino(__entry), + (unsigned int)__entry->nid, + (unsigned long long)__entry->blk_addr) +); + +DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_nodes_enter, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_nodes_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr) +); + +TRACE_EVENT(f2fs_truncate_partial_nodes, + + TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), + + TP_ARGS(inode, nid, depth, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid[3]) + __field(int, depth) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid[0] = nid[0]; + __entry->nid[1] = nid[1]; + __entry->nid[2] = nid[2]; + __entry->depth = depth; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), ino = %lu, " + "nid[0] = %u, nid[1] = %u, nid[2] = %u, depth = %d, err = %d", + show_dev_ino(__entry), + (unsigned int)__entry->nid[0], + (unsigned int)__entry->nid[1], + (unsigned int)__entry->nid[2], + __entry->depth, + __entry->err) +); + +TRACE_EVENT_CONDITION(f2fs_readpage, + + TP_PROTO(struct page *page, sector_t blkaddr, int type), + + TP_ARGS(page, blkaddr, type), + + TP_CONDITION(page->mapping), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, index) + __field(sector_t, blkaddr) + __field(int, type) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; + __entry->blkaddr = blkaddr; + __entry->type = type; + ), + + TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + "blkaddr = 0x%llx, bio_type = %s", + show_dev_ino(__entry), + (unsigned long)__entry->index, + (unsigned long long)__entry->blkaddr, + show_bio_type(__entry->type)) +); + +TRACE_EVENT(f2fs_get_data_block, + TP_PROTO(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int ret), + + TP_ARGS(inode, iblock, bh, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(sector_t, iblock) + __field(sector_t, bh_start) + __field(size_t, bh_size) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->iblock = iblock; + __entry->bh_start = bh->b_blocknr; + __entry->bh_size = bh->b_size; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " + "start blkaddr = 0x%llx, len = 0x%llx bytes, err = %d", + show_dev_ino(__entry), + (unsigned long long)__entry->iblock, + (unsigned long long)__entry->bh_start, + (unsigned long long)__entry->bh_size, + __entry->ret) +); + +TRACE_EVENT(f2fs_get_victim, + + TP_PROTO(struct super_block *sb, int type, int gc_type, + struct victim_sel_policy *p, unsigned int pre_victim, + unsigned int prefree, unsigned int free), + + TP_ARGS(sb, type, gc_type, p, pre_victim, prefree, free), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, gc_type) + __field(int, alloc_mode) + __field(int, gc_mode) + __field(unsigned int, victim) + __field(unsigned int, ofs_unit) + 
__field(unsigned int, pre_victim) + __field(unsigned int, prefree) + __field(unsigned int, free) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->type = type; + __entry->gc_type = gc_type; + __entry->alloc_mode = p->alloc_mode; + __entry->gc_mode = p->gc_mode; + __entry->victim = p->min_segno; + __entry->ofs_unit = p->ofs_unit; + __entry->pre_victim = pre_victim; + __entry->prefree = prefree; + __entry->free = free; + ), + + TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " + "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", + show_dev(__entry), + show_data_type(__entry->type), + show_gc_type(__entry->gc_type), + show_alloc_mode(__entry->alloc_mode), + show_victim_policy(__entry->gc_mode), + __entry->victim, + __entry->ofs_unit, + (int)__entry->pre_victim, + __entry->prefree, + __entry->free) +); + +TRACE_EVENT(f2fs_fallocate, + + TP_PROTO(struct inode *inode, int mode, + loff_t offset, loff_t len, int ret), + + TP_ARGS(inode, mode, offset, len, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, mode) + __field(loff_t, offset) + __field(loff_t, len) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = mode; + __entry->offset = offset; + __entry->len = len; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, mode = %x, offset = %lld, " + "len = %lld, i_size = %lld, i_blocks = %llu, ret = %d", + show_dev_ino(__entry), + __entry->mode, + (unsigned long long)__entry->offset, + (unsigned long long)__entry->len, + (unsigned long long)__entry->size, + (unsigned long long)__entry->blocks, + __entry->ret) +); + +TRACE_EVENT(f2fs_reserve_new_block, + + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), + + TP_ARGS(inode, nid, ofs_in_node), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(nid_t, nid) + __field(unsigned int, ofs_in_node) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = nid; + __entry->ofs_in_node = ofs_in_node; + ), + + TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", + show_dev(__entry), + (unsigned int)__entry->nid, + __entry->ofs_in_node) +); + +TRACE_EVENT(f2fs_do_submit_bio, + + TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), + + TP_ARGS(sb, btype, sync, bio), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, btype) + __field(bool, sync) + __field(sector_t, sector) + __field(unsigned int, size) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->btype = btype; + __entry->sync = sync; + __entry->sector = bio->bi_sector; + __entry->size = bio->bi_size; + ), + + TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", + show_dev(__entry), + show_block_type(__entry->btype), + __entry->sync ? 
"sync" : "no sync", + (unsigned long long)__entry->sector, + __entry->size) +); + +TRACE_EVENT(f2fs_submit_write_page, + + TP_PROTO(struct page *page, block_t blk_addr, int type), + + TP_ARGS(page, blk_addr, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, type) + __field(pgoff_t, index) + __field(block_t, block) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->type = type; + __entry->index = page->index; + __entry->block = blk_addr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", + show_dev_ino(__entry), + show_block_type(__entry->type), + (unsigned long)__entry->index, + (unsigned long long)__entry->block) +); + +TRACE_EVENT(f2fs_write_checkpoint, + + TP_PROTO(struct super_block *sb, bool is_umount, char *msg), + + TP_ARGS(sb, is_umount, msg), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(bool, is_umount) + __field(char *, msg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->is_umount = is_umount; + __entry->msg = msg; + ), + + TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", + show_dev(__entry), + __entry->is_umount ? "clean umount" : "consistency", + __entry->msg) +); + +#endif /* _TRACE_F2FS_H */ + + /* This part must be outside protection */ +#include diff --git a/init/Kconfig b/init/Kconfig index 6aad581f18e..a49af148c28 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -130,10 +130,13 @@ config HAVE_KERNEL_XZ config HAVE_KERNEL_LZO bool +config HAVE_KERNEL_LZ4 + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -201,6 +204,14 @@ config KERNEL_LZO size is about 10% bigger than gzip; however its speed (both compression and decompression) is the fastest. +config KERNEL_LZ4 + bool "LZ4" + depends on HAVE_KERNEL_LZ4 + help + Its compression ratio is worse than LZO. The size of the kernel + is about 8% bigger than LZO. But the decompression speed is + faster than LZO. + endchoice config DEFAULT_HOSTNAME @@ -603,6 +614,14 @@ config CGROUP_FREEZER Provides a way to freeze and unfreeze all tasks in a cgroup. +config CGROUP_TIMER_SLACK + bool "Timer slack cgroup controller" + help + Provides a way to set minimal timer slack value for tasks in + a cgroup. + It's useful in mobile devices where certain background apps + are attached to a cgroup and combined wakeups are desired. + config CGROUP_DEVICE bool "Device controller for cgroups" help @@ -715,6 +734,18 @@ config FAIR_GROUP_SCHED depends on CGROUP_SCHED default CGROUP_SCHED +config CFS_BANDWIDTH + bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" + depends on EXPERIMENTAL + depends on FAIR_GROUP_SCHED + default n + help + This option allows users to define CPU bandwidth rates (limits) for + tasks running within the fair group scheduler. Groups with no limit + set are considered to be unconstrained and will run with no + restriction. + See tip/Documentation/scheduler/sched-bwc.txt for more information. 
+ config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on EXPERIMENTAL @@ -924,7 +955,7 @@ config PANIC_TIMEOUT menuconfig EXPERT bool "Configure standard kernel features (expert users)" # Unhide debug options, to make the on-by-default options visible - select DEBUG_KERNEL + # We do not select DEBUG_KERNEL help This option allows certain base kernel options and settings to be disabled or tweaked. This is for specialized diff --git a/kernel/Makefile b/kernel/Makefile index eca595e2fd5..9e8629bb223 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o +obj-$(CONFIG_CGROUP_TIMER_SLACK) += cgroup_timer_slack.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_USER_NS) += user_namespace.o @@ -109,14 +110,15 @@ obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) +#ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is # needed for x86 only. Why this used to be enabled for all architectures is beyond # me. I suspect most platforms don't need this, but until we know that for sure # I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k # to get a correct value for the wait-channel (WCHAN in ps). --davidm -CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer -endif +#CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer +CFLAGS_sched.o := -O2 -fomit-frame-pointer -mtune=cortex-a8 -march=armv7-a -ftree-vectorize +#endif $(obj)/configs.o: $(obj)/config_data.h diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 54a36fe288f..0e4298fc19a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -272,7 +272,7 @@ static void check_for_release(struct cgroup *cgrp); /* * A queue for waiters to do rmdir() cgroup. A tasks will sleep when - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some + * list_empty(&cgroup->children) && subsys has some * reference to css->refcnt. In general, this refcnt is expected to goes down * to zero, soon. * @@ -3935,6 +3935,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) struct cgroup_subsys *ss; unsigned long flags; bool failed = false; + + if (atomic_read(&cgrp->count) != 0) + return false; + local_irq_save(flags); for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; @@ -3977,19 +3981,23 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) return !failed; } -/* checks if all of the css_sets attached to a cgroup have a refcount of 0. - * Must be called with css_set_lock held */ +/* Checks if all of the css_sets attached to a cgroup have a refcount of 0. 
*/ static int cgroup_css_sets_empty(struct cgroup *cgrp) { struct cg_cgroup_link *link; + int retval = 1; + read_lock(&css_set_lock); list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { struct css_set *cg = link->cg; - if (atomic_read(&cg->refcount) > 0) - return 0; + if (atomic_read(&cg->refcount) > 0) { + retval = 0; + break; + } } + read_unlock(&css_set_lock); - return 1; + return retval; } static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) diff --git a/kernel/cgroup_timer_slack.c b/kernel/cgroup_timer_slack.c new file mode 100644 index 00000000000..3226deb86c5 --- /dev/null +++ b/kernel/cgroup_timer_slack.c @@ -0,0 +1,139 @@ +/* + * cgroup_timer_slack.c - control group timer slack subsystem + * + * Copyright Nokia Corparation, 2011 + * Author: Kirill A. Shutemov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include +#include +#include + +struct cgroup_subsys timer_slack_subsys; +struct tslack_cgroup { + struct cgroup_subsys_state css; + unsigned long min_slack_ns; +}; + +static struct tslack_cgroup *cgroup_to_tslack(struct cgroup *cgroup) +{ + struct cgroup_subsys_state *css; + + css = cgroup_subsys_state(cgroup, timer_slack_subsys.subsys_id); + return container_of(css, struct tslack_cgroup, css); +} + +static struct cgroup_subsys_state *tslack_create(struct cgroup_subsys *subsys, + struct cgroup *cgroup) +{ + struct tslack_cgroup *tslack_cgroup; + + tslack_cgroup = kmalloc(sizeof(*tslack_cgroup), GFP_KERNEL); + if (!tslack_cgroup) + return ERR_PTR(-ENOMEM); + + if (cgroup->parent) { + struct tslack_cgroup *parent; + + parent = cgroup_to_tslack(cgroup->parent); + tslack_cgroup->min_slack_ns = parent->min_slack_ns; + } else + tslack_cgroup->min_slack_ns = 0UL; + + return &tslack_cgroup->css; +} + +static void tslack_destroy(struct cgroup_subsys *tslack_cgroup, + struct cgroup *cgroup) +{ + kfree(cgroup_to_tslack(cgroup)); +} + +static int tslack_allow_attach(struct cgroup *cgrp, struct task_struct *tsk) +{ + const struct cred *cred = current_cred(), *tcred; + + tcred = __task_cred(tsk); + + if ((current != tsk) && !capable(CAP_SYS_NICE) && + cred->euid != tcred->uid && cred->euid != tcred->suid) + return -EACCES; + + return 0; +} + +static u64 tslack_read_min(struct cgroup *cgroup, struct cftype *cft) +{ + return cgroup_to_tslack(cgroup)->min_slack_ns; +} + +static int tslack_write_min(struct cgroup *cgroup, struct cftype *cft, u64 val) +{ + if (val > ULONG_MAX) + return -EINVAL; + + cgroup_to_tslack(cgroup)->min_slack_ns = val; + + return 0; +} + +static u64 tslack_read_effective(struct cgroup *cgroup, struct cftype *cft) +{ + unsigned long min; + + min = cgroup_to_tslack(cgroup)->min_slack_ns; + while (cgroup->parent) { + cgroup = cgroup->parent; + min = max(cgroup_to_tslack(cgroup)->min_slack_ns, min); + } + + return min; +} + +static struct cftype files[] = { + { + .name = "min_slack_ns", + .read_u64 = tslack_read_min, + .write_u64 = tslack_write_min, + }, + { + .name = "effective_slack_ns", + .read_u64 = tslack_read_effective, + }, +}; + +static int tslack_populate(struct cgroup_subsys 
*subsys, struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, subsys, files, ARRAY_SIZE(files)); +} + +struct cgroup_subsys timer_slack_subsys = { + .name = "timer_slack", + .subsys_id = timer_slack_subsys_id, + .create = tslack_create, + .destroy = tslack_destroy, + .allow_attach = tslack_allow_attach, + .populate = tslack_populate, +}; + +unsigned long task_get_effective_timer_slack(struct task_struct *tsk) +{ + struct cgroup *cgroup; + unsigned long slack; + + rcu_read_lock(); + cgroup = task_cgroup(tsk, timer_slack_subsys.subsys_id); + slack = tslack_read_effective(cgroup, NULL); + rcu_read_unlock(); + + return max(tsk->timer_slack_ns, slack); +} diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 10131fdaff7..11191ad8d7a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2085,6 +2085,9 @@ static void scan_for_empty_cpusets(struct cpuset *root) * (of no affect) on systems that are actively using CPU hotplug * but making no active use of cpusets. * + * The only exception to this is suspend/resume, where we don't + * modify cpusets at all. + * * This routine ensures that top_cpuset.cpus_allowed tracks * cpu_active_mask on each CPU hotplug (cpuhp) event. * diff --git a/kernel/events/core.c b/kernel/events/core.c index 0f857782d06..25a977abe7d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5072,7 +5072,7 @@ static void sw_perf_event_destroy(struct perf_event *event) static int perf_swevent_init(struct perf_event *event) { - int event_id = event->attr.config; + u64 event_id = event->attr.config; if (event->attr.type != PERF_TYPE_SOFTWARE) return -ENOENT; diff --git a/kernel/fork.c b/kernel/fork.c index f65fa0627c0..50a68c44a60 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1169,6 +1169,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif + /* + * Save current task's (not effective) timer slack value as default + * timer slack value for new task. + */ p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); diff --git a/kernel/futex.c b/kernel/futex.c index e6160fa842e..5c64512c6bc 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -586,6 +586,55 @@ void exit_pi_state_list(struct task_struct *curr) raw_spin_unlock_irq(&curr->pi_lock); } +/* + * We need to check the following states: + * + * Waiter | pi_state | pi->owner | uTID | uODIED | ? + * + * [1] NULL | --- | --- | 0 | 0/1 | Valid + * [2] NULL | --- | --- | >0 | 0/1 | Valid + * + * [3] Found | NULL | -- | Any | 0/1 | Invalid + * + * [4] Found | Found | NULL | 0 | 1 | Valid + * [5] Found | Found | NULL | >0 | 1 | Invalid + * + * [6] Found | Found | task | 0 | 1 | Valid + * + * [7] Found | Found | NULL | Any | 0 | Invalid + * + * [8] Found | Found | task | ==taskTID | 0/1 | Valid + * [9] Found | Found | task | 0 | 0 | Invalid + * [10] Found | Found | task | !=taskTID | 0/1 | Invalid + * + * [1] Indicates that the kernel can acquire the futex atomically. We + * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. + * + * [2] Valid, if TID does not belong to a kernel thread. If no matching + * thread is found then it indicates that the owner TID has died. + * + * [3] Invalid. The waiter is queued on a non PI futex + * + * [4] Valid state after exit_robust_list(), which sets the user space + * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. 
+ * + * [5] The user space value got manipulated between exit_robust_list() + * and exit_pi_state_list() + * + * [6] Valid state after exit_pi_state_list() which sets the new owner in + * the pi_state but cannot access the user space value. + * + * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. + * + * [8] Owner and user space value match + * + * [9] There is no transient state which sets the user space TID to 0 + * except exit_robust_list(), but this is indicated by the + * FUTEX_OWNER_DIED bit. See [4] + * + * [10] There is no transient state which leaves owner and user space + * TID out of sync. + */ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps) @@ -601,12 +650,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, plist_for_each_entry_safe(this, next, head, list) { if (match_futex(&this->key, key)) { /* - * Another waiter already exists - bump up - * the refcount and return its pi_state: + * Sanity check the waiter before increasing + * the refcount and attaching to it. */ pi_state = this->pi_state; /* - * Userspace might have messed up non-PI and PI futexes + * Userspace might have messed up non-PI and + * PI futexes [3] */ if (unlikely(!pi_state)) return -EINVAL; @@ -614,34 +664,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, WARN_ON(!atomic_read(&pi_state->refcount)); /* - * When pi_state->owner is NULL then the owner died - * and another waiter is on the fly. pi_state->owner - * is fixed up by the task which acquires - * pi_state->rt_mutex. - * - * We do not check for pid == 0 which can happen when - * the owner died and robust_list_exit() cleared the - * TID. + * Handle the owner died case: */ - if (pid && pi_state->owner) { + if (uval & FUTEX_OWNER_DIED) { /* - * Bail out if user space manipulated the - * futex value. + * exit_pi_state_list sets owner to NULL and + * wakes the topmost waiter. The task which + * acquires the pi_state->rt_mutex will fixup + * owner. */ - if (pid != task_pid_vnr(pi_state->owner)) + if (!pi_state->owner) { + /* + * No pi state owner, but the user + * space TID is not 0. Inconsistent + * state. [5] + */ + if (pid) + return -EINVAL; + /* + * Take a ref on the state and + * return. [4] + */ + goto out_state; + } + + /* + * If TID is 0, then either the dying owner + * has not yet executed exit_pi_state_list() + * or some waiter acquired the rtmutex in the + * pi state, but did not yet fixup the TID in + * user space. + * + * Take a ref on the state and return. [6] + */ + if (!pid) + goto out_state; + } else { + /* + * If the owner died bit is not set, + * then the pi_state must have an + * owner. [7] + */ + if (!pi_state->owner) return -EINVAL; } + /* + * Bail out if user space manipulated the + * futex value. If pi state exists then the + * owner TID must be the same as the user + * space TID. [9/10] + */ + if (pid != task_pid_vnr(pi_state->owner)) + return -EINVAL; + + out_state: atomic_inc(&pi_state->refcount); *ps = pi_state; - return 0; } } /* * We are the first waiter - try to look up the real owner and attach - * the new pi_state to it, but bail out when TID = 0 + * the new pi_state to it, but bail out when TID = 0 [1] */ if (!pid) return -ESRCH; @@ -669,6 +755,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return ret; } + /* + * No existing pi state. First waiter. 
[2] + */ pi_state = alloc_pi_state(); /* @@ -740,10 +829,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, return -EDEADLK; /* - * Surprise - we got the lock. Just return to userspace: + * Surprise - we got the lock, but we do not trust user space at all. */ - if (unlikely(!curval)) - return 1; + if (unlikely(!curval)) { + /* + * We verify whether there is kernel state for this + * futex. If not, we can safely assume, that the 0 -> + * TID transition is correct. If state exists, we do + * not bother to fixup the user space state as it was + * corrupted already. + */ + return futex_top_waiter(hb, key) ? -EINVAL : 1; + } uval = curval; @@ -867,6 +964,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 curval, newval; + int ret = 0; if (!pi_state) return -EINVAL; @@ -890,23 +988,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) new_owner = this->task; /* - * We pass it to the next owner. (The WAITERS bit is always - * kept enabled while there is PI state around. We must also - * preserve the owner died bit.) + * We pass it to the next owner. The WAITERS bit is always + * kept enabled while there is PI state around. We cleanup the + * owner died bit, because we are the owner. */ - if (!(uval & FUTEX_OWNER_DIED)) { - int ret = 0; + newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) - ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; - if (ret) { - raw_spin_unlock(&pi_state->pi_mutex.wait_lock); - return ret; - } + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + ret = -EFAULT; + else if (curval != uval) + ret = -EINVAL; + if (ret) { + raw_spin_unlock(&pi_state->pi_mutex.wait_lock); + return ret; } raw_spin_lock_irq(&pi_state->owner->pi_lock); @@ -1254,6 +1348,13 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 curval2; if (requeue_pi) { + /* + * Requeue PI only works on two distinct uaddrs. This + * check is only valid for private futexes. See below. + */ + if (uaddr1 == uaddr2) + return -EINVAL; + /* * requeue_pi requires a pi_state, try to allocate it now * without any locks in case it fails. @@ -1292,6 +1393,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, if (unlikely(ret != 0)) goto out_put_key1; + /* + * The check above which compares uaddrs is not sufficient for + * shared futexes. We need to compare the keys: + */ + if (requeue_pi && match_futex(&key1, &key2)) { + ret = -EINVAL; + goto out_put_keys; + } + hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -1887,7 +1997,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, - current->timer_slack_ns); + task_get_effective_timer_slack(current)); } retry: @@ -2112,9 +2222,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) /* * To avoid races, try to do the TID -> 0 atomic transition * again. If it succeeds then we can return without waking - * anyone else up: + * anyone else up. We only try this if neither the waiters nor + * the owner died bit are set. 
*/ - if (!(uval & FUTEX_OWNER_DIED) && + if (!(uval & ~FUTEX_TID_MASK) && cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) goto pi_faulted; /* @@ -2146,11 +2257,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) /* * No waiters - kernel unlocks the futex: */ - if (!(uval & FUTEX_OWNER_DIED)) { - ret = unlock_futex_pi(uaddr, uval); - if (ret == -EFAULT) - goto pi_faulted; - } + ret = unlock_futex_pi(uaddr, uval); + if (ret == -EFAULT) + goto pi_faulted; out_unlock: spin_unlock(&hb->lock); @@ -2281,7 +2390,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, - current->timer_slack_ns); + task_get_effective_timer_slack(current)); } /* @@ -2307,6 +2416,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (ret) goto out_key2; + /* + * The check above which compares uaddrs is not sufficient for + * shared futexes. We need to compare the keys: + */ + if (match_futex(&q.key, &key2)) { + ret = -EINVAL; + goto out_put_keys; + } + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 2043c08d36c..2bb6b7f4a32 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1564,7 +1564,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, int ret = 0; unsigned long slack; - slack = current->timer_slack_ns; + slack = task_get_effective_timer_slack(current); if (rt_task(current)) slack = 0; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c58fa7da8ae..0e2cde4f380 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -17,54 +17,34 @@ * claimed NULL, 3 -> {pending} : claimed to be enqueued * pending next, 3 -> {busy} : queued, pending callback * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed - * - * We use the lower two bits of the next pointer to keep PENDING and BUSY - * flags. */ #define IRQ_WORK_PENDING 1UL #define IRQ_WORK_BUSY 2UL #define IRQ_WORK_FLAGS 3UL -static inline bool irq_work_is_set(struct irq_work *entry, int flags) -{ - return (unsigned long)entry->next & flags; -} - -static inline struct irq_work *irq_work_next(struct irq_work *entry) -{ - unsigned long next = (unsigned long)entry->next; - next &= ~IRQ_WORK_FLAGS; - return (struct irq_work *)next; -} - -static inline struct irq_work *next_flags(struct irq_work *entry, int flags) -{ - unsigned long next = (unsigned long)entry; - next |= flags; - return (struct irq_work *)next; -} - -static DEFINE_PER_CPU(struct irq_work *, irq_work_list); +static DEFINE_PER_CPU(struct llist_head, irq_work_list); /* * Claim the entry so that no one else will poke at it. */ -static bool irq_work_claim(struct irq_work *entry) +static bool irq_work_claim(struct irq_work *work) { - struct irq_work *next, *nflags; + unsigned long flags, nflags; - do { - next = entry->next; - if ((unsigned long)next & IRQ_WORK_PENDING) + for (;;) { + flags = work->flags; + if (flags & IRQ_WORK_PENDING) return false; - nflags = next_flags(next, IRQ_WORK_FLAGS); - } while (cmpxchg(&entry->next, next, nflags) != next); + nflags = flags | IRQ_WORK_FLAGS; + if (cmpxchg(&work->flags, flags, nflags) == flags) + break; + cpu_relax(); + } return true; } - void __weak arch_irq_work_raise(void) { /* @@ -75,20 +55,15 @@ void __weak arch_irq_work_raise(void) /* * Queue the entry and raise the IPI if needed. 
*/ -static void __irq_work_queue(struct irq_work *entry) +static void __irq_work_queue(struct irq_work *work) { - struct irq_work *next; + bool empty; preempt_disable(); - do { - next = __this_cpu_read(irq_work_list); - /* Can assign non-atomic because we keep the flags set. */ - entry->next = next_flags(next, IRQ_WORK_FLAGS); - } while (this_cpu_cmpxchg(irq_work_list, next, entry) != next); - + empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); /* The list was empty, raise self-interrupt to start processing. */ - if (!irq_work_next(entry)) + if (empty) arch_irq_work_raise(); preempt_enable(); @@ -100,16 +75,16 @@ static void __irq_work_queue(struct irq_work *entry) * * Can be re-enqueued while the callback is still in progress. */ -bool irq_work_queue(struct irq_work *entry) +bool irq_work_queue(struct irq_work *work) { - if (!irq_work_claim(entry)) { + if (!irq_work_claim(work)) { /* * Already enqueued, can't do! */ return false; } - __irq_work_queue(entry); + __irq_work_queue(work); return true; } EXPORT_SYMBOL_GPL(irq_work_queue); @@ -120,34 +95,34 @@ EXPORT_SYMBOL_GPL(irq_work_queue); */ void irq_work_run(void) { - struct irq_work *list; + struct irq_work *work; + struct llist_head *this_list; + struct llist_node *llnode; - if (this_cpu_read(irq_work_list) == NULL) + this_list = &__get_cpu_var(irq_work_list); + if (llist_empty(this_list)) return; BUG_ON(!in_irq()); BUG_ON(!irqs_disabled()); - list = this_cpu_xchg(irq_work_list, NULL); - - while (list != NULL) { - struct irq_work *entry = list; + llnode = llist_del_all(this_list); + while (llnode != NULL) { + work = llist_entry(llnode, struct irq_work, llnode); - list = irq_work_next(list); + llnode = llist_next(llnode); /* - * Clear the PENDING bit, after this point the @entry + * Clear the PENDING bit, after this point the @work * can be re-used. */ - entry->next = next_flags(NULL, IRQ_WORK_BUSY); - entry->func(entry); + work->flags = IRQ_WORK_BUSY; + work->func(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - (void)cmpxchg(&entry->next, - next_flags(NULL, IRQ_WORK_BUSY), - NULL); + (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); } } EXPORT_SYMBOL_GPL(irq_work_run); @@ -156,11 +131,11 @@ EXPORT_SYMBOL_GPL(irq_work_run); * Synchronize against the irq_work @entry, ensures the entry is not * currently in use. 
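[Editor's sketch, not part of the patch: the llist/flags rework keeps the external irq_work contract unchanged, so a user still declares an irq_work with a callback, queues it from any context, and can synchronize against it. my_irq_work and my_irq_work_fn are hypothetical.]

	#include <linux/irq_work.h>

	static void my_irq_work_fn(struct irq_work *work)
	{
		/* invoked from irq_work_run() in hard-IRQ context after the
		 * self-interrupt raised by arch_irq_work_raise() */
	}

	static struct irq_work my_irq_work = {
		.func = my_irq_work_fn,
	};

	static void example_poke(void)
	{
		irq_work_queue(&my_irq_work);	/* no-op if already pending */
	}

	static void example_teardown(void)
	{
		irq_work_sync(&my_irq_work);	/* wait until the callback is done */
	}
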
*/ -void irq_work_sync(struct irq_work *entry) +void irq_work_sync(struct irq_work *work) { WARN_ON_ONCE(irqs_disabled()); - while (irq_work_is_set(entry, IRQ_WORK_BUSY)) + while (work->flags & IRQ_WORK_BUSY) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); diff --git a/kernel/kexec.c b/kernel/kexec.c index 296fbc84d65..2e2f1df2794 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1005,6 +1005,10 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, if (flags & KEXEC_PRESERVE_CONTEXT) image->preserve_context = 1; +#ifdef CONFIG_KEXEC_HARDBOOT + if (flags & KEXEC_HARDBOOT) + image->hardboot = 1; +#endif result = machine_kexec_prepare(image); if (result) goto out; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 447960603fb..b7e4460a710 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -1130,10 +1130,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, if (debug_locks_silent) return 0; - printk("\n=======================================================\n"); - printk( "[ INFO: possible circular locking dependency detected ]\n"); + printk("\n"); + printk("======================================================\n"); + printk("[ INFO: possible circular locking dependency detected ]\n"); print_kernel_version(); - printk( "-------------------------------------------------------\n"); + printk("-------------------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(check_src); @@ -1464,11 +1465,12 @@ print_bad_irq_dependency(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n======================================================\n"); - printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", + printk("\n"); + printk("======================================================\n"); + printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", irqclass, irqclass); print_kernel_version(); - printk( "------------------------------------------------------\n"); + printk("------------------------------------------------------\n"); printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, @@ -1693,10 +1695,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n=============================================\n"); - printk( "[ INFO: possible recursive locking detected ]\n"); + printk("\n"); + printk("=============================================\n"); + printk("[ INFO: possible recursive locking detected ]\n"); print_kernel_version(); - printk( "---------------------------------------------\n"); + printk("---------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(next); @@ -2178,10 +2181,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n=================================\n"); - printk( "[ INFO: inconsistent lock state ]\n"); + printk("\n"); + printk("=================================\n"); + printk("[ INFO: inconsistent lock state ]\n"); print_kernel_version(); - printk( "---------------------------------\n"); + printk("---------------------------------\n"); printk("inconsistent {%s} -> {%s} usage.\n", usage_str[prev_bit], 
usage_str[new_bit]); @@ -2242,10 +2246,11 @@ print_irq_inversion_bug(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n=========================================================\n"); - printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); + printk("\n"); + printk("=========================================================\n"); + printk("[ INFO: possible irq lock inversion dependency detected ]\n"); print_kernel_version(); - printk( "---------------------------------------------------------\n"); + printk("---------------------------------------------------------\n"); printk("%s/%d just changed the state of lock:\n", curr->comm, task_pid_nr(curr)); print_lock(this); @@ -3071,9 +3076,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, if (debug_locks_silent) return 0; - printk("\n=====================================\n"); - printk( "[ BUG: bad unlock balance detected! ]\n"); - printk( "-------------------------------------\n"); + printk("\n"); + printk("=====================================\n"); + printk("[ BUG: bad unlock balance detected! ]\n"); + printk("-------------------------------------\n"); printk("%s/%d is trying to release lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); @@ -3484,9 +3490,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, if (debug_locks_silent) return 0; - printk("\n=================================\n"); - printk( "[ BUG: bad contention detected! ]\n"); - printk( "---------------------------------\n"); + printk("\n"); + printk("=================================\n"); + printk("[ BUG: bad contention detected! ]\n"); + printk("---------------------------------\n"); printk("%s/%d is trying to contend lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); @@ -3845,9 +3852,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, if (debug_locks_silent) return; - printk("\n=========================\n"); - printk( "[ BUG: held lock freed! ]\n"); - printk( "-------------------------\n"); + printk("\n"); + printk("=========================\n"); + printk("[ BUG: held lock freed! ]\n"); + printk("-------------------------\n"); printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", curr->comm, task_pid_nr(curr), mem_from, mem_to-1); print_lock(hlock); @@ -3901,9 +3909,10 @@ static void print_held_locks_bug(struct task_struct *curr) if (debug_locks_silent) return; - printk("\n=====================================\n"); - printk( "[ BUG: lock held at task exit time! ]\n"); - printk( "-------------------------------------\n"); + printk("\n"); + printk("=====================================\n"); + printk("[ BUG: lock held at task exit time! ]\n"); + printk("-------------------------------------\n"); printk("%s/%d is exiting with locks still held!\n", curr->comm, task_pid_nr(curr)); lockdep_print_held_locks(curr); @@ -3997,16 +4006,17 @@ void lockdep_sys_exit(void) if (unlikely(curr->lockdep_depth)) { if (!debug_locks_off()) return; - printk("\n================================================\n"); - printk( "[ BUG: lock held when returning to user space! ]\n"); - printk( "------------------------------------------------\n"); + printk("\n"); + printk("================================================\n"); + printk("[ BUG: lock held when returning to user space! 
]\n"); + printk("------------------------------------------------\n"); printk("%s/%d is leaving the kernel with locks still held!\n", curr->comm, curr->pid); lockdep_print_held_locks(curr); } } -void lockdep_rcu_dereference(const char *file, const int line) +void lockdep_rcu_suspicious(const char *file, const int line, const char *s) { struct task_struct *curr = current; @@ -4015,15 +4025,15 @@ void lockdep_rcu_dereference(const char *file, const int line) return; #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ /* Note: the following can be executed concurrently, so be careful. */ - printk("\n===================================================\n"); - printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); - printk( "---------------------------------------------------\n"); - printk("%s:%d invoked rcu_dereference_check() without protection!\n", - file, line); + printk("\n"); + printk("===============================\n"); + printk("[ INFO: suspicious RCU usage. ]\n"); + printk("-------------------------------\n"); + printk("%s:%d %s!\n", file, line, s); printk("\nother info that might help us debug this:\n\n"); printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); dump_stack(); } -EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); +EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); diff --git a/kernel/pid.c b/kernel/pid.c index e432057f3b2..8cafe7e72ad 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task); */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { - rcu_lockdep_assert(rcu_read_lock_held()); + rcu_lockdep_assert(rcu_read_lock_held(), + "find_task_by_pid_ns() needs rcu_read_lock()" + " protection"); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 9b224e16b19..a6ef0bed68d 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -16,3 +16,5 @@ obj-$(CONFIG_FB_EARLYSUSPEND) += fbearlysuspend.o obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + +obj-$(CONFIG_SUSPEND) += wakeup_reason.o diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c new file mode 100644 index 00000000000..9823d9ccde4 --- /dev/null +++ b/kernel/power/wakeup_reason.c @@ -0,0 +1,132 @@ +/* + * kernel/power/wakeup_reason.c + * + * Logs the reasons which caused the kernel to resume from + * the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define MAX_WAKEUP_REASON_IRQS 32 +static int irq_list[MAX_WAKEUP_REASON_IRQS]; +static int irq_count; +static struct kobject *wakeup_reason; +static spinlock_t resume_reason_lock; + +static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int irq_no, buf_offset = 0; + struct irq_desc *desc; + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irq_count; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } + spin_unlock(&resume_reason_lock); + return buf_offset; +} + +static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); + +static struct attribute *attrs[] = { + &resume_reason.attr, + NULL, +}; +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +/* + * logs all the wake up reasons to the kernel + * stores the irqs to expose them to the userspace via sysfs + */ +void log_wakeup_reason(int irq) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, + desc->action->name); + else + printk(KERN_INFO "Resume caused by IRQ %d\n", irq); + + spin_lock(&resume_reason_lock); + irq_list[irq_count++] = irq; + spin_unlock(&resume_reason_lock); +} + +/* Detects a suspend and clears all the previous wake up reasons*/ +static int wakeup_reason_pm_event(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + switch (pm_event) { + case PM_SUSPEND_PREPARE: + spin_lock(&resume_reason_lock); + irq_count = 0; + spin_unlock(&resume_reason_lock); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block wakeup_reason_pm_notifier_block = { + .notifier_call = wakeup_reason_pm_event, +}; + +/* Initializes the sysfs parameter + * registers the pm_event notifier + */ +int __init wakeup_reason_init(void) +{ + int retval; + spin_lock_init(&resume_reason_lock); + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); + if (retval) + printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", + __func__, retval); + + wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!wakeup_reason) { + printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", + __func__); + return 1; + } + retval = sysfs_create_group(wakeup_reason, &attr_group); + if (retval) { + kobject_put(wakeup_reason); + printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", + __func__, retval); + } + return 0; +} + +late_initcall(wakeup_reason_init); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 67d1fdd3c55..822f701e6d7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -45,6 +45,36 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) child->parent = new_parent; } +/* Ensure that nothing can wake it up, even SIGKILL */ +static bool ptrace_freeze_traced(struct task_struct *task) +{ + bool ret = false; + + spin_lock_irq(&task->sighand->siglock); + if (task_is_traced(task) && !__fatal_signal_pending(task)) { + task->state = __TASK_TRACED; + ret = true; + } + spin_unlock_irq(&task->sighand->siglock); + + return ret; +} + +static void ptrace_unfreeze_traced(struct task_struct *task) 
+{ + if (task->state != __TASK_TRACED) + return; + + WARN_ON(!task->ptrace || task->parent != current); + + spin_lock_irq(&task->sighand->siglock); + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); + else + task->state = TASK_TRACED; + spin_unlock_irq(&task->sighand->siglock); +} + /** * __ptrace_unlink - unlink ptracee and restore its execution state * @child: ptracee to be unlinked @@ -117,7 +147,7 @@ void __ptrace_unlink(struct task_struct *child) * TASK_KILLABLE sleeps. */ if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) - signal_wake_up(child, task_is_traced(child)); + ptrace_signal_wake_up(child, true); spin_unlock(&child->sighand->siglock); } @@ -151,24 +181,30 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state) * be changed by us so it's not changing right after this. */ read_lock(&tasklist_lock); - if ((child->ptrace & PT_PTRACED) && child->parent == current) { + if (child->ptrace && child->parent == current) { + WARN_ON(child->state == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). */ - spin_lock_irq(&child->sighand->siglock); - WARN_ON_ONCE(task_is_stopped(child)); - if (ignore_state || (task_is_traced(child) && + if (ignore_state || (ptrace_freeze_traced(child) && !(child->jobctl & JOBCTL_LISTENING))) ret = 0; - spin_unlock_irq(&child->sighand->siglock); } read_unlock(&tasklist_lock); - if (!ret && !ignore_state) - ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; + if (!ret && !ignore_state) { + if (!wait_task_inactive(child, __TASK_TRACED)) { + /* + * This can only happen if may_ptrace_stop() fails and + * ptrace_stop() changes ->state back to TASK_RUNNING, + * so we should not worry about leaking __TASK_TRACED. + */ + WARN_ON(child->state == __TASK_TRACED); + ret = -ESRCH; + } + } - /* All systems go.. */ return ret; } @@ -307,7 +343,7 @@ static int ptrace_attach(struct task_struct *task, long request, */ if (task_is_stopped(task) && task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) - signal_wake_up(task, 1); + signal_wake_up_state(task, __TASK_STOPPED); spin_unlock(&task->sighand->siglock); @@ -899,6 +935,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, goto out_put_task_struct; ret = arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); out_put_task_struct: put_task_struct(child); @@ -1038,8 +1076,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, ret = ptrace_check_attach(child, request == PTRACE_KILL || request == PTRACE_INTERRUPT); - if (!ret) + if (!ret) { ret = compat_arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); + } out_put_task_struct: put_task_struct(child); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ba06207b1dd..bfa24b08686 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -360,13 +360,6 @@ void rcu_enter_nohz(void) smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); local_irq_restore(flags); - - /* If the interrupt queued a callback, get out of dyntick mode. 
*/ - if (in_irq() && - (__get_cpu_var(rcu_sched_data).nxtlist || - __get_cpu_var(rcu_bh_data).nxtlist || - rcu_preempt_needs_cpu(smp_processor_id()))) - set_need_resched(); } /* diff --git a/kernel/sched.c b/kernel/sched.c index 6121c2ce14b..5377f277072 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include @@ -197,10 +198,28 @@ static inline int rt_bandwidth_enabled(void) return sysctl_sched_rt_runtime >= 0; } -static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) { - ktime_t now; + unsigned long delta; + ktime_t soft, hard, now; + + for (;;) { + if (hrtimer_active(period_timer)) + break; + + now = hrtimer_cb_get_time(period_timer); + hrtimer_forward(period_timer, now, period); + soft = hrtimer_get_softexpires(period_timer); + hard = hrtimer_get_expires(period_timer); + delta = ktime_to_ns(ktime_sub(hard, soft)); + __hrtimer_start_range_ns(period_timer, soft, delta, + HRTIMER_MODE_ABS_PINNED, 0); + } +} + +static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +{ if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return; @@ -208,22 +227,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) return; raw_spin_lock(&rt_b->rt_runtime_lock); - for (;;) { - unsigned long delta; - ktime_t soft, hard; - - if (hrtimer_active(&rt_b->rt_period_timer)) - break; - - now = hrtimer_cb_get_time(&rt_b->rt_period_timer); - hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); - - soft = hrtimer_get_softexpires(&rt_b->rt_period_timer); - hard = hrtimer_get_expires(&rt_b->rt_period_timer); - delta = ktime_to_ns(ktime_sub(hard, soft)); - __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, - HRTIMER_MODE_ABS_PINNED, 0); - } + start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); raw_spin_unlock(&rt_b->rt_runtime_lock); } @@ -248,6 +252,24 @@ struct cfs_rq; static LIST_HEAD(task_groups); +struct cfs_bandwidth { +#ifdef CONFIG_CFS_BANDWIDTH + raw_spinlock_t lock; + ktime_t period; + u64 quota, runtime; + s64 hierarchal_quota; + u64 runtime_expires; + + int idle, timer_active; + struct hrtimer period_timer, slack_timer; + struct list_head throttled_cfs_rq; + + /* statistics */ + int nr_periods, nr_throttled; + u64 throttled_time; +#endif +}; + /* task group related information */ struct task_group { struct cgroup_subsys_state css; @@ -279,6 +301,8 @@ struct task_group { #ifdef CONFIG_SCHED_AUTOGROUP struct autogroup *autogroup; #endif + + struct cfs_bandwidth cfs_bandwidth; }; /* task_group_lock serializes the addition/removal of task groups */ @@ -312,7 +336,7 @@ struct task_group root_task_group; /* CFS-related fields in a runqueue */ struct cfs_rq { struct load_weight load; - unsigned long nr_running; + unsigned long nr_running, h_nr_running; u64 exec_clock; u64 min_vruntime; @@ -378,9 +402,120 @@ struct cfs_rq { unsigned long load_contribution; #endif +#ifdef CONFIG_CFS_BANDWIDTH + int runtime_enabled; + u64 runtime_expires; + s64 runtime_remaining; + + u64 throttled_timestamp; + int throttled, throttle_count; + struct list_head throttled_list; +#endif #endif }; +#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_CFS_BANDWIDTH +static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) +{ + return &tg->cfs_bandwidth; +} + +static inline u64 default_cfs_period(void); +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); +static void do_sched_cfs_slack_timer(struct 
cfs_bandwidth *cfs_b); + +static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) +{ + struct cfs_bandwidth *cfs_b = + container_of(timer, struct cfs_bandwidth, slack_timer); + do_sched_cfs_slack_timer(cfs_b); + + return HRTIMER_NORESTART; +} + +static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) +{ + struct cfs_bandwidth *cfs_b = + container_of(timer, struct cfs_bandwidth, period_timer); + ktime_t now; + int overrun; + int idle = 0; + + for (;;) { + now = hrtimer_cb_get_time(timer); + overrun = hrtimer_forward(timer, now, cfs_b->period); + + if (!overrun) + break; + + idle = do_sched_cfs_period_timer(cfs_b, overrun); + } + + return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; +} + +static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) +{ + raw_spin_lock_init(&cfs_b->lock); + cfs_b->runtime = 0; + cfs_b->quota = RUNTIME_INF; + cfs_b->period = ns_to_ktime(default_cfs_period()); + + INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); + hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cfs_b->period_timer.function = sched_cfs_period_timer; + hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cfs_b->slack_timer.function = sched_cfs_slack_timer; +} + +static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + cfs_rq->runtime_enabled = 0; + INIT_LIST_HEAD(&cfs_rq->throttled_list); +} + +/* requires cfs_b->lock, may release to reprogram timer */ +static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) +{ + /* + * The timer may be active because we're trying to set a new bandwidth + * period or because we're racing with the tear-down path + * (timer_active==0 becomes visible before the hrtimer call-back + * terminates). In either case we ensure that it's re-programmed + */ + while (unlikely(hrtimer_active(&cfs_b->period_timer))) { + raw_spin_unlock(&cfs_b->lock); + /* ensure cfs_b->lock is available while we wait */ + hrtimer_cancel(&cfs_b->period_timer); + + raw_spin_lock(&cfs_b->lock); + /* if someone else restarted the timer then we're done */ + if (cfs_b->timer_active) + return; + } + + cfs_b->timer_active = 1; + start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); +} + +static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) +{ + hrtimer_cancel(&cfs_b->period_timer); + hrtimer_cancel(&cfs_b->slack_timer); +} +#else +static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} +static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} +static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} + +static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) +{ + return NULL; +} +#endif /* CONFIG_CFS_BANDWIDTH */ +#endif /* CONFIG_FAIR_GROUP_SCHED */ + /* Real-Time classes' related field in a runqueue: */ struct rt_rq { struct rt_prio_array active; @@ -472,6 +607,11 @@ struct rq { #endif int skip_clock_update; + /* time-based average load */ + u64 nr_last_stamp; + unsigned int ave_nr_running; + seqcount_t ave_seqcnt; + /* capture load from *all* tasks on this cpu: */ struct load_weight load; unsigned long nr_load_updates; @@ -511,7 +651,7 @@ struct rq { unsigned long cpu_power; - unsigned char idle_at_tick; + unsigned char idle_balance; /* For active balancing */ int post_schedule; int active_balance; @@ -521,8 +661,6 @@ struct rq { int cpu; int online; - unsigned long avg_load_per_task; - u64 rt_avg; u64 age_stamp; u64 idle_stamp; @@ -571,7 +709,7 @@ struct rq { #endif #ifdef CONFIG_SMP - struct task_struct *wake_list; + struct llist_head wake_list; #endif }; @@ -614,35 
+752,36 @@ static inline int cpu_of(struct rq *rq) /* * Return the group to which this tasks belongs. * - * We use task_subsys_state_check() and extend the RCU verification with - * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each - * task it moves into the cgroup. Therefore by holding either of those locks, - * we pin the task to the current cgroup. + * We cannot use task_subsys_state() and friends because the cgroup + * subsystem changes that value before the cgroup_subsys::attach() method + * is called, therefore we cannot pin it and might observe the wrong value. + * + * The same is true for autogroup's p->signal->autogroup->tg, the autogroup + * core changes this before calling sched_move_task(). + * + * Instead we use a 'copy' which is updated from sched_move_task() while + * holding both task_struct::pi_lock and rq::lock. */ static inline struct task_group *task_group(struct task_struct *p) { - struct task_group *tg; - struct cgroup_subsys_state *css; - - css = task_subsys_state_check(p, cpu_cgroup_subsys_id, - lockdep_is_held(&p->pi_lock) || - lockdep_is_held(&task_rq(p)->lock)); - tg = container_of(css, struct task_group, css); - - return autogroup_task_group(p, tg); + return p->sched_task_group; } /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { +#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED) + struct task_group *tg = task_group(p); +#endif + #ifdef CONFIG_FAIR_GROUP_SCHED - p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; - p->se.parent = task_group(p)->se[cpu]; + p->se.cfs_rq = tg->cfs_rq[cpu]; + p->se.parent = tg->se[cpu]; #endif #ifdef CONFIG_RT_GROUP_SCHED - p->rt.rt_rq = task_group(p)->rt_rq[cpu]; - p->rt.parent = task_group(p)->rt_se[cpu]; + p->rt.rt_rq = tg->rt_rq[cpu]; + p->rt.parent = tg->rt_se[cpu]; #endif } @@ -1273,6 +1412,18 @@ void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } +static inline bool got_nohz_idle_kick(void) +{ + return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick; +} + +#else /* CONFIG_NO_HZ */ + +static inline bool got_nohz_idle_kick(void) +{ + return false; +} + #endif /* CONFIG_NO_HZ */ static u64 sched_avg_period(void) @@ -1472,24 +1623,28 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) update_load_sub(&rq->load, load); } -#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED) +#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ + (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH))) typedef int (*tg_visitor)(struct task_group *, void *); /* - * Iterate the full tree, calling @down when first entering a node and @up when - * leaving it for the final time. + * Iterate task_group tree rooted at *from, calling @down when first entering a + * node and @up when leaving it for the final time. + * + * Caller must hold rcu_lock or sufficient equivalent. 
*/ -static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) +static int walk_tg_tree_from(struct task_group *from, + tg_visitor down, tg_visitor up, void *data) { struct task_group *parent, *child; int ret; - rcu_read_lock(); - parent = &root_task_group; + parent = from; + down: ret = (*down)(parent, data); if (ret) - goto out_unlock; + goto out; list_for_each_entry_rcu(child, &parent->children, siblings) { parent = child; goto down; @@ -1498,19 +1653,29 @@ static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) continue; } ret = (*up)(parent, data); - if (ret) - goto out_unlock; + if (ret || parent == from) + goto out; child = parent; parent = parent->parent; if (parent) goto up; -out_unlock: - rcu_read_unlock(); - +out: return ret; } +/* + * Iterate the full tree, calling @down when first entering a node and @up when + * leaving it for the final time. + * + * Caller must hold rcu_lock or sufficient equivalent. + */ + +static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) +{ + return walk_tg_tree_from(&root_task_group, down, up, data); +} + static int tg_nop(struct task_group *tg, void *data) { return 0; @@ -1570,11 +1735,9 @@ static unsigned long cpu_avg_load_per_task(int cpu) unsigned long nr_running = ACCESS_ONCE(rq->nr_running); if (nr_running) - rq->avg_load_per_task = rq->load.weight / nr_running; - else - rq->avg_load_per_task = 0; + return rq->load.weight / nr_running; - return rq->avg_load_per_task; + return 0; } #ifdef CONFIG_PREEMPT @@ -1729,10 +1892,8 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) #endif -static void calc_load_account_idle(struct rq *this_rq); static void update_sysctl(void); static int get_update_sysctl_factor(void); -static void update_cpu_load(struct rq *this_rq); static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) { @@ -1740,7 +1901,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) #ifdef CONFIG_SMP /* * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be - * successfuly executed on another CPU. We must ensure that updates of + * successfully executed on another CPU. We must ensure that updates of * per-task data have been completed by this moment. 
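walk_tg_tree_from() above generalizes the task_group tree walk so it can start at an arbitrary node, calling @down when a node is first entered and @up when it is left for the final time, with a non-zero return from either visitor aborting the walk. The userspace sketch below shows the same visiting contract on a toy tree; it recurses for brevity, whereas the kernel version iterates via ->parent back-pointers, and every name here is invented for illustration.

#include <stdio.h>

struct tg {
        const char *name;
        struct tg *children;    /* first child */
        struct tg *siblings;    /* next child of the same parent */
};

typedef int (*tg_visitor)(struct tg *, void *);

/* @down on first entry, @up on final leave, non-zero return aborts. */
static int walk_tree_from(struct tg *from, tg_visitor down, tg_visitor up,
                          void *data)
{
        int ret = down(from, data);

        if (ret)
                return ret;

        for (struct tg *child = from->children; child; child = child->siblings) {
                ret = walk_tree_from(child, down, up, data);
                if (ret)
                        return ret;
        }

        return up(from, data);
}

static int print_down(struct tg *tg, void *data) { (void)data; printf("down %s\n", tg->name); return 0; }
static int print_up(struct tg *tg, void *data)   { (void)data; printf("up   %s\n", tg->name); return 0; }

int main(void)
{
        struct tg leaf  = { .name = "leaf" };
        struct tg child = { .name = "child", .children = &leaf };
        struct tg root  = { .name = "root",  .children = &child };

        return walk_tree_from(&root, print_down, print_up, NULL);
}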
*/ smp_wmb(); @@ -1756,14 +1917,49 @@ static const struct sched_class rt_sched_class; #include "sched_stats.h" +/* 27 ~= 134217728ns = 134.2ms + * 26 ~= 67108864ns = 67.1ms + * 25 ~= 33554432ns = 33.5ms + * 24 ~= 16777216ns = 16.8ms + */ +#define NR_AVE_PERIOD_EXP 27 +#define NR_AVE_SCALE(x) ((x) << FSHIFT) +#define NR_AVE_PERIOD (1 << NR_AVE_PERIOD_EXP) +#define NR_AVE_DIV_PERIOD(x) ((x) >> NR_AVE_PERIOD_EXP) + +static inline unsigned int do_avg_nr_running(struct rq *rq) +{ + s64 nr, deltax; + unsigned int ave_nr_running = rq->ave_nr_running; + + deltax = rq->clock_task - rq->nr_last_stamp; + nr = NR_AVE_SCALE(rq->nr_running); + + if (deltax > NR_AVE_PERIOD) + ave_nr_running = nr; + else + ave_nr_running += + NR_AVE_DIV_PERIOD(deltax * (nr - ave_nr_running)); + + return ave_nr_running; +} + static void inc_nr_running(struct rq *rq) { + write_seqcount_begin(&rq->ave_seqcnt); + rq->ave_nr_running = do_avg_nr_running(rq); + rq->nr_last_stamp = rq->clock_task; rq->nr_running++; + write_seqcount_end(&rq->ave_seqcnt); } static void dec_nr_running(struct rq *rq) { + write_seqcount_begin(&rq->ave_seqcnt); + rq->ave_nr_running = do_avg_nr_running(rq); + rq->nr_last_stamp = rq->clock_task; rq->nr_running--; + write_seqcount_end(&rq->ave_seqcnt); } static void set_load_weight(struct task_struct *p) @@ -1807,7 +2003,6 @@ static void activate_task(struct rq *rq, struct task_struct *p, int flags) rq->nr_uninterruptible--; enqueue_task(rq, p, flags); - inc_nr_running(rq); } /* @@ -1819,7 +2014,6 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) rq->nr_uninterruptible++; dequeue_task(rq, p, flags); - dec_nr_running(rq); } #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -2219,7 +2413,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. * * sched_move_task() holds both and thus holding either pins the cgroup, - * see set_task_rq(). + * see task_group(). * * Furthermore, all task_rq users should acquire both locks, see * task_rq_lock(). @@ -2391,11 +2585,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p) /* Look for allowed, online CPU in same node. */ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) return dest_cpu; /* Any allowed, online CPU? 
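The NR_AVE_* block above computes a time-weighted running average of rq->nr_running in fixed point: the average moves toward the instantaneous value in proportion to the nanoseconds elapsed since the last update, with 2^27 ns (about 134 ms) as the averaging period. A userspace sketch of that update follows, assuming the usual FSHIFT = 11 load-average scaling; it is an illustration, not the scheduler code itself.

#include <stdint.h>
#include <stdio.h>

#define FSHIFT                  11      /* fixed-point fraction bits */
#define NR_AVE_PERIOD_EXP       27      /* 2^27 ns ~= 134.2 ms */
#define NR_AVE_SCALE(x)         ((int64_t)(x) << FSHIFT)
#define NR_AVE_PERIOD           (1LL << NR_AVE_PERIOD_EXP)
#define NR_AVE_DIV_PERIOD(x)    ((x) >> NR_AVE_PERIOD_EXP)

/* Move the old average towards the instantaneous nr_running in
 * proportion to the time elapsed since the last update. */
static int64_t do_avg_nr_running(int64_t ave, unsigned int nr_running,
                                 int64_t delta_ns)
{
        int64_t nr = NR_AVE_SCALE(nr_running);

        if (delta_ns > NR_AVE_PERIOD)
                return nr;      /* sample too old, snap to the current value */

        return ave + NR_AVE_DIV_PERIOD(delta_ns * (nr - ave));
}

int main(void)
{
        int64_t ave = NR_AVE_SCALE(1);  /* one runnable task on average */

        /* Four tasks runnable for roughly a quarter of the period. */
        ave = do_avg_nr_running(ave, 4, NR_AVE_PERIOD / 4);
        printf("average nr_running ~= %.2f\n", (double)ave / (1 << FSHIFT));
        return 0;
}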
*/ - dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); + dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); if (dest_cpu < nr_cpu_ids) return dest_cpu; @@ -2432,7 +2626,7 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ - if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || + if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); @@ -2557,42 +2751,26 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) } #ifdef CONFIG_SMP -static void sched_ttwu_do_pending(struct task_struct *list) +static void sched_ttwu_pending(void) { struct rq *rq = this_rq(); + struct llist_node *llist = llist_del_all(&rq->wake_list); + struct task_struct *p; raw_spin_lock(&rq->lock); - while (list) { - struct task_struct *p = list; - list = list->wake_entry; + while (llist) { + p = llist_entry(llist, struct task_struct, wake_entry); + llist = llist_next(llist); ttwu_do_activate(rq, p, 0); } raw_spin_unlock(&rq->lock); } -#ifdef CONFIG_HOTPLUG_CPU - -static void sched_ttwu_pending(void) -{ - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) - return; - - sched_ttwu_do_pending(list); -} - -#endif /* CONFIG_HOTPLUG_CPU */ - void scheduler_ipi(void) { - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) return; /* @@ -2609,25 +2787,21 @@ void scheduler_ipi(void) * somewhat pessimize the simple resched case. */ irq_enter(); - sched_ttwu_do_pending(list); + sched_ttwu_pending(); + + /* + * Check if someone kicked us for doing the nohz idle load balance. + */ + if (unlikely(got_nohz_idle_kick() && !need_resched())) { + this_rq()->idle_balance = 1; + raise_softirq_irqoff(SCHED_SOFTIRQ); + } irq_exit(); } static void ttwu_queue_remote(struct task_struct *p, int cpu) { - struct rq *rq = cpu_rq(cpu); - struct task_struct *next = rq->wake_list; - - for (;;) { - struct task_struct *old = next; - - p->wake_entry = next; - next = cmpxchg(&rq->wake_list, old, p); - if (next == old) - break; - } - - if (!next) + if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) smp_send_reschedule(cpu); } @@ -2794,7 +2968,8 @@ static void try_to_wake_up_local(struct task_struct *p) */ int wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_ALL, 0); + WARN_ON(task_is_stopped_or_traced(p)); + return try_to_wake_up(p, TASK_NORMAL, 0); } EXPORT_SYMBOL(wake_up_process); @@ -2848,20 +3023,24 @@ void sched_fork(struct task_struct *p) */ p->state = TASK_RUNNING; + /* + * Make sure we do not leak PI boosting priority to the child. + */ + p->prio = current->normal_prio; + /* * Revert to default priority/policy on fork if requested. */ if (unlikely(p->sched_reset_on_fork)) { - if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { + if (task_has_rt_policy(p)) { p->policy = SCHED_NORMAL; - p->normal_prio = p->static_prio; - } - - if (PRIO_TO_NICE(p->static_prio) < 0) { p->static_prio = NICE_TO_PRIO(0); - p->normal_prio = p->static_prio; - set_load_weight(p); - } + p->rt_priority = 0; + } else if (PRIO_TO_NICE(p->static_prio) < 0) + p->static_prio = NICE_TO_PRIO(0); + + p->prio = p->normal_prio = __normal_prio(p); + set_load_weight(p); /* * We don't need the reset flag anymore after the fork. 
It has @@ -2870,11 +3049,6 @@ void sched_fork(struct task_struct *p) p->sched_reset_on_fork = 0; } - /* - * Make sure we do not leak PI boosting priority to the child. - */ - p->prio = current->normal_prio; - if (!rt_prio(p->prio)) p->sched_class = &fair_sched_class; @@ -3255,6 +3429,45 @@ unsigned long nr_iowait(void) return sum; } +unsigned long avg_nr_running(void) +{ + unsigned long i, sum = 0; + unsigned int seqcnt, ave_nr_running; + + for_each_online_cpu(i) { + struct rq *q = cpu_rq(i); + + /* + * Update average to avoid reading stalled value if there were + * no run-queue changes for a long time. On the other hand if + * the changes are happening right now, just read current value + * directly. + */ + seqcnt = read_seqcount_begin(&q->ave_seqcnt); + ave_nr_running = do_avg_nr_running(q); + if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) { + read_seqcount_begin(&q->ave_seqcnt); + ave_nr_running = q->ave_nr_running; + } + + sum += ave_nr_running; + } + + return sum; +} + +unsigned long get_avg_nr_running(unsigned int cpu) +{ + struct rq *q; + + if (cpu >= nr_cpu_ids) + return 0; + + q = cpu_rq(cpu); + + return q->ave_nr_running; +} + unsigned long nr_iowait_cpu(int cpu) { struct rq *this = cpu_rq(cpu); @@ -3268,11 +3481,73 @@ unsigned long this_cpu_load(void) } +/* + * Global load-average calculations + * + * We take a distributed and async approach to calculating the global load-avg + * in order to minimize overhead. + * + * The global load average is an exponentially decaying average of nr_running + + * nr_uninterruptible. + * + * Once every LOAD_FREQ: + * + * nr_active = 0; + * for_each_possible_cpu(cpu) + * nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible; + * + * avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n) + * + * Due to a number of reasons the above turns in the mess below: + * + * - for_each_possible_cpu() is prohibitively expensive on machines with + * serious number of cpus, therefore we need to take a distributed approach + * to calculating nr_active. + * + * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0 + * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) } + * + * So assuming nr_active := 0 when we start out -- true per definition, we + * can simply take per-cpu deltas and fold those into a global accumulate + * to obtain the same result. See calc_load_fold_active(). + * + * Furthermore, in order to avoid synchronizing all per-cpu delta folding + * across the machine, we assume 10 ticks is sufficient time for every + * cpu to have completed this task. + * + * This places an upper-bound on the IRQ-off latency of the machine. Then + * again, being late doesn't loose the delta, just wrecks the sample. + * + * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because + * this would add another cross-cpu cacheline miss and atomic operation + * to the wakeup path. Instead we increment on whatever cpu the task ran + * when it went into uninterruptible state and decrement on whatever cpu + * did the wakeup. This means that only the sum of nr_uninterruptible over + * all cpus yields the correct result. + * + * This covers the NO_HZ=n code, for extra head-aches, see the comment below. 
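The comment block above explains how the global load average avoids an expensive for_each_possible_cpu() walk: each CPU remembers the nr_active value it last reported and only folds the delta into one shared counter, so the global sum still converges to the true total. A small userspace sketch of that folding, with a C11 atomic standing in for atomic_long_t and locking and NO_HZ ignored:

#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 4

struct cpu_rq {
        long nr_running;
        long nr_uninterruptible;
        long calc_load_active;  /* value last folded into the global count */
};

static struct cpu_rq rq[NR_CPUS];
static atomic_long calc_load_tasks = 0;         /* global accumulated nr_active */

/* Fold this CPU's change in nr_active since the last fold; only the
 * delta is pushed, so no cross-CPU iteration is ever needed. */
static long calc_load_fold_active(struct cpu_rq *this_rq)
{
        long nr_active = this_rq->nr_running + this_rq->nr_uninterruptible;
        long delta = nr_active - this_rq->calc_load_active;

        this_rq->calc_load_active = nr_active;
        return delta;
}

int main(void)
{
        /* Each CPU folds on its own tick. */
        rq[0].nr_running = 3;
        rq[1].nr_running = 1;
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                atomic_fetch_add(&calc_load_tasks, calc_load_fold_active(&rq[cpu]));

        rq[0].nr_running = 1;           /* two tasks went to sleep on CPU0 */
        atomic_fetch_add(&calc_load_tasks, calc_load_fold_active(&rq[0]));

        printf("global nr_active = %ld\n", atomic_load(&calc_load_tasks));
        return 0;
}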
+ */ + /* Variables and functions for calc_load */ static atomic_long_t calc_load_tasks; static unsigned long calc_load_update; unsigned long avenrun[3]; -EXPORT_SYMBOL(avenrun); +EXPORT_SYMBOL(avenrun); /* should be removed */ + +/** + * get_avenrun - get the load average array + * @loads: pointer to dest load array + * @offset: offset to add + * @shift: shift count to shift the result left + * + * These values are estimates at best, so no need for locking. + */ +void get_avenrun(unsigned long *loads, unsigned long offset, int shift) +{ + loads[0] = (avenrun[0] + offset) << shift; + loads[1] = (avenrun[1] + offset) << shift; + loads[2] = (avenrun[2] + offset) << shift; +} static long calc_load_fold_active(struct rq *this_rq) { @@ -3289,6 +3564,9 @@ static long calc_load_fold_active(struct rq *this_rq) return delta; } +/* + * a1 = a0 * e + a * (1 - e) + */ static unsigned long calc_load(unsigned long load, unsigned long exp, unsigned long active) { @@ -3300,30 +3578,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) #ifdef CONFIG_NO_HZ /* - * For NO_HZ we delay the active fold to the next LOAD_FREQ update. + * Handle NO_HZ for the global load-average. + * + * Since the above described distributed algorithm to compute the global + * load-average relies on per-cpu sampling from the tick, it is affected by + * NO_HZ. + * + * The basic idea is to fold the nr_active delta into a global idle-delta upon + * entering NO_HZ state such that we can include this as an 'extra' cpu delta + * when we read the global state. + * + * Obviously reality has to ruin such a delightfully simple scheme: + * + * - When we go NO_HZ idle during the window, we can negate our sample + * contribution, causing under-accounting. + * + * We avoid this by keeping two idle-delta counters and flipping them + * when the window starts, thus separating old and new NO_HZ load. + * + * The only trick is the slight shift in index flip for read vs write. + * + * 0s 5s 10s 15s + * +10 +10 +10 +10 + * |-|-----------|-|-----------|-|-----------|-| + * r:0 0 1 1 0 0 1 1 0 + * w:0 1 1 0 0 1 1 0 0 + * + * This ensures we'll fold the old idle contribution in this window while + * accumlating the new one. + * + * - When we wake up from NO_HZ idle during the window, we push up our + * contribution, since we effectively move our sample point to a known + * busy state. + * + * This is solved by pushing the window forward, and thus skipping the + * sample, for this cpu (effectively using the idle-delta for this cpu which + * was in effect at the time the window opened). This also solves the issue + * of having to deal with a cpu having been in NOHZ idle for multiple + * LOAD_FREQ intervals. * * When making the ILB scale, we should try to pull this in as well. */ -static atomic_long_t calc_load_tasks_idle; +static atomic_long_t calc_load_idle[2]; +static int calc_load_idx; -static void calc_load_account_idle(struct rq *this_rq) +static inline int calc_load_write_idx(void) { + int idx = calc_load_idx; + + /* + * See calc_global_nohz(), if we observe the new index, we also + * need to observe the new update time. + */ + smp_rmb(); + + /* + * If the folding window started, make sure we start writing in the + * next idle-delta. 
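calc_load() above implements the exponentially decaying average a1 = a0 * e + a * (1 - e) in FSHIFT fixed point. The sketch below replays that update in userspace; the FSHIFT/FIXED_1 and EXP_1/EXP_5/EXP_15 constants are the conventional <linux/sched.h> values for 1-, 5- and 15-minute averages sampled every 5 seconds (LOAD_FREQ), and the example merely shows the averages converging toward a steady load of 2.

#include <stdio.h>

#define FSHIFT  11                      /* bits of fraction in the fixed point */
#define FIXED_1 (1 << FSHIFT)           /* 1.0 in fixed point */
#define EXP_1   1884                    /* 1/exp(5s/1min) in fixed point */
#define EXP_5   2014                    /* 1/exp(5s/5min) */
#define EXP_15  2037                    /* 1/exp(5s/15min) */

/* a1 = a0 * e + a * (1 - e), all values scaled by FIXED_1. */
static unsigned long calc_load(unsigned long load, unsigned long exp,
                               unsigned long active)
{
        load *= exp;
        load += active * (FIXED_1 - exp);
        return load >> FSHIFT;
}

int main(void)
{
        unsigned long avenrun[3] = { 0, 0, 0 };
        unsigned long active = 2 * FIXED_1;     /* 2 runnable tasks, scaled */

        /* Ten 5-second samples with a steady 2 tasks runnable. */
        for (int i = 0; i < 10; i++) {
                avenrun[0] = calc_load(avenrun[0], EXP_1, active);
                avenrun[1] = calc_load(avenrun[1], EXP_5, active);
                avenrun[2] = calc_load(avenrun[2], EXP_15, active);
        }

        printf("load averages: %.2f %.2f %.2f\n",
               (double)avenrun[0] / FIXED_1,
               (double)avenrun[1] / FIXED_1,
               (double)avenrun[2] / FIXED_1);
        return 0;
}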
+ */ + if (!time_before(jiffies, calc_load_update)) + idx++; + + return idx & 1; +} + +static inline int calc_load_read_idx(void) +{ + return calc_load_idx & 1; +} + +void calc_load_enter_idle(void) +{ + struct rq *this_rq = this_rq(); long delta; + /* + * We're going into NOHZ mode, if there's any pending delta, fold it + * into the pending idle delta. + */ delta = calc_load_fold_active(this_rq); - if (delta) - atomic_long_add(delta, &calc_load_tasks_idle); + if (delta) { + int idx = calc_load_write_idx(); + atomic_long_add(delta, &calc_load_idle[idx]); + } } -static long calc_load_fold_idle(void) +void calc_load_exit_idle(void) { - long delta = 0; + struct rq *this_rq = this_rq(); + + /* + * If we're still before the sample window, we're done. + */ + if (time_before(jiffies, this_rq->calc_load_update)) + return; /* - * Its got a race, we don't care... + * We woke inside or after the sample window, this means we're already + * accounted through the nohz accounting, so skip the entire deal and + * sync up for the next window. */ - if (atomic_long_read(&calc_load_tasks_idle)) - delta = atomic_long_xchg(&calc_load_tasks_idle, 0); + this_rq->calc_load_update = calc_load_update; + if (time_before(jiffies, this_rq->calc_load_update + 10)) + this_rq->calc_load_update += LOAD_FREQ; +} + +static long calc_load_fold_idle(void) +{ + int idx = calc_load_read_idx(); + long delta = 0; + + if (atomic_long_read(&calc_load_idle[idx])) + delta = atomic_long_xchg(&calc_load_idle[idx], 0); return delta; } @@ -3405,28 +3771,16 @@ calc_load_n(unsigned long load, unsigned long exp, * Once we've updated the global active value, we need to apply the exponential * weights adjusted to the number of cycles missed. */ -static void calc_global_nohz(unsigned long ticks) +static void calc_global_nohz(void) { long delta, active, n; - if (time_before(jiffies, calc_load_update)) - return; - - /* - * If we crossed a calc_load_update boundary, make sure to fold - * any pending idle changes, the respective CPUs might have - * missed the tick driven calc_load_account_active() update - * due to NO_HZ. - */ - delta = calc_load_fold_idle(); - if (delta) - atomic_long_add(delta, &calc_load_tasks); - - /* - * If we were idle for multiple load cycles, apply them. - */ - if (ticks >= LOAD_FREQ) { - n = ticks / LOAD_FREQ; + if (!time_before(jiffies, calc_load_update + 10)) { + /* + * Catch-up, fold however many we are behind still + */ + delta = jiffies - calc_load_update - 10; + n = 1 + (delta / LOAD_FREQ); active = atomic_long_read(&calc_load_tasks); active = active > 0 ? active * FIXED_1 : 0; @@ -3439,45 +3793,21 @@ static void calc_global_nohz(unsigned long ticks) } /* - * Its possible the remainder of the above division also crosses - * a LOAD_FREQ period, the regular check in calc_global_load() - * which comes after this will take care of that. + * Flip the idle index... * - * Consider us being 11 ticks before a cycle completion, and us - * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will - * age us 4 cycles, and the test in calc_global_load() will - * pick up the final one. + * Make sure we first write the new time then flip the index, so that + * calc_load_write_idx() will see the new time when it reads the new + * index, this avoids a double flip messing things up. 
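The NO_HZ handling above keeps two idle-delta slots and flips an index when the sampling window opens: CPUs that go idle during the window write into the "new" slot while calc_global_load() folds the "old" one, so old and new NO_HZ contributions never mix. A simplified, single-threaded sketch of that double buffering follows; plain variables and a window_started flag stand in for atomic_long_t, jiffies/calc_load_update and the memory barriers, so this is only an analogue of the scheme.

#include <stdio.h>

/* Two idle-delta slots: one is read and folded while the other is
 * written by CPUs entering NO_HZ idle. */
static long calc_load_idle[2];
static int calc_load_idx;
static int window_started;      /* stand-in for !time_before(jiffies, calc_load_update) */

static int calc_load_write_idx(void)
{
        int idx = calc_load_idx;

        /* Once the folding window has started, write into the next slot. */
        if (window_started)
                idx++;
        return idx & 1;
}

static int calc_load_read_idx(void)
{
        return calc_load_idx & 1;
}

static void cpu_enters_idle(long pending_delta)
{
        calc_load_idle[calc_load_write_idx()] += pending_delta;
}

static long fold_idle(void)
{
        int idx = calc_load_read_idx();
        long delta = calc_load_idle[idx];

        calc_load_idle[idx] = 0;
        return delta;
}

int main(void)
{
        cpu_enters_idle(2);     /* before the window opens: old slot */
        window_started = 1;
        cpu_enters_idle(1);     /* window open: goes to the new slot */

        printf("fold old window: %ld\n", fold_idle());  /* 2 */
        calc_load_idx++;        /* flip after the global update */
        window_started = 0;
        printf("fold new window: %ld\n", fold_idle());  /* 1 */
        return 0;
}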
*/ + smp_wmb(); + calc_load_idx++; } -#else -static void calc_load_account_idle(struct rq *this_rq) -{ -} - -static inline long calc_load_fold_idle(void) -{ - return 0; -} +#else /* !CONFIG_NO_HZ */ -static void calc_global_nohz(unsigned long ticks) -{ -} -#endif +static inline long calc_load_fold_idle(void) { return 0; } +static inline void calc_global_nohz(void) { } -/** - * get_avenrun - get the load average array - * @loads: pointer to dest load array - * @offset: offset to add - * @shift: shift count to shift the result left - * - * These values are estimates at best, so no need for locking. - */ -void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -{ - loads[0] = (avenrun[0] + offset) << shift; - loads[1] = (avenrun[1] + offset) << shift; - loads[2] = (avenrun[2] + offset) << shift; -} +#endif /* CONFIG_NO_HZ */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -3485,13 +3815,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) */ void calc_global_load(unsigned long ticks) { - long active; - - calc_global_nohz(ticks); + long active, delta; if (time_before(jiffies, calc_load_update + 10)) return; + /* + * Fold the 'old' idle-delta to include all NO_HZ cpus. + */ + delta = calc_load_fold_idle(); + if (delta) + atomic_long_add(delta, &calc_load_tasks); + active = atomic_long_read(&calc_load_tasks); active = active > 0 ? active * FIXED_1 : 0; @@ -3500,6 +3835,11 @@ void calc_global_load(unsigned long ticks) avenrun[2] = calc_load(avenrun[2], EXP_15, active); calc_load_update += LOAD_FREQ; + + /* + * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. + */ + calc_global_nohz(); } /* @@ -3514,13 +3854,16 @@ static void calc_load_account_active(struct rq *this_rq) return; delta = calc_load_fold_active(this_rq); - delta += calc_load_fold_idle(); if (delta) atomic_long_add(delta, &calc_load_tasks); this_rq->calc_load_update += LOAD_FREQ; } +/* + * End of global load-average stuff + */ + /* * The exact cpuload at various idx values, calculated at every tick would be * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load @@ -3593,22 +3936,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) * scheduler tick (TICK_NSEC). With tickless idle this will not be called * every tick. We fix it up based on jiffies. */ -static void update_cpu_load(struct rq *this_rq) +static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, + unsigned long pending_updates) { - unsigned long this_load = this_rq->load.weight; - unsigned long curr_jiffies = jiffies; - unsigned long pending_updates; int i, scale; this_rq->nr_load_updates++; - /* Avoid repeated calls on same jiffy, when moving in and out of idle */ - if (curr_jiffies == this_rq->last_load_update_tick) - return; - - pending_updates = curr_jiffies - this_rq->last_load_update_tick; - this_rq->last_load_update_tick = curr_jiffies; - /* Update our load: */ this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */ for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { @@ -3616,26 +3950,95 @@ static void update_cpu_load(struct rq *this_rq) /* scale is effectively 1 << i now, and >> i divides by scale */ - old_load = this_rq->cpu_load[i]; - old_load = decay_load_missed(old_load, pending_updates - 1, i); - new_load = this_load; + old_load = this_rq->cpu_load[i]; + old_load = decay_load_missed(old_load, pending_updates - 1, i); + new_load = this_load; + /* + * Round up the averaging division if load is increasing. 
This + * prevents us from getting stuck on 9 if the load is 10, for + * example. + */ + if (new_load > old_load) + new_load += scale - 1; + + this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; + } + + sched_avg_update(this_rq); +} + +#ifdef CONFIG_NO_HZ +/* + * There is no sane way to deal with nohz on smp when using jiffies because the + * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading + * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. + * + * Therefore we cannot use the delta approach from the regular tick since that + * would seriously skew the load calculation. However we'll make do for those + * updates happening while idle (nohz_idle_balance) or coming out of idle + * (tick_nohz_idle_exit). + * + * This means we might still be one tick off for nohz periods. + */ + +/* + * Called from nohz_idle_balance() to update the load ratings before doing the + * idle balance. + */ +void update_idle_cpu_load(struct rq *this_rq) +{ + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); + unsigned long load = this_rq->load.weight; + unsigned long pending_updates; + + /* + * bail if there's load or we're actually up-to-date. + */ + if (load || curr_jiffies == this_rq->last_load_update_tick) + return; + + pending_updates = curr_jiffies - this_rq->last_load_update_tick; + this_rq->last_load_update_tick = curr_jiffies; + + __update_cpu_load(this_rq, load, pending_updates); +} + +/* + * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. + */ +void update_cpu_load_nohz(void) +{ + struct rq *this_rq = this_rq(); + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); + unsigned long pending_updates; + + if (curr_jiffies == this_rq->last_load_update_tick) + return; + + raw_spin_lock(&this_rq->lock); + pending_updates = curr_jiffies - this_rq->last_load_update_tick; + if (pending_updates) { + this_rq->last_load_update_tick = curr_jiffies; /* - * Round up the averaging division if load is increasing. This - * prevents us from getting stuck on 9 if the load is 10, for - * example. + * We were idle, this means load 0, the current load might be + * !0 due to remote wakeups and the sort. */ - if (new_load > old_load) - new_load += scale - 1; - - this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; + __update_cpu_load(this_rq, 0, pending_updates); } - - sched_avg_update(this_rq); + raw_spin_unlock(&this_rq->lock); } +#endif /* CONFIG_NO_HZ */ +/* + * Called from scheduler_tick() + */ static void update_cpu_load_active(struct rq *this_rq) { - update_cpu_load(this_rq); + /* + * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). 
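__update_cpu_load() above maintains several cpu_load[] indices, each an average over roughly 2^i ticks, and rounds the division up when load rises so the stored value does not sit just below the target forever. A userspace sketch of one tick of that update; the decay_load_missed() handling for ticks skipped while NOHZ-idle is deliberately omitted here.

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

/* One tick of the cpu_load[] update: index i averages over ~2^i ticks.
 * The "+ scale - 1" rounds up when load increases, so a load of 10 does
 * not get stuck at 9 because of the truncating shift. */
static void update_cpu_load_once(unsigned long cpu_load[CPU_LOAD_IDX_MAX],
                                 unsigned long this_load)
{
        cpu_load[0] = this_load;        /* fast path for idx 0 */

        for (int i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
                unsigned long old_load = cpu_load[i];
                unsigned long new_load = this_load;

                if (new_load > old_load)
                        new_load += scale - 1;

                cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
        }
}

int main(void)
{
        unsigned long cpu_load[CPU_LOAD_IDX_MAX] = { 0 };

        for (int tick = 0; tick < 8; tick++)
                update_cpu_load_once(cpu_load, 10);

        for (int i = 0; i < CPU_LOAD_IDX_MAX; i++)
                printf("cpu_load[%d] = %lu\n", i, cpu_load[i]);
        return 0;
}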
+ */ + this_rq->last_load_update_tick = jiffies; + __update_cpu_load(this_rq, this_rq->load.weight, 1); calc_load_account_active(this_rq); } @@ -4036,6 +4439,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) #endif +static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) +{ + u64 temp = (__force u64) rtime; + + temp *= (__force u64) utime; + + if (sizeof(cputime_t) == 4) + temp = div_u64(temp, (__force u32) total); + else + temp = div64_u64(temp, (__force u64) total); + + return (__force cputime_t) temp; +} + void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); @@ -4045,13 +4462,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) */ rtime = nsecs_to_cputime(p->se.sum_exec_runtime); - if (total) { - u64 temp = rtime; - - temp *= utime; - do_div(temp, total); - utime = (cputime_t)temp; - } else + if (total) + utime = scale_utime(utime, rtime, total); + else utime = rtime; /* @@ -4078,13 +4491,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) total = cputime_add(cputime.utime, cputime.stime); rtime = nsecs_to_cputime(cputime.sum_exec_runtime); - if (total) { - u64 temp = rtime; - - temp *= cputime.utime; - do_div(temp, total); - utime = (cputime_t)temp; - } else + if (total) + utime = scale_utime(cputime.utime, rtime, total); + else utime = rtime; sig->prev_utime = max(sig->prev_utime, utime); @@ -4117,7 +4526,7 @@ void scheduler_tick(void) perf_event_task_tick(); #ifdef CONFIG_SMP - rq->idle_at_tick = idle_cpu(cpu); + rq->idle_balance = idle_cpu(cpu); trigger_load_balance(rq, cpu); #endif } @@ -4214,6 +4623,7 @@ static inline void schedule_debug(struct task_struct *prev) */ if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) __schedule_bug(prev); + rcu_sleep_check(); profile_hit(SCHED_PROFILING, __builtin_return_address(0)); @@ -4240,7 +4650,7 @@ pick_next_task(struct rq *rq) * Optimization: we know that if all tasks are in * the fair class we can call that function directly: */ - if (likely(rq->nr_running == rq->cfs.nr_running)) { + if (likely(rq->nr_running == rq->cfs.h_nr_running)) { p = fair_sched_class.pick_next_task(rq); if (likely(p)) return p; @@ -4677,6 +5087,9 @@ EXPORT_SYMBOL(wait_for_completion); * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. The timeout is in jiffies. It is not * interruptible. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed. */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) @@ -4691,6 +5104,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout); * * This waits for completion of a specific task to be signaled. It is * interruptible. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_interruptible(struct completion *x) { @@ -4708,6 +5123,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); * * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. It is interruptible. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed. 
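scale_utime() above distributes the precise sum_exec_runtime between user and system time in proportion to the sampled utime/stime ratio, using 64-bit division. A trivial userspace sketch of that proportional split; it ignores the 32-bit cputime_t division helpers and is only meant to show the arithmetic.

#include <stdint.h>
#include <stdio.h>

/* Split an exact runtime (rtime) into a user-time share using the ratio
 * of the sampled utime to the sampled utime + stime total. */
static uint64_t scale_utime(uint64_t utime, uint64_t rtime, uint64_t total)
{
        return total ? rtime * utime / total : rtime;
}

int main(void)
{
        uint64_t utime = 30, stime = 10;        /* sampled ticks */
        uint64_t rtime = 4000;                  /* precise runtime, same unit */

        uint64_t ut = scale_utime(utime, rtime, utime + stime);
        printf("utime = %llu, stime = %llu\n",
               (unsigned long long)ut, (unsigned long long)(rtime - ut));
        return 0;
}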
*/ long __sched wait_for_completion_interruptible_timeout(struct completion *x, @@ -4723,6 +5141,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); * * This waits to be signaled for completion of a specific task. It can be * interrupted by a kill signal. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_killable(struct completion *x) { @@ -4741,6 +5161,9 @@ EXPORT_SYMBOL(wait_for_completion_killable); * This waits for either a completion of a specific task to be * signaled or for a specified timeout to expire. It can be * interrupted by a kill signal. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_killable_timeout(struct completion *x, @@ -5026,7 +5449,20 @@ EXPORT_SYMBOL(task_nice); */ int idle_cpu(int cpu) { - return cpu_curr(cpu) == cpu_rq(cpu)->idle; + struct rq *rq = cpu_rq(cpu); + + if (rq->curr != rq->idle) + return 0; + + if (rq->nr_running) + return 0; + +#ifdef CONFIG_SMP + if (!llist_empty(&rq->wake_list)) + return 0; +#endif + + return 1; } /** @@ -5876,7 +6312,7 @@ void show_state_filter(unsigned long state_filter) printk(KERN_INFO " task PC stack pid father\n"); #endif - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(g, p) { /* * reset the NMI-timeout, listing all files on a slow @@ -5892,7 +6328,7 @@ void show_state_filter(unsigned long state_filter) #ifdef CONFIG_SCHED_DEBUG sysrq_sched_debug_show(); #endif - read_unlock(&tasklist_lock); + rcu_read_unlock(); /* * Only show locks if all tasks are dumped: */ @@ -5953,17 +6389,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); +#if defined(CONFIG_SMP) + sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); +#endif } -/* - * In a system that switches off the HZ timer nohz_cpu_mask - * indicates which cpus entered this state. This is used - * in the rcu update to wait only for active cpus. For system - * which do not switch off the HZ timer nohz_cpu_mask should - * always be CPU_BITS_NONE. - */ -cpumask_var_t nohz_cpu_mask; - /* * Increase the granularity value when there are more CPUs, * because with more CPUs the 'effective latency' as visible @@ -6016,10 +6446,9 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { if (p->sched_class && p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); - else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); - } + + cpumask_copy(&p->cpus_allowed, new_mask); + p->rt.nr_cpus_allowed = cpumask_weight(new_mask); } /* @@ -6117,7 +6546,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (task_cpu(p) != src_cpu) goto done; /* Affinity changed (again). 
*/ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) goto fail; /* @@ -6198,6 +6627,30 @@ static void calc_global_load_remove(struct rq *rq) rq->calc_load_active = 0; } +#ifdef CONFIG_CFS_BANDWIDTH +static void unthrottle_offline_cfs_rqs(struct rq *rq) +{ + struct cfs_rq *cfs_rq; + + for_each_leaf_cfs_rq(rq, cfs_rq) { + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + + if (!cfs_rq->runtime_enabled) + continue; + + /* + * clock_task is not advancing so we just need to make sure + * there's some valid quota amount + */ + cfs_rq->runtime_remaining = cfs_b->quota; + if (cfs_rq_throttled(cfs_rq)) + unthrottle_cfs_rq(cfs_rq); + } +} +#else +static void unthrottle_offline_cfs_rqs(struct rq *rq) {} +#endif + /* * Migrate all tasks from the rq, sleeping tasks will be migrated by * try_to_wake_up()->select_task_rq(). @@ -6229,6 +6682,9 @@ static void migrate_tasks(unsigned int dead_cpu) */ rq->rt.rt_throttled = 0; + /* Ensure any throttled groups are reachable by pick_next_task */ + unthrottle_offline_cfs_rqs(rq); + for ( ; ; ) { /* * There's this thread running, bail when that's the only @@ -6930,8 +7386,6 @@ static int __init isolated_cpu_setup(char *str) __setup("isolcpus=", isolated_cpu_setup); -#define SD_NODES_PER_DOMAIN 16 - #ifdef CONFIG_NUMA /** @@ -7258,11 +7712,8 @@ int sched_domain_level_max; static int __init setup_relax_domain_level(char *str) { - unsigned long val; - - val = simple_strtoul(str, NULL, 0); - if (val < sched_domain_level_max) - default_relax_domain_level = val; + if (kstrtoint(str, 0, &default_relax_domain_level)) + pr_warn("Unable to set relax_domain_level\n"); return 1; } @@ -7433,16 +7884,26 @@ static void __sdt_free(const struct cpumask *cpu_map) struct sd_data *sdd = &tl->data; for_each_cpu(j, cpu_map) { - struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); - if (sd && (sd->flags & SD_OVERLAP)) - free_sched_groups(sd->groups, 0); - kfree(*per_cpu_ptr(sdd->sd, j)); - kfree(*per_cpu_ptr(sdd->sg, j)); - kfree(*per_cpu_ptr(sdd->sgp, j)); + struct sched_domain *sd; + + if (sdd->sd) { + sd = *per_cpu_ptr(sdd->sd, j); + if (sd && (sd->flags & SD_OVERLAP)) + free_sched_groups(sd->groups, 0); + kfree(*per_cpu_ptr(sdd->sd, j)); + } + + if (sdd->sg) + kfree(*per_cpu_ptr(sdd->sg, j)); + if (sdd->sgp) + kfree(*per_cpu_ptr(sdd->sgp, j)); } free_percpu(sdd->sd); + sdd->sd = NULL; free_percpu(sdd->sg); + sdd->sg = NULL; free_percpu(sdd->sgp); + sdd->sgp = NULL; } } @@ -7455,7 +7916,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, if (!sd) return child; - set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); if (child) { sd->level = child->level + 1; @@ -7463,6 +7923,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, child->parent = sd; } sd->child = child; + set_domain_attribute(sd, attr); return sd; } @@ -7821,34 +8282,66 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) } #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ +static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */ + /* * Update cpusets according to cpu_active mask. If cpusets are * disabled, cpuset_update_active_cpus() becomes a simple wrapper * around partition_sched_domains(). + * + * If we come here as part of a suspend/resume, don't touch cpusets because we + * want to restore it back to its original state upon resume anyway. 
*/ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { + case CPU_ONLINE_FROZEN: + case CPU_DOWN_FAILED_FROZEN: + + /* + * num_cpus_frozen tracks how many CPUs are involved in suspend + * resume sequence. As long as this is not the last online + * operation in the resume sequence, just build a single sched + * domain, ignoring cpusets. + */ + num_cpus_frozen--; + if (likely(num_cpus_frozen)) { + partition_sched_domains(1, NULL, NULL); + break; + } + + /* + * This is the last CPU online operation. So fall through and + * restore the original sched domains by considering the + * cpuset configurations. + */ + case CPU_ONLINE: case CPU_DOWN_FAILED: cpuset_update_active_cpus(); - return NOTIFY_OK; + break; default: return NOTIFY_DONE; } + return NOTIFY_OK; } static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_DOWN_PREPARE: cpuset_update_active_cpus(); - return NOTIFY_OK; + break; + case CPU_DOWN_PREPARE_FROZEN: + num_cpus_frozen++; + partition_sched_domains(1, NULL, NULL); + break; default: return NOTIFY_DONE; } + return NOTIFY_OK; } static int update_runtime(struct notifier_block *nfb, @@ -7972,6 +8465,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, /* allow initial update_cfs_load() to truncate */ cfs_rq->load_stamp = 1; #endif + init_cfs_rq_runtime(cfs_rq); tg->cfs_rq[cpu] = cfs_rq; tg->se[cpu] = se; @@ -8111,6 +8605,7 @@ void __init sched_init(void) * We achieve this by letting root_task_group's tasks sit * directly in rq->cfs (i.e root_task_group->se[] = NULL). */ + init_cfs_bandwidth(&root_task_group.cfs_bandwidth); init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -8140,7 +8635,6 @@ void __init sched_init(void) rq_attach_root(rq, &def_root_domain); #ifdef CONFIG_NO_HZ rq->nohz_balance_kick = 0; - init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i)); #endif #endif init_rq_hrtick(rq); @@ -8182,8 +8676,6 @@ void __init sched_init(void) */ current->sched_class = &fair_sched_class; - /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); #ifdef CONFIG_SMP zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); #ifdef CONFIG_NO_HZ @@ -8222,6 +8714,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) { static unsigned long prev_jiffy; /* ratelimiting */ + rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. 
*/ if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || oops_in_progress) return; @@ -8364,6 +8857,8 @@ static void free_fair_sched_group(struct task_group *tg) { int i; + destroy_cfs_bandwidth(tg_cfs_bandwidth(tg)); + for_each_possible_cpu(i) { if (tg->cfs_rq) kfree(tg->cfs_rq[i]); @@ -8391,6 +8886,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) tg->shares = NICE_0_LOAD; + init_cfs_bandwidth(tg_cfs_bandwidth(tg)); + for_each_possible_cpu(i) { cfs_rq = kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, cpu_to_node(i)); @@ -8592,6 +9089,7 @@ void sched_destroy_group(struct task_group *tg) */ void sched_move_task(struct task_struct *tsk) { + struct task_group *tg; int on_rq, running; unsigned long flags; struct rq *rq; @@ -8606,6 +9104,12 @@ void sched_move_task(struct task_struct *tsk) if (unlikely(running)) tsk->sched_class->put_prev_task(rq, tsk); + tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id, + lockdep_is_held(&tsk->sighand->siglock)), + struct task_group, css); + tg = autogroup_task_group(tsk, tg); + tsk->sched_task_group = tg; + #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->task_move_group) tsk->sched_class->task_move_group(tsk, on_rq); @@ -8666,12 +9170,7 @@ unsigned long sched_group_shares(struct task_group *tg) } #endif -#ifdef CONFIG_RT_GROUP_SCHED -/* - * Ensure that the real time constraints are schedulable. - */ -static DEFINE_MUTEX(rt_constraints_mutex); - +#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) static unsigned long to_ratio(u64 period, u64 runtime) { if (runtime == RUNTIME_INF) @@ -8679,6 +9178,13 @@ static unsigned long to_ratio(u64 period, u64 runtime) return div64_u64(runtime << 20, period); } +#endif + +#ifdef CONFIG_RT_GROUP_SCHED +/* + * Ensure that the real time constraints are schedulable. 
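to_ratio() above is now shared by the RT and CFS bandwidth checks; it expresses runtime/period as a fixed-point fraction scaled by 2^20. A standalone sketch of the same arithmetic (the RUNTIME_INF special case, which maps to a full 1 << 20, is omitted):

#include <stdint.h>
#include <stdio.h>

/* runtime/period as a fixed-point fraction scaled by 2^20 */
static uint64_t to_ratio(uint64_t period_ns, uint64_t runtime_ns)
{
	return (runtime_ns << 20) / period_ns;
}

int main(void)
{
	/* 25ms of runtime every 100ms -> 0.25 * 2^20 = 262144 */
	printf("%llu\n",
	       (unsigned long long)to_ratio(100000000ULL, 25000000ULL));
	return 0;
}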
+ */ +static DEFINE_MUTEX(rt_constraints_mutex); /* Must be called with tasklist_lock held */ static inline int tg_has_rt_tasks(struct task_group *tg) @@ -8699,7 +9205,7 @@ struct rt_schedulable_data { u64 rt_runtime; }; -static int tg_schedulable(struct task_group *tg, void *data) +static int tg_rt_schedulable(struct task_group *tg, void *data) { struct rt_schedulable_data *d = data; struct task_group *child; @@ -8757,16 +9263,22 @@ static int tg_schedulable(struct task_group *tg, void *data) static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) { + int ret; + struct rt_schedulable_data data = { .tg = tg, .rt_period = period, .rt_runtime = runtime, }; - return walk_tg_tree(tg_schedulable, tg_nop, &data); + rcu_read_lock(); + ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data); + rcu_read_unlock(); + + return ret; } -static int tg_set_bandwidth(struct task_group *tg, +static int tg_set_rt_bandwidth(struct task_group *tg, u64 rt_period, u64 rt_runtime) { int i, err = 0; @@ -8805,7 +9317,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) if (rt_runtime_us < 0) rt_runtime = RUNTIME_INF; - return tg_set_bandwidth(tg, rt_period, rt_runtime); + return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } long sched_group_rt_runtime(struct task_group *tg) @@ -8830,7 +9342,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) if (rt_period == 0) return -EINVAL; - return tg_set_bandwidth(tg, rt_period, rt_runtime); + return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } long sched_group_rt_period(struct task_group *tg) @@ -9034,6 +9546,238 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) return (u64) scale_load_down(tg->shares); } + +#ifdef CONFIG_CFS_BANDWIDTH +static DEFINE_MUTEX(cfs_constraints_mutex); + +const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ +const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ + +static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); + +static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) +{ + int i, ret = 0, runtime_enabled; + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); + + if (tg == &root_task_group) + return -EINVAL; + + /* + * Ensure we have at some amount of bandwidth every period. This is + * to prevent reaching a state of large arrears when throttled via + * entity_tick() resulting in prolonged exit starvation. + */ + if (quota < min_cfs_quota_period || period < min_cfs_quota_period) + return -EINVAL; + + /* + * Likewise, bound things on the otherside by preventing insane quota + * periods. This also allows us to normalize in computing quota + * feasibility. 
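These bounds are what a cpu-cgroup user runs into when writing the cfs_quota_us/cfs_period_us files added later in this patch. A hedged usage sketch; the mount point and the group name "mygroup" are assumptions for the example:

#include <stdio.h>

/*
 * Illustrative only: cap the (hypothetical) group "mygroup" at 25ms of CPU
 * time per 100ms period, i.e. roughly a quarter of one CPU. Values are in
 * microseconds and must respect the 1ms..1s bounds enforced above.
 */
int main(void)
{
	FILE *f;

	f = fopen("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_period_us", "w");
	if (f) {
		fprintf(f, "100000\n");
		fclose(f);
	}

	f = fopen("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_quota_us", "w");
	if (f) {
		fprintf(f, "25000\n");
		fclose(f);
	}
	return 0;
}

Writing a negative value to cpu.cfs_quota_us maps back to RUNTIME_INF and removes the limit; the resulting throttling counters are exported through the cpu.stat map file also added by this patch.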
+ */ + if (period > max_cfs_quota_period) + return -EINVAL; + + mutex_lock(&cfs_constraints_mutex); + ret = __cfs_schedulable(tg, period, quota); + if (ret) + goto out_unlock; + + runtime_enabled = quota != RUNTIME_INF; + raw_spin_lock_irq(&cfs_b->lock); + cfs_b->period = ns_to_ktime(period); + cfs_b->quota = quota; + + __refill_cfs_bandwidth_runtime(cfs_b); + /* restart the period timer (if active) to handle new period expiry */ + if (runtime_enabled && cfs_b->timer_active) { + /* force a reprogram */ + cfs_b->timer_active = 0; + __start_cfs_bandwidth(cfs_b); + } + raw_spin_unlock_irq(&cfs_b->lock); + + for_each_possible_cpu(i) { + struct cfs_rq *cfs_rq = tg->cfs_rq[i]; + struct rq *rq = rq_of(cfs_rq); + + raw_spin_lock_irq(&rq->lock); + cfs_rq->runtime_enabled = runtime_enabled; + cfs_rq->runtime_remaining = 0; + + if (cfs_rq_throttled(cfs_rq)) + unthrottle_cfs_rq(cfs_rq); + raw_spin_unlock_irq(&rq->lock); + } +out_unlock: + mutex_unlock(&cfs_constraints_mutex); + + return ret; +} + +int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) +{ + u64 quota, period; + + period = ktime_to_ns(tg_cfs_bandwidth(tg)->period); + if (cfs_quota_us < 0) + quota = RUNTIME_INF; + else + quota = (u64)cfs_quota_us * NSEC_PER_USEC; + + return tg_set_cfs_bandwidth(tg, period, quota); +} + +long tg_get_cfs_quota(struct task_group *tg) +{ + u64 quota_us; + + if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF) + return -1; + + quota_us = tg_cfs_bandwidth(tg)->quota; + do_div(quota_us, NSEC_PER_USEC); + + return quota_us; +} + +int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) +{ + u64 quota, period; + + period = (u64)cfs_period_us * NSEC_PER_USEC; + quota = tg_cfs_bandwidth(tg)->quota; + + if (period <= 0) + return -EINVAL; + + return tg_set_cfs_bandwidth(tg, period, quota); +} + +long tg_get_cfs_period(struct task_group *tg) +{ + u64 cfs_period_us; + + cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period); + do_div(cfs_period_us, NSEC_PER_USEC); + + return cfs_period_us; +} + +static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft) +{ + return tg_get_cfs_quota(cgroup_tg(cgrp)); +} + +static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype, + s64 cfs_quota_us) +{ + return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us); +} + +static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft) +{ + return tg_get_cfs_period(cgroup_tg(cgrp)); +} + +static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype, + u64 cfs_period_us) +{ + return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us); +} + +struct cfs_schedulable_data { + struct task_group *tg; + u64 period, quota; +}; + +/* + * normalize group quota/period to be quota/max_period + * note: units are usecs + */ +static u64 normalize_cfs_quota(struct task_group *tg, + struct cfs_schedulable_data *d) +{ + u64 quota, period; + + if (tg == d->tg) { + period = d->period; + quota = d->quota; + } else { + period = tg_get_cfs_period(tg); + quota = tg_get_cfs_quota(tg); + } + + /* note: these should typically be equivalent */ + if (quota == RUNTIME_INF || quota == -1) + return RUNTIME_INF; + + return to_ratio(period, quota); +} + +static int tg_cfs_schedulable_down(struct task_group *tg, void *data) +{ + struct cfs_schedulable_data *d = data; + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); + s64 quota = 0, parent_quota = -1; + + if (!tg->parent) { + quota = RUNTIME_INF; + } else { + struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent); + + quota = 
normalize_cfs_quota(tg, d); + parent_quota = parent_b->hierarchal_quota; + + /* + * ensure max(child_quota) <= parent_quota, inherit when no + * limit is set + */ + if (quota == RUNTIME_INF) + quota = parent_quota; + else if (parent_quota != RUNTIME_INF && quota > parent_quota) + return -EINVAL; + } + cfs_b->hierarchal_quota = quota; + + return 0; +} + +static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) +{ + int ret; + struct cfs_schedulable_data data = { + .tg = tg, + .period = period, + .quota = quota, + }; + + if (quota != RUNTIME_INF) { + do_div(data.period, NSEC_PER_USEC); + do_div(data.quota, NSEC_PER_USEC); + } + + rcu_read_lock(); + ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data); + rcu_read_unlock(); + + return ret; +} + +static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft, + struct cgroup_map_cb *cb) +{ + struct task_group *tg = cgroup_tg(cgrp); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); + + cb->fill(cb, "nr_periods", cfs_b->nr_periods); + cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); + cb->fill(cb, "throttled_time", cfs_b->throttled_time); + + return 0; +} +#endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED @@ -9068,6 +9812,22 @@ static struct cftype cpu_files[] = { .write_u64 = cpu_shares_write_u64, }, #endif +#ifdef CONFIG_CFS_BANDWIDTH + { + .name = "cfs_quota_us", + .read_s64 = cpu_cfs_quota_read_s64, + .write_s64 = cpu_cfs_quota_write_s64, + }, + { + .name = "cfs_period_us", + .read_u64 = cpu_cfs_period_read_u64, + .write_u64 = cpu_cfs_period_write_u64, + }, + { + .name = "stat", + .read_map = cpu_stats_show, + }, +#endif #ifdef CONFIG_RT_GROUP_SCHED { .name = "rt_runtime_us", @@ -9446,4 +10206,3 @@ struct cgroup_subsys cpuacct_subsys = { .subsys_id = cpuacct_subsys_id, }; #endif /* CONFIG_CGROUP_CPUACCT */ - diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 9d8af0b3fb6..63ce8c3d05c 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -74,7 +74,7 @@ */ unsigned long long __attribute__((weak)) sched_clock(void) { - return (unsigned long long)(jiffies - INITIAL_JIFFIES) + return (unsigned long long)(get_jiffies_64() - INITIAL_JIFFIES) * (NSEC_PER_SEC / HZ); } EXPORT_SYMBOL_GPL(sched_clock); diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index a6710a112b4..6371af0e461 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -264,6 +264,9 @@ static void print_cpu(struct seq_file *m, int cpu) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) P(nr_running); + SEQ_printf(m, " .%-30s: %d.%03d \n", "ave_nr_running", + rq->ave_nr_running / FIXED_1, + ((rq->ave_nr_running % FIXED_1) * 1000) / FIXED_1); SEQ_printf(m, " .%-30s: %lu\n", "load", rq->load.weight); P(nr_switches); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index bc8ee999381..2bc9fb5dde3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -36,8 +36,8 @@ * (to see the precise effective timeslice length of your workload, * run vmstat and monitor the context-switches (cs) field) */ -unsigned int sysctl_sched_latency = 6000000ULL; -unsigned int normalized_sysctl_sched_latency = 6000000ULL; +unsigned int sysctl_sched_latency = 4000000ULL; +unsigned int normalized_sysctl_sched_latency = 4000000ULL; /* * The initial- and re-scaling of tunables is configurable @@ -77,8 +77,8 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; * and reduces their over-scheduling. Synchronous workloads will still * have immediate wakeup/sleep latencies. 
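The latency and wakeup-granularity defaults are changed just below (sched_latency 6ms -> 4ms, wakeup granularity 1ms -> 4ms). On a kernel built with CONFIG_SCHED_DEBUG the effective values can be checked at runtime; a small reader, assuming the usual /proc/sys/kernel paths:

#include <stdio.h>

static void show(const char *path)
{
	char buf[64];
	FILE *f = fopen(path, "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	if (f)
		fclose(f);
}

int main(void)
{
	show("/proc/sys/kernel/sched_latency_ns");
	show("/proc/sys/kernel/sched_wakeup_granularity_ns");
	return 0;
}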
*/ -unsigned int sysctl_sched_wakeup_granularity = 1000000UL; -unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; +unsigned int sysctl_sched_wakeup_granularity = 4000000UL; +unsigned int normalized_sysctl_sched_wakeup_granularity = 4000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; @@ -89,6 +89,20 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL; */ unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; +#ifdef CONFIG_CFS_BANDWIDTH +/* + * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool + * each time a cfs_rq requests quota. + * + * Note: in the case that the slice exceeds the runtime remaining (either due + * to consumption or the quota being specified to be smaller than the slice) + * we will always only issue the remaining available time. + * + * default: 5 msec, units: microseconds + */ +unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; +#endif + static const struct sched_class fair_sched_class; /************************************************************** @@ -292,6 +306,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) #endif /* CONFIG_FAIR_GROUP_SCHED */ +static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec); /************************************************************** * Scheduling class tree data structure manipulation methods: @@ -583,6 +599,8 @@ static void update_curr(struct cfs_rq *cfs_rq) cpuacct_charge(curtask, delta_exec); account_group_exec_runtime(curtask, delta_exec); } + + account_cfs_rq_runtime(cfs_rq, delta_exec); } static inline void @@ -688,6 +706,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) } #ifdef CONFIG_FAIR_GROUP_SCHED +/* we need this in update_cfs_load and load-balance functions below */ +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq); # ifdef CONFIG_SMP static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, int global_update) @@ -710,7 +730,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) u64 now, delta; unsigned long load = cfs_rq->load.weight; - if (cfs_rq->tg == &root_task_group) + if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq)) return; now = rq_of(cfs_rq)->clock_task; @@ -752,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } +static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) +{ + long tg_weight; + + /* + * Use this CPU's actual weight instead of the last load_contribution + * to gain a more accurate current total weight. See + * update_cfs_rq_load_contribution(). 
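calc_tg_weight() and the rewritten calc_cfs_shares() below boil down to: this cfs_rq receives tg->shares scaled by its fraction of the group's total weight, clipped to [MIN_SHARES, tg->shares]. A standalone sketch of that arithmetic with illustrative weights:

#include <stdio.h>

#define MIN_SHARES 2L	/* illustrative lower clip */

static long shares_for(long tg_shares, long tg_weight, long local_weight)
{
	long shares = tg_shares * local_weight;

	if (tg_weight)
		shares /= tg_weight;
	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	if (shares > tg_shares)
		shares = tg_shares;
	return shares;
}

int main(void)
{
	/* group has 1024 shares; this CPU holds 256 of 1024 total weight */
	printf("%ld\n", shares_for(1024, 1024, 256));	/* -> 256 */
	return 0;
}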
+ */ + tg_weight = atomic_read(&tg->load_weight); + tg_weight -= cfs_rq->load_contribution; + tg_weight += cfs_rq->load.weight; + + return tg_weight; +} + static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { - long load_weight, load, shares; + long tg_weight, load, shares; + tg_weight = calc_tg_weight(tg, cfs_rq); load = cfs_rq->load.weight; - load_weight = atomic_read(&tg->load_weight); - load_weight += load; - load_weight -= cfs_rq->load_contribution; - shares = (tg->shares * load); - if (load_weight) - shares /= load_weight; + if (tg_weight) + shares /= tg_weight; if (shares < MIN_SHARES) shares = MIN_SHARES; @@ -819,7 +852,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq) tg = cfs_rq->tg; se = tg->se[cpu_of(rq_of(cfs_rq))]; - if (!se) + if (!se || throttled_hierarchy(cfs_rq)) return; #ifndef CONFIG_SMP if (likely(se->load.weight == tg->shares)) @@ -1028,6 +1061,8 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) __clear_buddies_skip(se); } +static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); + static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { @@ -1066,6 +1101,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (!(flags & DEQUEUE_SLEEP)) se->vruntime -= cfs_rq->min_vruntime; + /* return excess runtime on last dequeue */ + return_cfs_rq_runtime(cfs_rq); + update_min_vruntime(cfs_rq); update_cfs_shares(cfs_rq); } @@ -1077,6 +1115,8 @@ static void check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) { unsigned long ideal_runtime, delta_exec; + struct sched_entity *se; + s64 delta; ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; @@ -1095,22 +1135,17 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) * narrow margin doesn't have to wait for a full slice. * This also mitigates buddy induced latencies under load. */ - if (!sched_feat(WAKEUP_PREEMPT)) - return; - if (delta_exec < sysctl_sched_min_granularity) return; - if (cfs_rq->nr_running > 1) { - struct sched_entity *se = __pick_first_entity(cfs_rq); - s64 delta = curr->vruntime - se->vruntime; + se = __pick_first_entity(cfs_rq); + delta = curr->vruntime - se->vruntime; - if (delta < 0) - return; + if (delta < 0) + return; - if (delta > ideal_runtime) - resched_task(rq_of(cfs_rq)->curr); - } + if (delta > ideal_runtime) + resched_task(rq_of(cfs_rq)->curr); } static void @@ -1233,10 +1268,583 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) return; #endif - if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT)) + if (cfs_rq->nr_running > 1) check_preempt_tick(cfs_rq, curr); } + +/************************************************** + * CFS bandwidth control machinery + */ + +#ifdef CONFIG_CFS_BANDWIDTH +/* + * default period for cfs group bandwidth. + * default: 0.1s, units: nanoseconds + */ +static inline u64 default_cfs_period(void) +{ + return 100000000ULL; +} + +static inline u64 sched_cfs_bandwidth_slice(void) +{ + return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC; +} + +/* + * Replenish runtime according to assigned quota and update expiration time. + * We use sched_clock_cpu directly instead of rq->clock to avoid adding + * additional synchronization around rq->lock. 
+ * + * requires cfs_b->lock + */ +static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) +{ + u64 now; + + if (cfs_b->quota == RUNTIME_INF) + return; + + now = sched_clock_cpu(smp_processor_id()); + cfs_b->runtime = cfs_b->quota; + cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); +} + +/* returns 0 on failure to allocate runtime */ +static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + struct task_group *tg = cfs_rq->tg; + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); + u64 amount = 0, min_amount, expires; + + /* note: this is a positive sum as runtime_remaining <= 0 */ + min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; + + raw_spin_lock(&cfs_b->lock); + if (cfs_b->quota == RUNTIME_INF) + amount = min_amount; + else { + /* + * If the bandwidth pool has become inactive, then at least one + * period must have elapsed since the last consumption. + * Refresh the global state and ensure bandwidth timer becomes + * active. + */ + if (!cfs_b->timer_active) { + __refill_cfs_bandwidth_runtime(cfs_b); + __start_cfs_bandwidth(cfs_b); + } + + if (cfs_b->runtime > 0) { + amount = min(cfs_b->runtime, min_amount); + cfs_b->runtime -= amount; + cfs_b->idle = 0; + } + } + expires = cfs_b->runtime_expires; + raw_spin_unlock(&cfs_b->lock); + + cfs_rq->runtime_remaining += amount; + /* + * we may have advanced our local expiration to account for allowed + * spread between our sched_clock and the one on which runtime was + * issued. + */ + if ((s64)(expires - cfs_rq->runtime_expires) > 0) + cfs_rq->runtime_expires = expires; + + return cfs_rq->runtime_remaining > 0; +} + +/* + * Note: This depends on the synchronization provided by sched_clock and the + * fact that rq->clock snapshots this value. + */ +static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct rq *rq = rq_of(cfs_rq); + + /* if the deadline is ahead of our clock, nothing to do */ + if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0)) + return; + + if (cfs_rq->runtime_remaining < 0) + return; + + /* + * If the local deadline has passed we have to consider the + * possibility that our sched_clock is 'fast' and the global deadline + * has not truly expired. + * + * Fortunately we can check determine whether this the case by checking + * whether the global deadline has advanced. 
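That decision reduces to two signed comparisons, implemented just below in expire_cfs_rq_runtime(). A standalone model; the struct and constant names here are illustrative, not the kernel types:

#include <stdio.h>
#include <stdint.h>

#define TICK_NSEC 1000000LL	/* illustrative 1ms tick */

struct pool  { int64_t runtime_expires; };
struct local { int64_t runtime_remaining; int64_t runtime_expires; };

static void expire(struct local *l, const struct pool *p, int64_t now)
{
	if (now - l->runtime_expires < 0 || l->runtime_remaining < 0)
		return;				/* deadline still ahead */

	if (l->runtime_expires - p->runtime_expires >= 0)
		l->runtime_expires += TICK_NSEC;	/* clock drift only */
	else
		l->runtime_remaining = 0;		/* truly expired */
}

int main(void)
{
	struct pool p = { .runtime_expires = 100 };
	struct local l = { .runtime_remaining = 5, .runtime_expires = 90 };

	expire(&l, &p, 95);	/* global deadline moved on: runtime is gone */
	printf("remaining=%lld expires=%lld\n",
	       (long long)l.runtime_remaining, (long long)l.runtime_expires);
	return 0;
}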
+ */ + + if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) { + /* extend local deadline, drift is bounded above by 2 ticks */ + cfs_rq->runtime_expires += TICK_NSEC; + } else { + /* global deadline is ahead, expiration has passed */ + cfs_rq->runtime_remaining = 0; + } +} + +static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec) +{ + /* dock delta_exec before expiring quota (as it could span periods) */ + cfs_rq->runtime_remaining -= delta_exec; + expire_cfs_rq_runtime(cfs_rq); + + if (likely(cfs_rq->runtime_remaining > 0)) + return; + + /* + * if we're unable to extend our runtime we resched so that the active + * hierarchy can be throttled + */ + if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) + resched_task(rq_of(cfs_rq)->curr); +} + +static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec) +{ + if (!cfs_rq->runtime_enabled) + return; + + __account_cfs_rq_runtime(cfs_rq, delta_exec); +} + +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) +{ + return cfs_rq->throttled; +} + +/* check whether cfs_rq, or any parent, is throttled */ +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq) +{ + return cfs_rq->throttle_count; +} + +/* + * Ensure that neither of the group entities corresponding to src_cpu or + * dest_cpu are members of a throttled hierarchy when performing group + * load-balance operations. + */ +static inline int throttled_lb_pair(struct task_group *tg, + int src_cpu, int dest_cpu) +{ + struct cfs_rq *src_cfs_rq, *dest_cfs_rq; + + src_cfs_rq = tg->cfs_rq[src_cpu]; + dest_cfs_rq = tg->cfs_rq[dest_cpu]; + + return throttled_hierarchy(src_cfs_rq) || + throttled_hierarchy(dest_cfs_rq); +} + +/* updated child weight may affect parent so we have to do this bottom up */ +static int tg_unthrottle_up(struct task_group *tg, void *data) +{ + struct rq *rq = data; + struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; + + cfs_rq->throttle_count--; +#ifdef CONFIG_SMP + if (!cfs_rq->throttle_count) { + u64 delta = rq->clock_task - cfs_rq->load_stamp; + + /* leaving throttled state, advance shares averaging windows */ + cfs_rq->load_stamp += delta; + cfs_rq->load_last += delta; + + /* update entity weight now that we are on_rq again */ + update_cfs_shares(cfs_rq); + } +#endif + + return 0; +} + +static int tg_throttle_down(struct task_group *tg, void *data) +{ + struct rq *rq = data; + struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; + + /* group is entering throttled state, record last load */ + if (!cfs_rq->throttle_count) + update_cfs_load(cfs_rq, 0); + cfs_rq->throttle_count++; + + return 0; +} + +static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) +{ + struct rq *rq = rq_of(cfs_rq); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct sched_entity *se; + long task_delta, dequeue = 1; + + se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; + + /* account load preceding throttle */ + rcu_read_lock(); + walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq); + rcu_read_unlock(); + + task_delta = cfs_rq->h_nr_running; + for_each_sched_entity(se) { + struct cfs_rq *qcfs_rq = cfs_rq_of(se); + /* throttled entity or throttle-on-deactivate */ + if (!se->on_rq) + break; + + if (dequeue) + dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP); + qcfs_rq->h_nr_running -= task_delta; + + if (qcfs_rq->load.weight) + dequeue = 0; + } + + if (!se) + rq->nr_running -= task_delta; + + cfs_rq->throttled = 1; + cfs_rq->throttled_timestamp = rq->clock; + 
raw_spin_lock(&cfs_b->lock); + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + raw_spin_unlock(&cfs_b->lock); +} + +static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) +{ + struct rq *rq = rq_of(cfs_rq); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct sched_entity *se; + int enqueue = 1; + long task_delta; + + se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; + + cfs_rq->throttled = 0; + raw_spin_lock(&cfs_b->lock); + cfs_b->throttled_time += rq->clock - cfs_rq->throttled_timestamp; + list_del_rcu(&cfs_rq->throttled_list); + raw_spin_unlock(&cfs_b->lock); + cfs_rq->throttled_timestamp = 0; + + update_rq_clock(rq); + /* update hierarchical throttle state */ + walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq); + + if (!cfs_rq->load.weight) + return; + + task_delta = cfs_rq->h_nr_running; + for_each_sched_entity(se) { + if (se->on_rq) + enqueue = 0; + + cfs_rq = cfs_rq_of(se); + if (enqueue) + enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP); + cfs_rq->h_nr_running += task_delta; + + if (cfs_rq_throttled(cfs_rq)) + break; + } + + if (!se) + rq->nr_running += task_delta; + + /* determine whether we need to wake up potentially idle cpu */ + if (rq->curr == rq->idle && rq->cfs.nr_running) + resched_task(rq->curr); +} + +static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, + u64 remaining, u64 expires) +{ + struct cfs_rq *cfs_rq; + u64 runtime = remaining; + + rcu_read_lock(); + list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, + throttled_list) { + struct rq *rq = rq_of(cfs_rq); + + raw_spin_lock(&rq->lock); + if (!cfs_rq_throttled(cfs_rq)) + goto next; + + runtime = -cfs_rq->runtime_remaining + 1; + if (runtime > remaining) + runtime = remaining; + remaining -= runtime; + + cfs_rq->runtime_remaining += runtime; + cfs_rq->runtime_expires = expires; + + /* we check whether we're throttled above */ + if (cfs_rq->runtime_remaining > 0) + unthrottle_cfs_rq(cfs_rq); + +next: + raw_spin_unlock(&rq->lock); + + if (!remaining) + break; + } + rcu_read_unlock(); + + return remaining; +} + +/* + * Responsible for refilling a task_group's bandwidth and unthrottling its + * cfs_rqs as appropriate. If there has been no activity within the last + * period the timer is deactivated until scheduling resumes; cfs_b->idle is + * used to track this state. + */ +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) +{ + u64 runtime, runtime_expires; + int idle = 1, throttled; + + raw_spin_lock(&cfs_b->lock); + /* no need to continue the timer with no bandwidth constraint */ + if (cfs_b->quota == RUNTIME_INF) + goto out_unlock; + + throttled = !list_empty(&cfs_b->throttled_cfs_rq); + /* idle depends on !throttled (for the case of a large deficit) */ + idle = cfs_b->idle && !throttled; + cfs_b->nr_periods += overrun; + + /* if we're going inactive then everything else can be deferred */ + if (idle) + goto out_unlock; + + __refill_cfs_bandwidth_runtime(cfs_b); + + if (!throttled) { + /* mark as potentially idle for the upcoming period */ + cfs_b->idle = 1; + goto out_unlock; + } + + /* account preceding periods in which throttling occurred */ + cfs_b->nr_throttled += overrun; + + /* + * There are throttled entities so we must first use the new bandwidth + * to unthrottle them before making it generally available. This + * ensures that all existing debts will be paid before a new cfs_rq is + * allowed to run. 
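distribute_cfs_runtime() above hands each throttled cfs_rq just enough to clear its debt (one nanosecond past zero) until the refill runs out; a queue whose debt cannot be covered stays throttled for another period. A standalone model of that loop:

#include <stdio.h>
#include <stdint.h>

/* grant a queue just enough to clear its debt, bounded by the pool */
static int64_t pay_off(int64_t *pool, int64_t debt /* <= 0 */)
{
	int64_t grant = -debt + 1;

	if (grant > *pool)
		grant = *pool;
	*pool -= grant;
	return debt + grant;	/* queue's new runtime_remaining */
}

int main(void)
{
	int64_t pool = 4000000;			/* 4ms refilled this period */
	int64_t q[2] = { -1500000, -3000000 };	/* two throttled queues */
	int i;

	for (i = 0; i < 2 && pool; i++) {
		q[i] = pay_off(&pool, q[i]);
		printf("queue %d: remaining %lld, pool %lld%s\n", i,
		       (long long)q[i], (long long)pool,
		       q[i] > 0 ? " (unthrottled)" : " (stays throttled)");
	}
	return 0;
}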
+ */ + runtime = cfs_b->runtime; + runtime_expires = cfs_b->runtime_expires; + cfs_b->runtime = 0; + + /* + * This check is repeated as we are holding onto the new bandwidth + * while we unthrottle. This can potentially race with an unthrottled + * group trying to acquire new bandwidth from the global pool. + */ + while (throttled && runtime > 0) { + raw_spin_unlock(&cfs_b->lock); + /* we can't nest cfs_b->lock while distributing bandwidth */ + runtime = distribute_cfs_runtime(cfs_b, runtime, + runtime_expires); + raw_spin_lock(&cfs_b->lock); + + throttled = !list_empty(&cfs_b->throttled_cfs_rq); + } + + /* return (any) remaining runtime */ + cfs_b->runtime = runtime; + /* + * While we are ensured activity in the period following an + * unthrottle, this also covers the case in which the new bandwidth is + * insufficient to cover the existing bandwidth deficit. (Forcing the + * timer to remain active while there are any throttled entities.) + */ + cfs_b->idle = 0; +out_unlock: + if (idle) + cfs_b->timer_active = 0; + raw_spin_unlock(&cfs_b->lock); + + return idle; +} + +/* a cfs_rq won't donate quota below this amount */ +static const u64 min_cfs_rq_runtime = 1 * NSEC_PER_MSEC; +/* minimum remaining period time to redistribute slack quota */ +static const u64 min_bandwidth_expiration = 2 * NSEC_PER_MSEC; +/* how long we wait to gather additional slack before distributing */ +static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC; + +/* are we near the end of the current quota period? */ +static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire) +{ + struct hrtimer *refresh_timer = &cfs_b->period_timer; + u64 remaining; + + /* if the call-back is running a quota refresh is already occurring */ + if (hrtimer_callback_running(refresh_timer)) + return 1; + + /* is a quota refresh about to occur? */ + remaining = ktime_to_ns(hrtimer_expires_remaining(refresh_timer)); + if (remaining < min_expire) + return 1; + + return 0; +} + +static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b) +{ + u64 min_left = cfs_bandwidth_slack_period + min_bandwidth_expiration; + + /* if there's a quota refresh soon don't bother with slack */ + if (runtime_refresh_within(cfs_b, min_left)) + return; + + start_bandwidth_timer(&cfs_b->slack_timer, + ns_to_ktime(cfs_bandwidth_slack_period)); +} + +/* we know any runtime found here is valid as update_curr() precedes return */ +static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + s64 slack_runtime = cfs_rq->runtime_remaining - min_cfs_rq_runtime; + + if (slack_runtime <= 0) + return; + + raw_spin_lock(&cfs_b->lock); + if (cfs_b->quota != RUNTIME_INF && + cfs_rq->runtime_expires == cfs_b->runtime_expires) { + cfs_b->runtime += slack_runtime; + + /* we are under rq->lock, defer unthrottling using a timer */ + if (cfs_b->runtime > sched_cfs_bandwidth_slice() && + !list_empty(&cfs_b->throttled_cfs_rq)) + start_cfs_slack_bandwidth(cfs_b); + } + raw_spin_unlock(&cfs_b->lock); + + /* even if it's not valid for return we don't want to try again */ + cfs_rq->runtime_remaining -= slack_runtime; +} + +static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) + return; + + __return_cfs_rq_runtime(cfs_rq); +} + +/* + * This is done with a timer (instead of inline with bandwidth return) since + * it's necessary to juggle rq->locks to unthrottle their respective cfs_rqs. 
+ */ +static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) +{ + u64 runtime = 0, slice = sched_cfs_bandwidth_slice(); + u64 expires; + + /* confirm we're still not at a refresh boundary */ + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) + return; + + raw_spin_lock(&cfs_b->lock); + if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) { + runtime = cfs_b->runtime; + cfs_b->runtime = 0; + } + expires = cfs_b->runtime_expires; + raw_spin_unlock(&cfs_b->lock); + + if (!runtime) + return; + + runtime = distribute_cfs_runtime(cfs_b, runtime, expires); + + raw_spin_lock(&cfs_b->lock); + if (expires == cfs_b->runtime_expires) + cfs_b->runtime = runtime; + raw_spin_unlock(&cfs_b->lock); +} + +/* + * When a group wakes up we want to make sure that its quota is not already + * expired/exceeded, otherwise it may be allowed to steal additional ticks of + * runtime as update_curr() throttling can not not trigger until it's on-rq. + */ +static void check_enqueue_throttle(struct cfs_rq *cfs_rq) +{ + /* an active group must be handled by the update_curr()->put() path */ + if (!cfs_rq->runtime_enabled || cfs_rq->curr) + return; + + /* ensure the group is not already throttled */ + if (cfs_rq_throttled(cfs_rq)) + return; + + /* update runtime allocation */ + account_cfs_rq_runtime(cfs_rq, 0); + if (cfs_rq->runtime_remaining <= 0) + throttle_cfs_rq(cfs_rq); +} + +/* conditionally throttle active cfs_rq's from put_prev_entity() */ +static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0)) + return; + + /* + * it's possible for a throttled entity to be forced into a running + * state (e.g. set_curr_task), in this case we're finished. + */ + if (cfs_rq_throttled(cfs_rq)) + return; + + throttle_cfs_rq(cfs_rq); +} +#else +static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec) {} +static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} +static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} +static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} + +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) +{ + return 0; +} + +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq) +{ + return 0; +} + +static inline int throttled_lb_pair(struct task_group *tg, + int src_cpu, int dest_cpu) +{ + return 0; +} +#endif + /************************************************** * CFS operations on tasks: */ @@ -1313,16 +1921,33 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) break; cfs_rq = cfs_rq_of(se); enqueue_entity(cfs_rq, se, flags); + + /* + * end evaluation on encountering a throttled cfs_rq + * + * note: in the case of encountering a throttled cfs_rq we will + * post the final h_nr_running increment below. + */ + if (cfs_rq_throttled(cfs_rq)) + break; + cfs_rq->h_nr_running++; + flags = ENQUEUE_WAKEUP; } for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); + cfs_rq->h_nr_running++; + + if (cfs_rq_throttled(cfs_rq)) + break; update_cfs_load(cfs_rq, 0); update_cfs_shares(cfs_rq); } + if (!se) + inc_nr_running(rq); hrtick_update(rq); } @@ -1343,6 +1968,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); + /* + * end evaluation on encountering a throttled cfs_rq + * + * note: in the case of encountering a throttled cfs_rq we will + * post the final h_nr_running decrement below. 
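The net effect of the enqueue/dequeue changes here is that h_nr_running is adjusted on every level up to and including the first throttled cfs_rq, and on nothing above it, since the throttled entity is already off its parent's queue. A toy model of the enqueue walk (names are illustrative):

#include <stdio.h>

struct cfs_rq_model {
	const char *name;
	int h_nr_running;
	int throttled;
	struct cfs_rq_model *parent;
};

static void enqueue_task(struct cfs_rq_model *cfs_rq)
{
	for (; cfs_rq; cfs_rq = cfs_rq->parent) {
		cfs_rq->h_nr_running++;
		if (cfs_rq->throttled)
			break;	/* parents unaffected until unthrottle */
	}
}

int main(void)
{
	struct cfs_rq_model root  = { "root",  0, 0, NULL };
	struct cfs_rq_model group = { "group", 0, 1, &root };	/* throttled */
	struct cfs_rq_model leaf  = { "leaf",  0, 0, &group };

	enqueue_task(&leaf);
	printf("leaf=%d group=%d root=%d\n",
	       leaf.h_nr_running, group.h_nr_running, root.h_nr_running);
	return 0;
}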
+ */ + if (cfs_rq_throttled(cfs_rq)) + break; + cfs_rq->h_nr_running--; + /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) { /* @@ -1361,11 +1996,17 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); + cfs_rq->h_nr_running--; + + if (cfs_rq_throttled(cfs_rq)) + break; update_cfs_load(cfs_rq, 0); update_cfs_shares(cfs_rq); } + if (!se) + dec_nr_running(rq); hrtick_update(rq); } @@ -1399,42 +2040,105 @@ static void task_waking_fair(struct task_struct *p) * Adding load to a group doesn't make a group heavier, but can cause movement * of group shares between cpus. Assuming the shares were perfectly aligned one * can calculate the shift in shares. + * + * Calculate the effective load difference if @wl is added (subtracted) to @tg + * on this @cpu and results in a total addition (subtraction) of @wg to the + * total group weight. + * + * Given a runqueue weight distribution (rw_i) we can compute a shares + * distribution (s_i) using: + * + * s_i = rw_i / \Sum rw_j (1) + * + * Suppose we have 4 CPUs and our @tg is a direct child of the root group and + * has 7 equal weight tasks, distributed as below (rw_i), with the resulting + * shares distribution (s_i): + * + * rw_i = { 2, 4, 1, 0 } + * s_i = { 2/7, 4/7, 1/7, 0 } + * + * As per wake_affine() we're interested in the load of two CPUs (the CPU the + * task used to run on and the CPU the waker is running on), we need to + * compute the effect of waking a task on either CPU and, in case of a sync + * wakeup, compute the effect of the current task going to sleep. + * + * So for a change of @wl to the local @cpu with an overall group weight change + * of @wl we can compute the new shares distribution (s'_i) using: + * + * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2) + * + * Suppose we're interested in CPUs 0 and 1, and want to compute the load + * differences in waking a task to CPU 0. The additional task changes the + * weight and shares distributions like: + * + * rw'_i = { 3, 4, 1, 0 } + * s'_i = { 3/8, 4/8, 1/8, 0 } + * + * We can then compute the difference in effective weight by using: + * + * dw_i = S * (s'_i - s_i) (3) + * + * Where 'S' is the group weight as seen by its parent. + * + * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7) + * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 - + * 4/7) times the weight of the group. */ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; - if (!tg->parent) + if (!tg->parent) /* the trivial, non-cgroup case */ return wl; for_each_sched_entity(se) { - long lw, w; + long w, W; tg = se->my_q->tg; - w = se->my_q->load.weight; - /* use this cpu's instantaneous contribution */ - lw = atomic_read(&tg->load_weight); - lw -= se->my_q->load_contribution; - lw += w + wg; + /* + * W = @wg + \Sum rw_j + */ + W = wg + calc_tg_weight(tg, se->my_q); - wl += w; + /* + * w = rw_i + @wl + */ + w = se->my_q->load.weight + wl; - if (lw > 0 && wl < lw) - wl = (wl * tg->shares) / lw; + /* + * wl = S * s'_i; see (2) + */ + if (W > 0 && w < W) + wl = (w * tg->shares) / W; else wl = tg->shares; - /* zero point is MIN_SHARES */ + /* + * Per the above, wl is the new se->load.weight value; since + * those are clipped to [MIN_SHARES, ...) do so now. See + * calc_cfs_shares(). 
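A standalone check of the worked example in the effective_load() comment above, using floating point purely for illustration (the kernel performs the same computation in scaled integer arithmetic):

#include <stdio.h>

int main(void)
{
	double rw[4] = { 2, 4, 1, 0 };
	double S = 1.0;			/* group weight as seen by its parent */
	double wl = 1.0, wg = 1.0;	/* added weight, total group delta */
	double sum = rw[0] + rw[1] + rw[2] + rw[3];

	double s_old = rw[0] / sum;			/* (1): 2/7 */
	double s_new = (rw[0] + wl) / (sum + wg);	/* (2): 3/8 */
	double dw = S * (s_new - s_old);		/* (3): 5/56 */

	printf("dw = %f (expected %f)\n", dw, 5.0 / 56.0);
	return 0;
}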
+ */ if (wl < MIN_SHARES) wl = MIN_SHARES; + + /* + * wl = dw_i = S * (s'_i - s_i); see (3) + */ wl -= se->load.weight; + + /* + * Recursively apply this logic to all parent groups to compute + * the final effective load change on the root group. Since + * only the @tg group gets extra weight, all parent groups can + * only redistribute existing shares. @wl is the shift in shares + * resulting from this level per the above. + */ wg = 0; } return wl; } - #else static inline unsigned long effective_load(struct task_group *tg, int cpu, @@ -1547,7 +2251,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_cpus(group), - &p->cpus_allowed)) + tsk_cpus_allowed(p))) continue; local_group = cpumask_test_cpu(this_cpu, @@ -1593,7 +2297,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) int i; /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { + for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { load = weighted_cpuload(i); if (load < min_load || (load == min_load && i == this_cpu)) { @@ -1637,7 +2341,7 @@ static int select_idle_sibling(struct task_struct *p, int target) if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) break; - for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { + for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { if (idle_cpu(i)) { target = i; break; @@ -1680,7 +2384,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) int sync = wake_flags & WF_SYNC; if (sd_flag & SD_BALANCE_WAKE) { - if (cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) want_affine = 1; new_cpu = prev_cpu; } @@ -1899,10 +2603,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (unlikely(p->policy != SCHED_NORMAL)) return; - - if (!sched_feat(WAKEUP_PREEMPT)) - return; - find_matching_se(&se, &pse); update_curr(cfs_rq_of(se)); BUG_ON(!pse); @@ -2049,7 +2749,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. */ - if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { + if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) { schedstat_inc(p, se.statistics.nr_failed_migrations_affine); return 0; } @@ -2102,6 +2802,9 @@ move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, for_each_leaf_cfs_rq(busiest, cfs_rq) { list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) { + if (throttled_lb_pair(task_group(p), + busiest->cpu, this_cpu)) + break; if (!can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) @@ -2217,8 +2920,13 @@ static void update_shares(int cpu) * Iterates the task_group tree in a bottom up fashion, see * list_add_leaf_cfs_rq() for details. 
*/ - for_each_leaf_cfs_rq(rq, cfs_rq) + for_each_leaf_cfs_rq(rq, cfs_rq) { + /* throttled entities do not contribute to load */ + if (throttled_hierarchy(cfs_rq)) + continue; + update_shares_cpu(cfs_rq->tg, cpu); + } rcu_read_unlock(); } @@ -2268,9 +2976,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, u64 rem_load, moved_load; /* - * empty group + * empty group or part of a throttled hierarchy */ - if (!busiest_cfs_rq->task_weight) + if (!busiest_cfs_rq->task_weight || + throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu)) continue; rem_load = (u64)rem_load_move * busiest_weight; @@ -2727,7 +3436,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, int local_group, const struct cpumask *cpus, int *balance, struct sg_lb_stats *sgs) { - unsigned long load, max_cpu_load, min_cpu_load, max_nr_running; + unsigned long nr_running, max_nr_running, min_nr_running; + unsigned long load, max_cpu_load, min_cpu_load; int i; unsigned int balance_cpu = -1, first_idle_cpu = 0; unsigned long avg_load_per_task = 0; @@ -2739,10 +3449,13 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, max_cpu_load = 0; min_cpu_load = ~0UL; max_nr_running = 0; + min_nr_running = ~0UL; for_each_cpu_and(i, sched_group_cpus(group), cpus) { struct rq *rq = cpu_rq(i); + nr_running = rq->nr_running; + /* Bias balancing toward cpus of our domain */ if (local_group) { if (idle_cpu(i) && !first_idle_cpu) { @@ -2753,16 +3466,19 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, load = target_load(i, load_idx); } else { load = source_load(i, load_idx); - if (load > max_cpu_load) { + if (load > max_cpu_load) max_cpu_load = load; - max_nr_running = rq->nr_running; - } if (min_cpu_load > load) min_cpu_load = load; + + if (nr_running > max_nr_running) + max_nr_running = nr_running; + if (min_nr_running > nr_running) + min_nr_running = nr_running; } sgs->group_load += load; - sgs->sum_nr_running += rq->nr_running; + sgs->sum_nr_running += nr_running; sgs->sum_weighted_load += weighted_cpuload(i); if (idle_cpu(i)) sgs->idle_cpus++; @@ -2797,7 +3513,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, if (sgs->sum_nr_running) avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; - if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) + if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && + (max_nr_running - min_nr_running) > 1) sgs->group_imb = 1; sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power, @@ -2854,7 +3571,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd, } /** - * update_sd_lb_stats - Update sched_group's statistics for load balancing. + * update_sd_lb_stats - Update sched_domain's statistics for load balancing. * @sd: sched_domain whose statistics are to be updated. * @this_cpu: Cpu for which load balance is currently performed. 
* @idle: Idle status of this_cpu @@ -3430,7 +4147,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, * moved to this_cpu */ if (!cpumask_test_cpu(this_cpu, - &busiest->curr->cpus_allowed)) { + tsk_cpus_allowed(busiest->curr))) { raw_spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; @@ -3612,22 +4329,6 @@ static int active_load_balance_cpu_stop(void *data) } #ifdef CONFIG_NO_HZ - -static DEFINE_PER_CPU(struct call_single_data, remote_sched_softirq_cb); - -static void trigger_sched_softirq(void *data) -{ - raise_softirq_irqoff(SCHED_SOFTIRQ); -} - -static inline void init_sched_softirq_csd(struct call_single_data *csd) -{ - csd->func = trigger_sched_softirq; - csd->info = NULL; - csd->flags = 0; - csd->priv = 0; -} - /* * idle load balancing details * - One of the idle CPUs nominates itself as idle load_balancer, while @@ -3667,7 +4368,7 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) struct sched_domain *sd; for_each_domain(cpu, sd) - if (sd && (sd->flags & flag)) + if (sd->flags & flag) break; return sd; @@ -3793,11 +4494,16 @@ static void nohz_balancer_kick(int cpu) } if (!cpu_rq(ilb_cpu)->nohz_balance_kick) { - struct call_single_data *cp; - cpu_rq(ilb_cpu)->nohz_balance_kick = 1; - cp = &per_cpu(remote_sched_softirq_cb, cpu); - __smp_call_function_single(ilb_cpu, cp, 0); + + smp_mb(); + /* + * Use smp_send_reschedule() instead of resched_cpu(). + * This way we generate a sched IPI on the target cpu which + * is idle. And the softirq performing nohz idle load balance + * will be run before returning from the IPI. + */ + smp_send_reschedule(ilb_cpu); } return; } @@ -3967,6 +4673,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) } #ifdef CONFIG_NO_HZ +extern void update_idle_cpu_load(struct rq *this_rq); + /* * In CONFIG_NO_HZ case, the idle balance kickee will do the * rebalancing for all the cpus for whom scheduler ticks are stopped. @@ -3996,7 +4704,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) raw_spin_lock_irq(&this_rq->lock); update_rq_clock(this_rq); - update_cpu_load(this_rq); + update_idle_cpu_load(this_rq); raw_spin_unlock_irq(&this_rq->lock); rebalance_domains(balance_cpu, CPU_IDLE); @@ -4030,7 +4738,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) if (time_before(now, nohz.next_balance)) return 0; - if (rq->idle_at_tick) + if (idle_cpu(cpu)) return 0; first_pick_cpu = atomic_read(&nohz.first_pick_cpu); @@ -4066,7 +4774,7 @@ static void run_rebalance_domains(struct softirq_action *h) { int this_cpu = smp_processor_id(); struct rq *this_rq = cpu_rq(this_cpu); - enum cpu_idle_type idle = this_rq->idle_at_tick ? + enum cpu_idle_type idle = this_rq->idle_balance ? CPU_IDLE : CPU_NOT_IDLE; rebalance_domains(this_cpu, idle); @@ -4251,8 +4959,13 @@ static void set_curr_task_fair(struct rq *rq) { struct sched_entity *se = &rq->curr->se; - for_each_sched_entity(se) - set_next_entity(cfs_rq_of(se), se); + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + set_next_entity(cfs_rq, se); + /* ensure bandwidth has been allocated on our new cfs_rq */ + account_cfs_rq_runtime(cfs_rq, 0); + } } #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 2e74677cb04..f36224da1bb 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -3,7 +3,7 @@ * them to run sooner, but does not allow tons of sleepers to * rip the spread apart. 
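The GENTLE_FAIR_SLEEPERS default is changed in the sched_features.h hunk below. On a CONFIG_SCHED_DEBUG kernel the same feature can also be toggled at runtime; a hedged sketch, assuming debugfs is mounted at /sys/kernel/debug:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/sched_features", "w");

	if (!f)
		return 1;
	/* prefix a feature with NO_ to clear it, write it bare to set it */
	fputs("NO_GENTLE_FAIR_SLEEPERS\n", f);
	fclose(f);
	return 0;
}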
*/ -SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) +SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 0) /* * Place new tasks ahead so that they do not starve already running @@ -11,11 +11,6 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) */ SCHED_FEAT(START_DEBIT, 1) -/* - * Should wakeups try to preempt running tasks. - */ -SCHED_FEAT(WAKEUP_PREEMPT, 1) - /* * Based on load and program behaviour, see if it makes sense to place * a newly woken task on the same cpu as the task that woke it -- diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 0a51882534e..be92bfe3929 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -23,7 +23,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl static struct task_struct *pick_next_task_idle(struct rq *rq) { schedstat_inc(rq, sched_goidle); - calc_load_account_idle(rq); return rq->idle; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index af1177858be..e2671631b96 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -497,6 +497,7 @@ static void __disable_runtime(struct rq *rq) * runtime - in which case borrowing doesn't make sense. */ rt_rq->rt_runtime = RUNTIME_INF; + rt_rq->rt_throttled = 0; raw_spin_unlock(&rt_rq->rt_runtime_lock); raw_spin_unlock(&rt_b->rt_runtime_lock); } @@ -961,6 +962,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); + + inc_nr_running(rq); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -971,6 +974,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) dequeue_rt_entity(rt_se); dequeue_pushable_task(rq, p); + + dec_nr_running(rq); } /* @@ -1198,7 +1203,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && + (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && (p->rt.nr_cpus_allowed > 1)) return 1; return 0; @@ -1343,7 +1348,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) */ if (unlikely(task_rq(task) != rq || !cpumask_test_cpu(lowest_rq->cpu, - &task->cpus_allowed) || + tsk_cpus_allowed(task)) || task_running(rq, task) || !task->on_rq)) { @@ -1626,9 +1631,6 @@ static void set_cpus_allowed_rt(struct task_struct *p, update_rt_migration(&rq->rt); } - - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = weight; } /* Assumes rq->lock is held */ @@ -1863,4 +1865,3 @@ static void print_rt_stats(struct seq_file *m, int cpu) rcu_read_unlock(); } #endif /* CONFIG_SCHED_DEBUG */ - diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 6f437632afa..8b44e7fa7fb 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -34,11 +34,13 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) static void enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) { + inc_nr_running(rq); } static void dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) { + dec_nr_running(rq); } static void yield_task_stop(struct rq *rq) diff --git a/kernel/signal.c b/kernel/signal.c index 195331c56ad..cc243da3acc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -481,6 +481,9 @@ flush_signal_handlers(struct task_struct *t, int force_default) if (force_default || ka->sa.sa_handler != SIG_IGN) ka->sa.sa_handler = SIG_DFL; ka->sa.sa_flags = 0; +#ifdef SA_RESTORER 
+ ka->sa.sa_restorer = NULL; +#endif sigemptyset(&ka->sa.sa_mask); ka++; } @@ -676,23 +679,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * No need to set need_resched since signal event passing * goes through ->blocked */ -void signal_wake_up(struct task_struct *t, int resume) +void signal_wake_up_state(struct task_struct *t, unsigned int state) { - unsigned int mask; - set_tsk_thread_flag(t, TIF_SIGPENDING); - /* - * For SIGKILL, we want to wake it up in the stopped/traced/killable + * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it * executing another processor and just now entering stopped state. * By using wake_up_state, we ensure the process will wake up and * handle its death signal. */ - mask = TASK_INTERRUPTIBLE; - if (resume) - mask |= TASK_WAKEKILL; - if (!wake_up_state(t, mask)) + if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) kick_process(t); } @@ -1748,6 +1745,10 @@ static inline int may_ptrace_stop(void) * If SIGKILL was already sent before the caller unlocked * ->siglock we must see ->core_state != NULL. Otherwise it * is safe to enter schedule(). + * + * This is almost outdated, a task with the pending SIGKILL can't + * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported + * after SIGKILL was already dequeued. */ if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) @@ -1873,6 +1874,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) if (gstop_done) do_notify_parent_cldstop(current, false, why); + /* tasklist protects us from ptrace_freeze_traced() */ __set_current_state(TASK_RUNNING); if (clear_code) current->exit_code = 0; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 1ca7b0407c9..e45ffcc9190 100755 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -452,7 +452,7 @@ static int stop_machine_cpu_stop(void *data) is_active = cpu == cpumask_first(cpu_online_mask); else is_active = cpumask_test_cpu(cpu, smdata->active_cpus); - printk("stop_machine_cpu_stop smp=%u\n",cpu); + pr_debug("stop_machine_cpu_stop smp=%u\n",cpu); /* Simple state machine */ do { /* Chill out and ensure we re-read stopmachine_state. */ diff --git a/kernel/sys.c b/kernel/sys.c index f24794e7652..0136e44fea8 100755 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -125,7 +126,10 @@ EXPORT_SYMBOL(cad_pid); void (*pm_power_off_prepare)(void); - extern void disable_auto_hotplug(void); +#ifndef CONFIG_CPUQUIET_FRAMEWORK +extern void disable_auto_hotplug(void); +#endif + /* * Returns true if current's euid is same as p's uid or euid, * or has CAP_SYS_NICE to p's user_ns. 
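The prctl() extension added below (PR_SET_TIMERSLACK_PID) lets a sufficiently privileged task set another task's timer slack. A hedged usage sketch; the constant itself comes from the accompanying uapi header change, which is not part of this hunk:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/prctl.h>

#ifndef PR_SET_TIMERSLACK_PID
#error "PR_SET_TIMERSLACK_PID is provided by the patched <linux/prctl.h>"
#endif

/*
 * Set a 50us (50000ns) timer slack on the target task; per the permission
 * check added below this needs CAP_SYS_NICE unless the target is the caller.
 */
int main(int argc, char **argv)
{
	pid_t target = argc > 1 ? (pid_t)atoi(argv[1]) : getpid();

	if (prctl(PR_SET_TIMERSLACK_PID, 50000UL, (unsigned long)target, 0, 0))
		perror("prctl");
	return 0;
}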
@@ -367,7 +371,9 @@ EXPORT_SYMBOL(unregister_reboot_notifier); */ void kernel_restart(char *cmd) { +#ifndef CONFIG_CPUQUIET_FRAMEWORK disable_auto_hotplug(); +#endif kernel_restart_prepare(cmd); if (!cmd) printk(KERN_EMERG "Restarting system.\n"); @@ -418,7 +424,9 @@ void kernel_power_off(void) kernel_restart(cmd); } +#ifndef CONFIG_CPUQUIET_FRAMEWORK disable_auto_hotplug(); +#endif kernel_shutdown_prepare(SYSTEM_POWER_OFF); if (pm_power_off_prepare) pm_power_off_prepare(); @@ -655,6 +663,7 @@ static int set_user(struct cred *new) free_uid(new->user); new->user = new_user; + sched_autogroup_create_attach(current); return 0; } @@ -1164,7 +1173,7 @@ SYSCALL_DEFINE0(setsid) write_unlock_irq(&tasklist_lock); if (err > 0) { proc_sid_connector(group_leader); - sched_autogroup_create_attach(group_leader); + } return err; } @@ -1710,6 +1719,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; + struct task_struct *tsk; unsigned char comm[sizeof(me->comm)]; long error; @@ -1811,6 +1821,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_TIMERSLACK: error = current->timer_slack_ns; break; + case PR_GET_EFFECTIVE_TIMERSLACK: + error = task_get_effective_timer_slack(current); + break; case PR_SET_TIMERSLACK: if (arg2 <= 0) current->timer_slack_ns = @@ -1854,6 +1867,26 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else error = PR_MCE_KILL_DEFAULT; break; + case PR_SET_TIMERSLACK_PID: + if (current->pid != (pid_t)arg3 && + !capable(CAP_SYS_NICE)) + return -EPERM; + rcu_read_lock(); + tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); + if (tsk == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + get_task_struct(tsk); + rcu_read_unlock(); + if (arg2 <= 0) + tsk->timer_slack_ns = + tsk->default_timer_slack_ns; + else + tsk->timer_slack_ns = arg2; + put_task_struct(tsk); + error = 0; + break; default: error = -EINVAL; break; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fd15163f360..094db256922 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -375,11 +375,21 @@ static struct ctl_table kern_table[] = { .data = &sysctl_sched_autogroup_enabled, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec, .extra1 = &zero, .extra2 = &one, }, #endif +#ifdef CONFIG_CFS_BANDWIDTH + { + .procname = "sched_cfs_bandwidth_slice_us", + .data = &sysctl_sched_cfs_bandwidth_slice, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, +#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5097c44b40..793548cb5a9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now) struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long flags; - cpumask_clear_cpu(cpu, nohz_cpu_mask); ts->idle_waketime = now; local_irq_save(flags); @@ -159,9 +158,10 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); if (nr_iowait_cpu(cpu) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); + else + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; } @@ 
-197,11 +197,11 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) /** * get_cpu_idle_time_us - get the total idle time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative idle time (since boot) for a given - * CPU, in microseconds. The idle time returned includes - * the iowait time (unlike what "top" and co report). + * CPU, in microseconds. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. @@ -211,20 +211,35 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, idle; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + idle = ts->idle_sleeptime; + } else { + if (ts->idle_active && !nr_iowait_cpu(cpu)) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + idle = ktime_add(ts->idle_sleeptime, delta); + } else { + idle = ts->idle_sleeptime; + } + } + + return ktime_to_us(idle); - return ktime_to_us(ts->idle_sleeptime); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); -/* +/** * get_cpu_iowait_time_us - get the total iowait time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative iowait time (since boot) for a given * CPU, in microseconds. @@ -237,13 +252,26 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, iowait; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + iowait = ts->iowait_sleeptime; + } else { + if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + iowait = ktime_add(ts->iowait_sleeptime, delta); + } else { + iowait = ts->iowait_sleeptime; + } + } - return ktime_to_us(ts->iowait_sleeptime); + return ktime_to_us(iowait); } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); @@ -389,9 +417,6 @@ void tick_nohz_stop_sched_tick(int inidle) else expires.tv64 = KTIME_MAX; - if (delta_jiffies > 1) - cpumask_set_cpu(cpu, nohz_cpu_mask); - /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; @@ -405,6 +430,7 @@ void tick_nohz_stop_sched_tick(int inidle) */ if (!ts->tick_stopped) { select_nohz_load_balancer(1); + calc_load_enter_idle(); ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; @@ -441,7 +467,6 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. 
*/ tick_do_update_jiffies64(ktime_get()); - cpumask_clear_cpu(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: @@ -484,9 +509,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) hrtimer_get_expires(&ts->sched_timer), 0)) break; } - /* Update jiffies and reread time */ - tick_do_update_jiffies64(now); + /* Reread time and update jiffies */ now = ktime_get(); + tick_do_update_jiffies64(now); } } @@ -524,7 +549,7 @@ void tick_nohz_restart_sched_tick(void) /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + update_cpu_load_nohz(); #ifndef CONFIG_VIRT_CPU_ACCOUNTING /* @@ -540,6 +565,7 @@ void tick_nohz_restart_sched_tick(void) account_idle_ticks(ticks); #endif + calc_load_exit_idle(); touch_softlockup_watchdog(); /* * Cancel the scheduled timer and restore the tick @@ -640,8 +666,6 @@ static void tick_nohz_switch_to_nohz(void) next = ktime_add(next, tick_period); } local_irq_enable(); - - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } /* @@ -793,10 +817,8 @@ void tick_setup_sched_timer(void) } #ifdef CONFIG_NO_HZ - if (tick_nohz_enabled) { + if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); - } #endif } #endif /* HIGH_RES_TIMERS */ diff --git a/kernel/timer.c b/kernel/timer.c index 8cff36119e4..b7474f32e11 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -763,7 +763,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) if (mask == 0) return expires; - bit = find_last_bit(&mask, BITS_PER_LONG); + bit = __fls(mask); mask = (1 << bit) - 1; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1783aabc612..9948537be0d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -253,11 +253,13 @@ struct workqueue_struct *system_long_wq __read_mostly; struct workqueue_struct *system_nrt_wq __read_mostly; struct workqueue_struct *system_unbound_wq __read_mostly; struct workqueue_struct *system_freezable_wq __read_mostly; +struct workqueue_struct *system_nrt_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); EXPORT_SYMBOL_GPL(system_unbound_wq); EXPORT_SYMBOL_GPL(system_freezable_wq); +EXPORT_SYMBOL_GPL(system_nrt_freezable_wq); #define CREATE_TRACE_POINTS #include @@ -1213,8 +1215,13 @@ static void worker_enter_idle(struct worker *worker) } else wake_up_all(&gcwq->trustee_wait); - /* sanity check nr_running */ - WARN_ON_ONCE(gcwq->nr_workers == gcwq->nr_idle && + /* + * Sanity check nr_running. Because trustee releases gcwq->lock + * between setting %WORKER_ROGUE and zapping nr_running, the + * warning may trigger spuriously. Check iff trustee is idle. 
+ */ + WARN_ON_ONCE(gcwq->trustee_state == TRUSTEE_DONE && + gcwq->nr_workers == gcwq->nr_idle && atomic_read(get_gcwq_nr_running(gcwq->cpu))); } @@ -1719,10 +1726,9 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, *nextp = n; } -static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +static void cwq_activate_delayed_work(struct work_struct *work) { - struct work_struct *work = list_first_entry(&cwq->delayed_works, - struct work_struct, entry); + struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); trace_workqueue_activate_work(work); @@ -1731,6 +1737,14 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) cwq->nr_active++; } +static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +{ + struct work_struct *work = list_first_entry(&cwq->delayed_works, + struct work_struct, entry); + + cwq_activate_delayed_work(work); +} + /** * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest @@ -1862,7 +1876,9 @@ __acquires(&gcwq->lock) spin_unlock_irq(&gcwq->lock); + smp_wmb(); /* paired with test_and_set_bit(PENDING) */ work_clear_pending(work); + lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); trace_workqueue_execute_start(work); @@ -2619,6 +2635,18 @@ static int try_to_grab_pending(struct work_struct *work) smp_rmb(); if (gcwq == get_work_gcwq(work)) { debug_work_deactivate(work); + + /* + * A delayed work item cannot be grabbed directly + * because it might have linked NO_COLOR work items + * which, if left on the delayed_list, will confuse + * cwq->nr_active management later on and cause + * stall. Make sure the work item is activated + * before grabbing. + */ + if (*work_data_bits(work) & WORK_STRUCT_DELAYED) + cwq_activate_delayed_work(work); + list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work), @@ -3430,14 +3458,17 @@ static int __cpuinit trustee_thread(void *__gcwq) for_each_busy_worker(worker, i, pos, gcwq) { struct work_struct *rebind_work = &worker->rebind_work; + unsigned long worker_flags = worker->flags; /* * Rebind_work may race with future cpu hotplug * operations. Use a separate flag to mark that - * rebinding is scheduled. + * rebinding is scheduled. The morphing should + * be atomic. */ - worker->flags |= WORKER_REBIND; - worker->flags &= ~WORKER_ROGUE; + worker_flags |= WORKER_REBIND; + worker_flags &= ~WORKER_ROGUE; + ACCESS_ONCE(worker->flags) = worker_flags; /* queue rebind_work, wq doesn't matter, use the default one */ if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, @@ -3579,21 +3610,55 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, return notifier_from_errno(0); } +/* + * Workqueues should be brought up before normal priority CPU notifiers. + * This will be registered high priority CPU notifier. + */ +static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + case CPU_UP_CANCELED: + case CPU_DOWN_FAILED: + case CPU_ONLINE: + return workqueue_cpu_callback(nfb, action, hcpu); + } + return NOTIFY_OK; +} + +/* + * Workqueues should be brought down after normal priority CPU notifiers. + * This will be registered as low priority CPU notifier. 
+ */ +static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_PREPARE: + case CPU_DYING: + case CPU_POST_DEAD: + return workqueue_cpu_callback(nfb, action, hcpu); + } + return NOTIFY_OK; +} + #ifdef CONFIG_SMP struct work_for_cpu { - struct completion completion; + struct work_struct work; long (*fn)(void *); void *arg; long ret; }; -static int do_work_for_cpu(void *_wfc) +static void work_for_cpu_fn(struct work_struct *work) { - struct work_for_cpu *wfc = _wfc; + struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); + wfc->ret = wfc->fn(wfc->arg); - complete(&wfc->completion); - return 0; } /** @@ -3608,19 +3673,11 @@ static int do_work_for_cpu(void *_wfc) */ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { - struct task_struct *sub_thread; - struct work_for_cpu wfc = { - .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), - .fn = fn, - .arg = arg, - }; + struct work_for_cpu wfc = { .fn = fn, .arg = arg }; - sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); - if (IS_ERR(sub_thread)) - return PTR_ERR(sub_thread); - kthread_bind(sub_thread, cpu); - wake_up_process(sub_thread); - wait_for_completion(&wfc.completion); + INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); return wfc.ret; } EXPORT_SYMBOL_GPL(work_on_cpu); @@ -3772,7 +3829,8 @@ static int __init init_workqueues(void) unsigned int cpu; int i; - cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); + cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); + cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); /* initialize gcwqs */ for_each_gcwq_cpu(cpu) { @@ -3821,8 +3879,11 @@ static int __init init_workqueues(void) WQ_UNBOUND_MAX_ACTIVE); system_freezable_wq = alloc_workqueue("events_freezable", WQ_FREEZABLE, 0); + system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable", + WQ_NON_REENTRANT | WQ_FREEZABLE, 0); BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || - !system_unbound_wq || !system_freezable_wq); + !system_unbound_wq || !system_freezable_wq || + !system_nrt_freezable_wq); return 0; } early_initcall(init_workqueues); diff --git a/lib/Kconfig b/lib/Kconfig index 6c695ff9cab..b80c866dd5b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -106,6 +106,15 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +config LZ4_COMPRESS + tristate + +config LZ4HC_COMPRESS + tristate + +config LZ4_DECOMPRESS + tristate + source "lib/xz/Kconfig" # @@ -130,6 +139,10 @@ config DECOMPRESS_LZO select LZO_DECOMPRESS tristate +config DECOMPRESS_LZ4 + select LZ4_DECOMPRESS + tristate + # # Generic allocator support is selected if needed # @@ -276,7 +289,4 @@ config CORDIC so its calculations are in fixed point. Modules can select this when they require this function. Module will be called cordic. 
-config LLIST - bool - endmenu diff --git a/lib/Makefile b/lib/Makefile index 3f5bc6d903e..4feda350a71 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o memcopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -22,7 +22,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ - bsearch.o find_last_bit.o find_next_bit.o + bsearch.o find_last_bit.o find_next_bit.o llist.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o @@ -70,6 +70,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_LZ4_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ @@ -78,6 +81,7 @@ lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o +lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o @@ -115,8 +119,6 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o obj-$(CONFIG_CORDIC) += cordic.o -obj-$(CONFIG_LLIST) += llist.o - hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/decompress.c b/lib/decompress.c index 3d766b7f60a..fc3f2dda8c3 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,9 @@ #ifndef CONFIG_DECOMPRESS_LZO # define unlzo NULL #endif +#ifndef CONFIG_DECOMPRESS_LZ4 +# define unlz4 NULL +#endif static const struct compress_format { unsigned char magic[2]; @@ -42,6 +46,7 @@ static const struct compress_format { { {0x5d, 0x00}, "lzma", unlzma }, { {0xfd, 0x37}, "xz", unxz }, { {0x89, 0x4c}, "lzo", unlzo }, + { {0x02, 0x21}, "lz4", unlz4 }, { {0, 0}, NULL, NULL } }; diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c new file mode 100644 index 00000000000..84346c47c5c --- /dev/null +++ b/lib/decompress_unlz4.c @@ -0,0 +1,190 @@ +/* + * Wrapper for decompressing LZ4-compressed kernel, initramfs, and initrd + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifdef STATIC +#define PREBOOT +#include "lz4/lz4_decompress.c" +#else +#include +#endif +#include +#include +#include +#include + +#include + + +#define LZ4_CHUNK_SIZE (8<<20) +#define ARCHIVE_MAGICNUMBER 0x184C2102 + +STATIC inline int INIT unlz4(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error) (char *x)) +{ + int ret = -1; + size_t chunksize = 0; + u8 *inp; + u8 *inp_start; + u8 *outp; + int size = in_len; +#ifdef PREBOOT + size_t out_len = get_unaligned_le32(input + in_len); +#endif + size_t dest_len; + + + if (output) { + outp = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit_0; + } else { + outp = large_malloc(LZ4_CHUNK_SIZE); + if (!outp) { + error("Could not allocate output buffer"); + goto exit_0; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided,"); + goto exit_1; + } else if (input) { + inp = input; + } else if (!fill) { + error("NULL input pointer and missing fill function"); + goto exit_1; + } else { + inp = large_malloc(LZ4_COMPRESSBOUND(LZ4_CHUNK_SIZE)); + if (!inp) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + inp_start = inp; + + if (posp) + *posp = 0; + + if (fill) + fill(inp, 4); + + chunksize = get_unaligned_le32(inp); + if (chunksize == ARCHIVE_MAGICNUMBER) { + inp += 4; + size -= 4; + } else { + error("invalid header"); + goto exit_2; + } + + if (posp) + *posp += 4; + + for (;;) { + + if (fill) + fill(inp, 4); + + chunksize = get_unaligned_le32(inp); + if (chunksize == ARCHIVE_MAGICNUMBER) { + inp += 4; + size -= 4; + if (posp) + *posp += 4; + continue; + } + inp += 4; + size -= 4; + + if (posp) + *posp += 4; + + if (fill) { + if (chunksize > LZ4_COMPRESSBOUND(LZ4_CHUNK_SIZE)) { + error("chunk length is longer than allocated"); + goto exit_2; + } + fill(inp, chunksize); + } +#ifdef PREBOOT + if (out_len >= LZ4_CHUNK_SIZE) { + dest_len = LZ4_CHUNK_SIZE; + out_len -= dest_len; + } else + dest_len = out_len; + ret = lz4_decompress(inp, &chunksize, outp, dest_len); +#else + dest_len = LZ4_CHUNK_SIZE; + ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp, + &dest_len); +#endif + if (ret < 0) { + error("Decoding failed"); + goto exit_2; + } + + if (flush && flush(outp, dest_len) != dest_len) + goto exit_2; + if (output) + outp += dest_len; + if (posp) + *posp += chunksize; + + size -= chunksize; + + if (size == 0) + break; + else if (size < 0) { + error("data corrupted"); + goto exit_2; + } + + inp += chunksize; + if (fill) + inp = inp_start; + } + + ret = 0; +exit_2: + if (!input) + large_free(inp_start); +exit_1: + if (!output) + large_free(outp); +exit_0: + return ret; +} + +#ifdef PREBOOT +STATIC int INIT decompress(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x) + ) +{ + return unlz4(buf, in_len - 4, fill, flush, output, posp, error); +} +#endif diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 5a7a2adf4c4..26f89ad0330 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -31,7 +31,7 @@ */ #ifdef STATIC -#include "lzo/lzo1x_decompress.c" +#include "lzo/lzo1x_decompress_safe.c" #else #include #endif diff --git a/lib/llist.c b/lib/llist.c index da445724fa1..700cff77a38 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -3,8 +3,8 @@ * * The basic atomic operation of this list is cmpxchg on long. 
On * architectures that don't have NMI-safe cmpxchg implementation, the - * list can NOT be used in NMI handler. So code uses the list in NMI - * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * list can NOT be used in NMI handlers. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2010,2011 Intel Corp. * Author: Huang Ying @@ -29,49 +29,29 @@ #include -/** - * llist_add - add a new entry - * @new: new entry to be added - * @head: the head for your lock-less list - */ -void llist_add(struct llist_node *new, struct llist_head *head) -{ - struct llist_node *entry, *old_entry; - -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - entry = head->first; - do { - old_entry = entry; - new->next = entry; - cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); -} -EXPORT_SYMBOL_GPL(llist_add); - /** * llist_add_batch - add several linked entries in batch * @new_first: first entry in batch to be added * @new_last: last entry in batch to be added * @head: the head for your lock-less list + * + * Return whether list is empty before adding. */ -void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, +bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head) { struct llist_node *entry, *old_entry; -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - entry = head->first; - do { + for (;;) { old_entry = entry; new_last->next = entry; - cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); + entry = cmpxchg(&head->first, old_entry, new_first); + if (entry == old_entry) + break; + } + + return old_entry == NULL; } EXPORT_SYMBOL_GPL(llist_add_batch); @@ -93,37 +73,17 @@ struct llist_node *llist_del_first(struct llist_head *head) { struct llist_node *entry, *old_entry, *next; -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - entry = head->first; - do { + for (;;) { if (entry == NULL) return NULL; old_entry = entry; next = entry->next; - cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); + entry = cmpxchg(&head->first, old_entry, next); + if (entry == old_entry) + break; + } return entry; } EXPORT_SYMBOL_GPL(llist_del_first); - -/** - * llist_del_all - delete all entries from lock-less list - * @head: the head of lock-less list to delete all entries - * - * If list is empty, return NULL, otherwise, delete all entries and - * return the pointer to the first entry. The order of entries - * deleted is from the newest to the oldest added one. - */ -struct llist_node *llist_del_all(struct llist_head *head) -{ -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - return xchg(&head->first, NULL); -} -EXPORT_SYMBOL_GPL(llist_del_all); diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile new file mode 100644 index 00000000000..8085d04e930 --- /dev/null +++ b/lib/lz4/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c new file mode 100644 index 00000000000..10762814137 --- /dev/null +++ b/lib/lz4/lz4_compress.c @@ -0,0 +1,443 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. 
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +/* + * LZ4_compressCtx : + * ----------------- + * Compress 'isize' bytes from 'source' into an output buffer 'dest' of + * maximum size 'maxOutputSize'. * If it cannot achieve it, compression + * will stop, and result of the function will be zero. 
+ * return : the number of bytes written in buffer 'dest', or 0 if the + * compression fails + */ +static inline int lz4_compressctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + HTYPE *hashtable = (HTYPE *)ctx; + const u8 *ip = (u8 *)source; +#if LZ4_ARCH64 + const BYTE * const base = ip; +#else + const int base = 0; +#endif + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; + forwardh = LZ4_HASH_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (unlikely(forwardip > mflimit)) + goto _last_literals; + + forwardh = LZ4_HASH_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = ip - base; + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + + if (length >= (int)RUN_MASK) { + int len; + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + /* Encode MatchLength */ + length = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) + return 0; + if (length >= (int)ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length -= 510) { + *op++ = 255; + *op++ = 255; + } + if (length > 254) { + length -= 255; + *op++ = 255; + } + *op++ = (u8)length; + } else + *token += length; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + + /* Test next position */ + ref = base + hashtable[LZ4_HASH_VALUE(ip)]; + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop 
*/ + anchor = ip++; + forwardh = LZ4_HASH_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (((char *)op - dest) + lastrun + 1 + + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) + return 0; + + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + + /* End */ + return (int)(((char *)op) - dest); +} + +static inline int lz4_compress64kctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + u16 *hashtable = (u16 *)ctx; + const u8 *ip = (u8 *) source; + const u8 *anchor = ip; + const u8 *const base = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int len, length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (forwardip > mflimit) + goto _last_literals; + + forwardh = LZ4_HASH64K_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = (u16)(ip - base); + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) + && (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + + while (ip < MATCHLIMIT - (STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + + /* Encode MatchLength */ + len = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return 0; + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (u8)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + 
hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + + /* Test next position */ + ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; + hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) + return 0; + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int)(((char *)op) - dest); +} + +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + if (src_len < LZ4_64KLIMIT) + out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, + LZ4_COMPRESSBOUND(src_len)); + else + out_len = lz4_compressctx(wrkmem, src, dst, src_len, + LZ4_COMPRESSBOUND(src_len)); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + + return 0; +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c new file mode 100644 index 00000000000..5e7f2dbcc53 --- /dev/null +++ b/lib/lz4/lz4_decompress.c @@ -0,0 +1,331 @@ +/* + * LZ4 Decompressor for Linux kernel + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * Based on LZ4 implementation by Yann Collet. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#ifndef STATIC +#include +#include +#endif +#include + +#include + +#include "lz4defs.h" + +static int lz4_uncompress(const char *source, char *dest, int osize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *ref; + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + osize; + BYTE *cpy; + + unsigned token; + + size_t length; + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + while (1) { + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + size_t len; + + /* for (; (len = *ip++) == 255; length += 255){} */ + len = *ip++; + for (; len == 255; length += 255) + len = *ip++; + length += len; + } + + /* copy literals */ + cpy = op + length; + if (unlikely(cpy > oend - COPYLENGTH)) { + + /* + * Error: not enough place for another match + * (min 4) + 5 literals + */ + if (cpy != oend) + goto _output_error; + + memcpy(op, ip, length); + ip += length; + break; /* EOF */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + + /* Error: offset create reference outside destination buffer */ + if (unlikely(ref < (BYTE *const)dest)) + goto _output_error; + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + for (; *ip == 255; length += 255) + ip++; + length += *ip++; + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op-ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > oend - COPYLENGTH) { + + /* Error: request to write beyond destination buffer */ + if (cpy > oend) + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)ip) - source); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, + int isize, size_t maxoutputsize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *ref; + + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + maxoutputsize; + BYTE *cpy; + + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + /* Main Loop */ + while (ip < iend) { + + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + + if (cpy > oend) + goto _output_error;/* writes beyond buffer */ + + if (ip + length != 
iend) + goto _output_error;/* + * Error: LZ4 format requires + * to consume all input + * at this stage + */ + memcpy(op, ip, length); + op += length; + break;/* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const)dest) + goto _output_error; + /* + * Error : offset creates reference + * outside of destination buffer + */ + + /* get matchlength */ + length = (token & ML_MASK); + if (length == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + + /* copy repeated sequence */ + if (unlikely(op - ref < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op - ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE-4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + goto _output_error; /* write outside of buf */ + + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)op) - dest); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len) +{ + int ret = -1; + int input_len = 0; + + input_len = lz4_uncompress(src, dest, actual_dest_len); + if (input_len < 0) + goto exit_0; + *src_len = input_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress); +#endif + +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len) +{ + int ret = -1; + int out_len = 0; + + out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, + *dest_len); + if (out_len < 0) + goto exit_0; + *dest_len = out_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Decompressor"); +#endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h new file mode 100644 index 00000000000..1c87125c4e2 --- /dev/null +++ b/lib/lz4/lz4defs.h @@ -0,0 +1,155 @@ +/* + * lz4defs.h -- architecture specific defines + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* + * Detects 64 bits mode + */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ + || defined(__ppc64__) || defined(__LP64__)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Architecture-specific macros + */ +#define BYTE u8 +typedef struct _U16_S { u16 v; } U16_S; +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || defined(CONFIG_ARM) \ + && __LINUX_ARM_ARCH__ >= 6 \ + && defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + +#define A16(x) (((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + +#define PUT4(s, d) (A32(d) = A32(s)) +#define PUT8(s, d) (A64(d) = A64(s)) +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + A16(p) = v; \ + p += 2; \ + } while (0) +#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) +#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) +#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) + +#define PUT4(s, d) \ + put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) +#define PUT8(s, d) \ + put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned(v, (u16 *)(p)); \ + p += 2; \ + } while (0) +#endif + +#define COPYLENGTH 8 +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) +#define MEMORY_USAGE 14 +#define MINMATCH 4 +#define SKIPSTRENGTH 6 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) +#define MAXD_LOG 16 +#define MAXD (1 << MAXD_LOG) +#define MAXD_MASK (u32)(MAXD - 1) +#define MAX_DISTANCE (MAXD - 1) +#define HASH_LOG (MAXD_LOG - 1) +#define HASHTABLESIZE (1 << HASH_LOG) +#define MAX_NB_ATTEMPTS 256 +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) +#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - (MEMORY_USAGE-2))) +#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASHLOG64K)) +#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASH_LOG)) + +#if LZ4_ARCH64/* 64-bit */ +#define STEPSIZE 8 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT8(s, d); \ + d += 8; \ + s += 8; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) + +#define LZ4_SECURECOPY(s, d, e) \ + do { \ + if (d < e) { \ + LZ4_WILDCOPY(s, d, e); \ + } \ + } while (0) +#define HTYPE u32 + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) +#endif + +#else /* 32-bit */ +#define STEPSIZE 4 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT4(s, d); \ + d += 4; \ + s += 4; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) \ + do { \ + LZ4_COPYSTEP(s, d); \ + LZ4_COPYSTEP(s, d); \ + } while (0) + +#define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const u8* + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) +#endif + +#endif + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) +#define LZ4_WILDCOPY(s, d, e) \ + do { \ + LZ4_COPYPACKET(s, d); \ + } while (d < e) + +#define LZ4_BLINDCOPY(s, d, l) \ + do { \ + u8 *e = (d) + l; \ + 
LZ4_WILDCOPY(s, d, e); \ + d = e; \ + } while (0) diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c new file mode 100644 index 00000000000..a9a9c2a00c5 --- /dev/null +++ b/lib/lz4/lz4hc_compress.c @@ -0,0 +1,539 @@ +/* + * LZ4 HC - High Compression Mode of LZ4 + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +struct lz4hc_data { + const u8 *base; + HTYPE hashtable[HASHTABLESIZE]; + u16 chaintable[MAXD]; + const u8 *nexttoupdate; +} __attribute__((__packed__)); + +static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) +{ + memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); + memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); + +#if LZ4_ARCH64 + hc4->nexttoupdate = base + 1; +#else + hc4->nexttoupdate = base; +#endif + hc4->base = base; + return 1; +} + +/* Update chains up to ip (excluded) */ +static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) +{ + u16 *chaintable = hc4->chaintable; + HTYPE *hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + + while (hc4->nexttoupdate < ip) { + const u8 *p = hc4->nexttoupdate; + size_t delta = p - (hashtable[HASH_VALUE(p)] + base); + if (delta > MAX_DISTANCE) + delta = MAX_DISTANCE; + chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; + hashtable[HASH_VALUE(p)] = (p) - base; + hc4->nexttoupdate++; + } +} + +static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, + const u8 *const matchlimit) +{ + const u8 *p1t = p1; + + while (p1t < matchlimit - (STEPSIZE - 1)) { +#if LZ4_ARCH64 + u64 diff = A64(p2) ^ A64(p1t); +#else + u32 diff = A32(p2) ^ A32(p1t); +#endif + if (!diff) { + p1t += STEPSIZE; + p2 += STEPSIZE; + continue; + } + p1t += LZ4_NBCOMMONBYTES(diff); + return p1t - p1; + } +#if LZ4_ARCH64 + if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { + p1t += 4; + 
p2 += 4; + } +#endif + + if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { + p1t += 2; + p2 += 2; + } + if ((p1t < matchlimit) && (*p2 == *p1t)) + p1t++; + return p1t - p1; +} + +static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; + const u8 *ref; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + int nbattempts = MAX_NB_ATTEMPTS; + size_t repl = 0, ml = 0; + u16 delta; + + /* HC4 match finder */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + /* potential repetition */ + if (ref >= ip-4) { + /* confirmed */ + if (A32(ref) == A32(ip)) { + delta = (u16)(ip-ref); + repl = ml = lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + *matchpos = ref; + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + while ((ref >= ip - MAX_DISTANCE) && nbattempts) { + nbattempts--; + if (*(ref + ml) == *(ip + ml)) { + if (A32(ref) == A32(ip)) { + size_t mlt = + lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { + ml = mlt; + *matchpos = ref; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + /* Complete table */ + if (repl) { + const BYTE *ptr = ip; + const BYTE *end; + end = ip + repl - (MINMATCH-1); + /* Pre-Load */ + while (ptr < end - delta) { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + ptr++; + } + do { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + /* Head of chain */ + hashtable[HASH_VALUE(ptr)] = (ptr) - base; + ptr++; + } while (ptr < end); + hc4->nexttoupdate = end; + } + + return (int)ml; +} + +static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, + const u8 **matchpos, const u8 **startpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + const u8 *ref; + int nbattempts = MAX_NB_ATTEMPTS; + int delta = (int)(ip - startlimit); + + /* First Match */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) + && (nbattempts)) { + nbattempts--; + if (*(startlimit + longest) == *(ref - delta + longest)) { + if (A32(ref) == A32(ip)) { + const u8 *reft = ref + MINMATCH; + const u8 *ipt = ip + MINMATCH; + const u8 *startt = ip; + + while (ipt < matchlimit-(STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(reft) ^ A64(ipt); + #else + u32 diff = A32(reft) ^ A32(ipt); + #endif + + if (!diff) { + ipt += STEPSIZE; + reft += STEPSIZE; + continue; + } + ipt += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ipt < (matchlimit - 3)) + && (A32(reft) == A32(ipt))) { + ipt += 4; + reft += 4; + } + ipt += 2; + #endif + if ((ipt < (matchlimit - 1)) + && (A16(reft) == A16(ipt))) { + reft += 2; + } + if ((ipt < matchlimit) && (*reft == *ipt)) + ipt++; +_endcount: + reft = ref; + + while ((startt > startlimit) + && (reft > hc4->base) + && (startt[-1] == reft[-1])) { + startt--; + reft--; + } + + if ((ipt - startt) > longest) { + longest = (int)(ipt - startt); + *matchpos = reft; + *startpos = startt; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + return longest; +} + +static inline int lz4_encodesequence(const u8 **ip, u8 **op, 
const u8 **anchor, + int ml, const u8 *ref) +{ + int length, len; + u8 *token; + + /* Encode Literal length */ + length = (int)(*ip - *anchor); + token = (*op)++; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *(*op)++ = 255; + *(*op)++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(*anchor, *op, length); + + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); + + /* Encode MatchLength */ + len = (int)(ml - MINMATCH); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *(*op)++ = 255; + *(*op)++ = 255; + } + if (len > 254) { + len -= 255; + *(*op)++ = 255; + } + *(*op)++ = (u8)len; + } else + *token += len; + + /* Prepare next loop */ + *ip += ml; + *anchor = *ip; + + return 0; +} + +int lz4_compresshcctx(struct lz4hc_data *ctx, + const char *source, + char *dest, + int isize) +{ + const u8 *ip = (const u8 *)source; + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + const u8 *const matchlimit = (iend - LASTLITERALS); + + u8 *op = (u8 *)dest; + + int ml, ml2, ml3, ml0; + const u8 *ref = NULL; + const u8 *start2 = NULL; + const u8 *ref2 = NULL; + const u8 *start3 = NULL; + const u8 *ref3 = NULL; + const u8 *start0; + const u8 *ref0; + int lastrun; + + ip++; + + /* Main Loop */ + while (ip < mflimit) { + ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); + if (!ml) { + ip++; + continue; + } + + /* saved, in case we would skip too much */ + start0 = ip; + ref0 = ref; + ml0 = ml; +_search2: + if (ip+ml < mflimit) + ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, + ip + 1, matchlimit, ml, &ref2, &start2); + else + ml2 = ml; + /* No better match */ + if (ml2 == ml) { + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + continue; + } + + if (start0 < ip) { + /* empirical */ + if (start2 < ip + ml0) { + ip = start0; + ref = ref0; + ml = ml0; + } + } + /* + * Here, start0==ip + * First Match too small : removed + */ + if ((start2 - ip) < 3) { + ml = ml2; + ip = start2; + ref = ref2; + goto _search2; + } + +_search3: + /* + * Currently we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) + */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) + new_ml = OPTIMAL_ML; + if (ip + new_ml > start2 + ml2 - MINMATCH) + new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* + * Now, we have start2 = ip+new_ml, + * with new_ml=min(ml, OPTIMAL_ML=18) + */ + if (start2 + ml2 < mflimit) + ml3 = lz4hc_insertandgetwidermatch(ctx, + start2 + ml2 - 3, start2, matchlimit, + ml2, &ref3, &start3); + else + ml3 = ml2; + + /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) + ml = (int)(start2 - ip); + + /* Now, encode 2 sequences */ + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start2; + lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); + continue; + } + + /* Not enough space for match 2 : remove it */ + if (start3 < ip + ml + 3) { + /* + * can write Seq1 immediately ==> Seq2 is removed, + * so Seq3 becomes Seq1 + */ + if (start3 >= (ip + ml)) { + if (start2 < ip + ml) { + int correction = + (int)(ip + ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= 
correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least + * the first one ip & ref are known; Now for ml + */ + if (start2 < ip + ml) { + if ((start2 - ip) < (int)ML_MASK) { + int correction; + if (ml > OPTIMAL_ML) + ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) + ml = (int)(start2 - ip) + ml2 + - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else + ml = (int)(start2 - ip); + } + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _search3; + } + + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8) lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int) (((char *)op) - dest); +} + +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; + lz4hc_init(hc4, (const u8 *)src); + out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, + (char *)dst, (int)src_len); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + return 0; + +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4hc_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC compressor"); diff --git a/lib/lzo/Makefile b/lib/lzo/Makefile index e764116ea12..f0f7d7ca2b8 100644 --- a/lib/lzo/Makefile +++ b/lib/lzo/Makefile @@ -1,5 +1,5 @@ lzo_compress-objs := lzo1x_compress.o -lzo_decompress-objs := lzo1x_decompress.o +lzo_decompress-objs := lzo1x_decompress_safe.o obj-$(CONFIG_LZO_COMPRESS) += lzo_compress.o obj-$(CONFIG_LZO_DECOMPRESS) += lzo_decompress.o diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index a6040990a62..d42efe514aa 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -1,194 +1,217 @@ /* - * LZO1X Compressor from MiniLZO + * LZO1X Compressor from LZO * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ #include #include -#include #include +#include #include "lzodefs.h" static noinline size_t -_lzo1x_1_do_compress(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len, void *wrkmem) +lzo1x_1_do_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + size_t ti, void *wrkmem) { + const unsigned char *ip; + unsigned char *op; const unsigned char * const in_end = in + in_len; - const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; - const unsigned char ** const dict = wrkmem; - const unsigned char *ip = in, *ii = ip; - const unsigned char *end, *m, *m_pos; - size_t m_off, m_len, dindex; - unsigned char *op = out; + const unsigned char * const ip_end = in + in_len - 20; + const unsigned char *ii; + lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; - ip += 4; + op = out; + ip = in; + ii = ip; + ip += ti < 4 ? 4 - ti : 0; for (;;) { - dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; - m_pos = dict[dindex]; - - if (m_pos < in) - goto literal; - - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) - goto literal; - - m_off = ip - m_pos; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - - dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); - m_pos = dict[dindex]; - - if (m_pos < in) - goto literal; - - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) - goto literal; - - m_off = ip - m_pos; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - - goto literal; - -try_match: - if (get_unaligned((const unsigned short *)m_pos) - == get_unaligned((const unsigned short *)ip)) { - if (likely(m_pos[2] == ip[2])) - goto match; - } - + const unsigned char *m_pos; + size_t t, m_len, m_off; + u32 dv; literal: - dict[dindex] = ip; - ++ip; + ip += 1 + ((ip - ii) >> 5); +next: if (unlikely(ip >= ip_end)) break; - continue; - -match: - dict[dindex] = ip; - if (ip != ii) { - size_t t = ip - ii; + dv = get_unaligned_le32(ip); + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; + m_pos = in + dict[t]; + dict[t] = (lzo_dict_t) (ip - in); + if (unlikely(dv != get_unaligned_le32(m_pos))) + goto literal; + ii -= ti; + ti = 0; + t = ip - ii; + if (t != 0) { if (t <= 3) { op[-2] |= t; - } else if (t <= 18) { + COPY4(op, ii); + op += t; + } else if (t <= 16) { *op++ = (t - 3); + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += t; } else { - size_t tt = t - 18; - - *op++ = 0; - while (tt > 255) { - tt -= 255; + if (t <= 18) { + *op++ = (t - 3); + } else { + size_t tt = t - 18; *op++ = 0; + while (unlikely(tt > 255)) { + tt -= 255; + *op++ = 0; + } + *op++ = tt; } - *op++ = tt; + do { + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += 16; + ii += 16; + t -= 16; + } while (t >= 16); + if (t > 0) do { + *op++ = *ii++; + } while (--t > 0); } - do { - *op++ = *ii++; - } while (--t > 0); } - ip += 3; - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ - || m_pos[5] != *ip++ || m_pos[6] != *ip++ - || m_pos[7] != *ip++ || m_pos[8] != *ip++) { - --ip; - m_len = ip - ii; + m_len = 4; + { +#if defined(LZO_USE_CTZ64) + u64 v; + v = get_unaligned((const u64 *) (ip + m_len)) ^ + get_unaligned((const u64 *) (m_pos + m_len)); + if (unlikely(v == 0)) { + do { + m_len += 8; + v = get_unaligned((const u64 *) (ip + m_len)) ^ + get_unaligned((const u64 *) (m_pos + m_len)); + if (unlikely(ip + m_len >= ip_end)) + goto 
m_len_done; + } while (v == 0); + } +# if defined(__LITTLE_ENDIAN) + m_len += (unsigned) __builtin_ctzll(v) / 8; +# elif defined(__BIG_ENDIAN) + m_len += (unsigned) __builtin_clzll(v) / 8; +# else +# error "missing endian definition" +# endif +#elif defined(LZO_USE_CTZ32) + u32 v; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (unlikely(v == 0)) { + do { + m_len += 4; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (v == 0); + } +# if defined(__LITTLE_ENDIAN) + m_len += (unsigned) __builtin_ctz(v) / 8; +# elif defined(__BIG_ENDIAN) + m_len += (unsigned) __builtin_clz(v) / 8; +# else +# error "missing endian definition" +# endif +#else + if (unlikely(ip[m_len] == m_pos[m_len])) { + do { + m_len += 1; + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (ip[m_len] == m_pos[m_len]); + } +#endif + } +m_len_done: - if (m_off <= M2_MAX_OFFSET) { - m_off -= 1; - *op++ = (((m_len - 1) << 5) - | ((m_off & 7) << 2)); - *op++ = (m_off >> 3); - } else if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; + m_off = ip - m_pos; + ip += m_len; + ii = ip; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { + m_off -= 1; + *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = (m_off >> 3); + } else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= M3_MAX_LEN) *op++ = (M3_MARKER | (m_len - 2)); - goto m3_m4_offset; - } else { - m_off -= 0x4000; - - *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) - | (m_len - 2)); - goto m3_m4_offset; + else { + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while (unlikely(m_len > 255)) { + m_len -= 255; + *op++ = 0; + } + *op++ = (m_len); } + *op++ = (m_off << 2); + *op++ = (m_off >> 6); } else { - end = in_end; - m = m_pos + M2_MAX_LEN + 1; - - while (ip < end && *m == *ip) { - m++; - ip++; - } - m_len = ip - ii; - - if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; - if (m_len <= 33) { - *op++ = (M3_MARKER | (m_len - 2)); - } else { - m_len -= 33; - *op++ = M3_MARKER | 0; - goto m3_m4_len; - } - } else { - m_off -= 0x4000; - if (m_len <= M4_MAX_LEN) { - *op++ = (M4_MARKER - | ((m_off & 0x4000) >> 11) + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) + *op++ = (M4_MARKER | ((m_off >> 11) & 8) | (m_len - 2)); - } else { - m_len -= M4_MAX_LEN; - *op++ = (M4_MARKER - | ((m_off & 0x4000) >> 11)); -m3_m4_len: - while (m_len > 255) { - m_len -= 255; - *op++ = 0; - } - - *op++ = (m_len); + else { + m_len -= M4_MAX_LEN; + *op++ = (M4_MARKER | ((m_off >> 11) & 8)); + while (unlikely(m_len > 255)) { + m_len -= 255; + *op++ = 0; } + *op++ = (m_len); } -m3_m4_offset: - *op++ = ((m_off & 63) << 2); + *op++ = (m_off << 2); *op++ = (m_off >> 6); } - - ii = ip; - if (unlikely(ip >= ip_end)) - break; + goto next; } - *out_len = op - out; - return in_end - ii; + return in_end - (ii - ti); } -int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, - size_t *out_len, void *wrkmem) +int lzo1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) { - const unsigned char *ii; + const unsigned char *ip = in; unsigned char *op = out; - size_t t; + size_t l = in_len; + size_t t = 0; - if (unlikely(in_len <= M2_MAX_LEN + 5)) { - t = in_len; - } else { - t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); + while (l > 20) { + size_t ll = l <= (M4_MAX_OFFSET + 1) ? 
l : (M4_MAX_OFFSET + 1); + uintptr_t ll_end = (uintptr_t) ip + ll; + if ((ll_end + ((t + ll) >> 5)) <= ll_end) + break; + BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); + memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); + ip += ll; op += *out_len; + l -= ll; } + t += l; if (t > 0) { - ii = in + in_len - t; + const unsigned char *ii = in + in_len - t; if (op == out && t <= 238) { *op++ = (17 + t); @@ -198,16 +221,21 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, *op++ = (t - 3); } else { size_t tt = t - 18; - *op++ = 0; while (tt > 255) { tt -= 255; *op++ = 0; } - *op++ = tt; } - do { + if (t >= 16) do { + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += 16; + ii += 16; + t -= 16; + } while (t >= 16); + if (t > 0) do { *op++ = *ii++; } while (--t > 0); } @@ -223,4 +251,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X-1 Compressor"); - diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c deleted file mode 100644 index f2fd0985022..00000000000 --- a/lib/lzo/lzo1x_decompress.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * LZO1X Decompressor from MiniLZO - * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer - * - * The full LZO package can be found at: - * http://www.oberhumer.com/opensource/lzo/ - * - * Changed for kernel use by: - * Nitin Gupta - * Richard Purdie - */ - -#ifndef STATIC -#include -#include -#endif - -#include -#include -#include "lzodefs.h" - -#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) -#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) -#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) - -#define COPY4(dst, src) \ - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) - -int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len) -{ - const unsigned char * const ip_end = in + in_len; - unsigned char * const op_end = out + *out_len; - const unsigned char *ip = in, *m_pos; - unsigned char *op = out; - size_t t; - - *out_len = 0; - - if (*ip > 17) { - t = *ip++ - 17; - if (t < 4) - goto match_next; - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - do { - *op++ = *ip++; - } while (--t > 0); - goto first_literal_run; - } - - while ((ip < ip_end)) { - t = *ip++; - if (t >= 16) - goto match; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 15 + *ip++; - } - if (HAVE_OP(t + 3, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 4, ip_end, ip)) - goto input_overrun; - - COPY4(op, ip); - op += 4; - ip += 4; - if (--t > 0) { - if (t >= 4) { - do { - COPY4(op, ip); - op += 4; - ip += 4; - t -= 4; - } while (t >= 4); - if (t > 0) { - do { - *op++ = *ip++; - } while (--t > 0); - } - } else { - do { - *op++ = *ip++; - } while (--t > 0); - } - } - -first_literal_run: - t = *ip++; - if (t >= 16) - goto match; - m_pos = op - (1 + M2_MAX_OFFSET); - m_pos -= t >> 2; - m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - - if (HAVE_OP(3, op_end, op)) - goto output_overrun; - *op++ = *m_pos++; - *op++ = *m_pos++; - *op++ = *m_pos; - - goto match_done; - - do { -match: - if (t >= 64) { - m_pos = op - 1; - m_pos -= (t >> 2) & 7; - m_pos -= *ip++ << 3; - t = (t >> 5) - 1; - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - 
if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - goto copy_match; - } else if (t >= 32) { - t &= 31; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 31 + *ip++; - } - m_pos = op - 1; - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - } else if (t >= 16) { - m_pos = op; - m_pos -= (t & 8) << 11; - - t &= 7; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 7 + *ip++; - } - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; - } else { - m_pos = op - 1; - m_pos -= t >> 2; - m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(2, op_end, op)) - goto output_overrun; - - *op++ = *m_pos++; - *op++ = *m_pos; - goto match_done; - } - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4 - (3 - 1); - do { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4; - } while (t >= 4); - if (t > 0) - do { - *op++ = *m_pos++; - } while (--t > 0); - } else { -copy_match: - *op++ = *m_pos++; - *op++ = *m_pos++; - do { - *op++ = *m_pos++; - } while (--t > 0); - } -match_done: - t = ip[-2] & 3; - if (t == 0) - break; -match_next: - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - - *op++ = *ip++; - if (t > 1) { - *op++ = *ip++; - if (t > 2) - *op++ = *ip++; - } - - t = *ip++; - } while (ip < ip_end); - } - - *out_len = op - out; - return LZO_E_EOF_NOT_FOUND; - -eof_found: - *out_len = op - out; - return (ip == ip_end ? LZO_E_OK : - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); -input_overrun: - *out_len = op - out; - return LZO_E_INPUT_OVERRUN; - -output_overrun: - *out_len = op - out; - return LZO_E_OUTPUT_OVERRUN; - -lookbehind_overrun: - *out_len = op - out; - return LZO_E_LOOKBEHIND_OVERRUN; -} -#ifndef STATIC -EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("LZO1X Decompressor"); - -#endif diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c new file mode 100644 index 00000000000..0dba30ce1c7 --- /dev/null +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -0,0 +1,228 @@ +/* + * LZO1X Decompressor from LZO + * + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for Linux kernel use by: + * Nitin Gupta + * Richard Purdie + */ + +#ifndef STATIC +#include +#include +#endif +#include +#include +#include "lzodefs.h" + +#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) +#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) +#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun +#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun +#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun + +int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len) +{ + unsigned char *op; + const unsigned char *ip; + size_t t, next; + size_t state = 0; + const unsigned char *m_pos; + const unsigned char * const ip_end = in + in_len; + unsigned char * const op_end = out + *out_len; + + op = out; + ip = in; + + if (unlikely(in_len < 3)) + goto input_overrun; + if (*ip > 17) { + t = *ip++ - 17; + if (t < 4) { + next = t; + goto match_next; + } + goto copy_literal_run; + } + + for (;;) { + t = *ip++; + if (t < 16) { + if (likely(state == 0)) { + if (unlikely(t == 0)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 15 + *ip++; + } + t += 3; +copy_literal_run: + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { + const unsigned char *ie = ip + t; + unsigned char *oe = op + t; + do { + COPY8(op, ip); + op += 8; + ip += 8; + COPY8(op, ip); + op += 8; + ip += 8; + } while (ip < ie); + ip = ie; + op = oe; + } else { + NEED_OP(t); + NEED_IP(t + 3); + do { + *op++ = *ip++; + } while (--t > 0); + } + state = 4; + continue; + } else if (state != 4) { + next = t & 3; + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + TEST_LB(m_pos); + NEED_OP(2); + op[0] = m_pos[0]; + op[1] = m_pos[1]; + op += 2; + goto match_next; + } else { + next = t & 3; + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + t = 3; + } + } else if (t >= 64) { + next = t & 3; + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1 + (3 - 1); + } else if (t >= 32) { + t = (t & 31) + (3 - 1); + if (unlikely(t == 2)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + NEED_IP(2); + } + m_pos = op - 1; + next = get_unaligned_le16(ip); + ip += 2; + m_pos -= next >> 2; + next &= 3; + } else { + m_pos = op; + m_pos -= (t & 8) << 11; + t = (t & 7) + (3 - 1); + if (unlikely(t == 2)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + NEED_IP(2); + } + next = get_unaligned_le16(ip); + ip += 2; + m_pos -= next >> 2; + next &= 3; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; + } + TEST_LB(m_pos); + if (op - m_pos >= 8) { + unsigned char *oe = op + t; + if (likely(HAVE_OP(t + 15))) { + do { + COPY8(op, m_pos); + op += 8; + m_pos += 8; + COPY8(op, m_pos); + op += 8; + m_pos += 8; + } while (op < oe); + op = oe; + if (HAVE_IP(6)) { + state = next; + COPY4(op, ip); + op += next; + ip += next; + continue; + } + } else { + NEED_OP(t); + do { + *op++ = *m_pos++; + } while (op < oe); + } + } else { + unsigned char *oe = op + t; + NEED_OP(t); + op[0] = m_pos[0]; + op[1] = m_pos[1]; + op += 2; + m_pos += 2; + do { + *op++ = *m_pos++; + } while (op < oe); + } +match_next: + state = next; + t = next; + if (likely(HAVE_IP(6) && HAVE_OP(4))) { + COPY4(op, ip); + op += t; + ip += t; + } else { + NEED_IP(t + 3); + NEED_OP(t); + while (t > 0) { + *op++ = *ip++; + t--; + } + } + 
} + +eof_found: + *out_len = op - out; + return (t != 3 ? LZO_E_ERROR : + ip == ip_end ? LZO_E_OK : + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN); + +input_overrun: + *out_len = op - out; + return LZO_E_INPUT_OVERRUN; + +output_overrun: + *out_len = op - out; + return LZO_E_OUTPUT_OVERRUN; + +lookbehind_overrun: + *out_len = op - out; + return LZO_E_LOOKBEHIND_OVERRUN; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO1X Decompressor"); + +#endif diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index b6d482c492e..ddc8db510d8 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -1,19 +1,37 @@ /* * lzodefs.h -- architecture, OS and compiler specific defines * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ -#define LZO_VERSION 0x2020 -#define LZO_VERSION_STRING "2.02" -#define LZO_VERSION_DATE "Oct 17 2005" + +#define COPY4(dst, src) \ + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +#if defined(__x86_64__) +#define COPY8(dst, src) \ + put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) +#else +#define COPY8(dst, src) \ + COPY4(dst, src); COPY4((dst) + 4, (src) + 4) +#endif + +#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) +#error "conflicting endian definitions" +#elif defined(__x86_64__) +#define LZO_USE_CTZ64 1 +#define LZO_USE_CTZ32 1 +#elif defined(__i386__) || defined(__powerpc__) +#define LZO_USE_CTZ32 1 +#else +#define LZO_USE_CTZ32 1 +#endif #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 @@ -34,8 +52,10 @@ #define M3_MARKER 32 #define M4_MARKER 16 -#define D_BITS 14 -#define D_MASK ((1u << D_BITS) - 1) +#define lzo_dict_t unsigned short +#define D_BITS 13 +#define D_SIZE (1u << D_BITS) +#define D_MASK (D_SIZE - 1) #define D_HIGH ((D_MASK >> 1) + 1) #define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ diff --git a/lib/memcopy.c b/lib/memcopy.c new file mode 100644 index 00000000000..70fb6b2da1c --- /dev/null +++ b/lib/memcopy.c @@ -0,0 +1,403 @@ +/* + * memcopy.c -- subroutines for memory copy functions. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * The code is derived from the GNU C Library. + * Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc. + */ + +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ + +#include + +/* + * _wordcopy_fwd_aligned -- Copy block beginning at SRCP to block beginning + * at DSTP with LEN `op_t' words (not LEN bytes!). + * Both SRCP and DSTP should be aligned for memory operations on `op_t's. 
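+ *
+ * Illustration (not part of the routine itself; OPSIZ assumed to be 8, as on
+ * a 64-bit machine): copying 32 word-aligned bytes is a call such as
+ *
+ *	_wordcopy_fwd_aligned((long int) dst, (long int) src, 32 / OPSIZ);
+ *
+ * i.e. LEN is 4 op_t words.  The switch below only selects the entry point
+ * into the unrolled do1..do8 body so that the residual (len % 8) words are
+ * handled without a per-word branch.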
+ */ +void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + switch (len % 8) { + case 2: + a0 = ((op_t *) srcp)[0]; + srcp -= 6 * OPSIZ; + dstp -= 7 * OPSIZ; + len += 6; + goto do1; + case 3: + a1 = ((op_t *) srcp)[0]; + srcp -= 5 * OPSIZ; + dstp -= 6 * OPSIZ; + len += 5; + goto do2; + case 4: + a0 = ((op_t *) srcp)[0]; + srcp -= 4 * OPSIZ; + dstp -= 5 * OPSIZ; + len += 4; + goto do3; + case 5: + a1 = ((op_t *) srcp)[0]; + srcp -= 3 * OPSIZ; + dstp -= 4 * OPSIZ; + len += 3; + goto do4; + case 6: + a0 = ((op_t *) srcp)[0]; + srcp -= 2 * OPSIZ; + dstp -= 3 * OPSIZ; + len += 2; + goto do5; + case 7: + a1 = ((op_t *) srcp)[0]; + srcp -= 1 * OPSIZ; + dstp -= 2 * OPSIZ; + len += 1; + goto do6; + case 0: + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + return; + a0 = ((op_t *) srcp)[0]; + srcp -= 0 * OPSIZ; + dstp -= 1 * OPSIZ; + goto do7; + case 1: + a1 = ((op_t *) srcp)[0]; + srcp -=-1 * OPSIZ; + dstp -= 0 * OPSIZ; + len -= 1; + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + goto do0; + goto do8; /* No-op. */ + } + + do { +do8: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; +do7: + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[1] = a0; +do6: + a0 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[2] = a1; +do5: + a1 = ((op_t *) srcp)[3]; + ((op_t *) dstp)[3] = a0; +do4: + a0 = ((op_t *) srcp)[4]; + ((op_t *) dstp)[4] = a1; +do3: + a1 = ((op_t *) srcp)[5]; + ((op_t *) dstp)[5] = a0; +do2: + a0 = ((op_t *) srcp)[6]; + ((op_t *) dstp)[6] = a1; +do1: + a1 = ((op_t *) srcp)[7]; + ((op_t *) dstp)[7] = a0; + + srcp += 8 * OPSIZ; + dstp += 8 * OPSIZ; + len -= 8; + } while (len != 0); + + /* + * This is the right position for do0. Please don't move it into + * the loop. + */ +do0: + ((op_t *) dstp)[0] = a1; +} + +/* + * _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to block + * beginning at DSTP with LEN `op_t' words (not LEN bytes!). DSTP should + * be aligned for memory operations on `op_t's, but SRCP must *not* be aligned. + */ + +void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2, a3; + int sh_1, sh_2; + + /* + * Calculate how to shift a word read at the memory operation aligned + * srcp to make it aligned for copy. + */ + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* + * Make SRCP aligned by rounding it down to the beginning of the `op_t' + * it points in the middle of. + */ + srcp &= -OPSIZ; + + switch (len % 4) { + case 2: + a1 = ((op_t *) srcp)[0]; + a2 = ((op_t *) srcp)[1]; + srcp -= 1 * OPSIZ; + dstp -= 3 * OPSIZ; + len += 2; + goto do1; + case 3: + a0 = ((op_t *) srcp)[0]; + a1 = ((op_t *) srcp)[1]; + srcp -= 0 * OPSIZ; + dstp -= 2 * OPSIZ; + len += 1; + goto do2; + case 0: + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + return; + a3 = ((op_t *) srcp)[0]; + a0 = ((op_t *) srcp)[1]; + srcp -=-1 * OPSIZ; + dstp -= 1 * OPSIZ; + len += 0; + goto do3; + case 1: + a2 = ((op_t *) srcp)[0]; + a3 = ((op_t *) srcp)[1]; + srcp -=-2 * OPSIZ; + dstp -= 0 * OPSIZ; + len -= 1; + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + goto do0; + goto do4; /* No-op. 
*/ + } + + do { +do4: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); +do3: + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2); +do2: + a2 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2); +do1: + a3 = ((op_t *) srcp)[3]; + ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2); + + srcp += 4 * OPSIZ; + dstp += 4 * OPSIZ; + len -= 4; + } while (len != 0); + + /* + * This is the right position for do0. Please don't move it into + * the loop. + */ +do0: + ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); +} + +/* + * _wordcopy_bwd_aligned -- Copy block finishing right before + * SRCP to block finishing right before DSTP with LEN `op_t' words (not LEN + * bytes!). Both SRCP and DSTP should be aligned for memory operations + * on `op_t's. + */ +void _wordcopy_bwd_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + switch (len % 8) { + case 2: + srcp -= 2 * OPSIZ; + dstp -= 1 * OPSIZ; + a0 = ((op_t *) srcp)[1]; + len += 6; + goto do1; + case 3: + srcp -= 3 * OPSIZ; + dstp -= 2 * OPSIZ; + a1 = ((op_t *) srcp)[2]; + len += 5; + goto do2; + case 4: + srcp -= 4 * OPSIZ; + dstp -= 3 * OPSIZ; + a0 = ((op_t *) srcp)[3]; + len += 4; + goto do3; + case 5: + srcp -= 5 * OPSIZ; + dstp -= 4 * OPSIZ; + a1 = ((op_t *) srcp)[4]; + len += 3; + goto do4; + case 6: + srcp -= 6 * OPSIZ; + dstp -= 5 * OPSIZ; + a0 = ((op_t *) srcp)[5]; + len += 2; + goto do5; + case 7: + srcp -= 7 * OPSIZ; + dstp -= 6 * OPSIZ; + a1 = ((op_t *) srcp)[6]; + len += 1; + goto do6; + case 0: + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + return; + srcp -= 8 * OPSIZ; + dstp -= 7 * OPSIZ; + a0 = ((op_t *) srcp)[7]; + goto do7; + case 1: + srcp -= 9 * OPSIZ; + dstp -= 8 * OPSIZ; + a1 = ((op_t *) srcp)[8]; + len -= 1; + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + goto do0; + goto do8; /* No-op. */ + } + + do { +do8: + a0 = ((op_t *) srcp)[7]; + ((op_t *) dstp)[7] = a1; +do7: + a1 = ((op_t *) srcp)[6]; + ((op_t *) dstp)[6] = a0; +do6: + a0 = ((op_t *) srcp)[5]; + ((op_t *) dstp)[5] = a1; +do5: + a1 = ((op_t *) srcp)[4]; + ((op_t *) dstp)[4] = a0; +do4: + a0 = ((op_t *) srcp)[3]; + ((op_t *) dstp)[3] = a1; +do3: + a1 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[2] = a0; +do2: + a0 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[1] = a1; +do1: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + + srcp -= 8 * OPSIZ; + dstp -= 8 * OPSIZ; + len -= 8; + } while (len != 0); + + /* + * This is the right position for do0. Please don't move it into + * the loop. + */ +do0: + ((op_t *) dstp)[7] = a1; +} + +/* + * _wordcopy_bwd_dest_aligned -- Copy block finishing right before SRCP to + * block finishing right before DSTP with LEN `op_t' words (not LEN bytes!). + * DSTP should be aligned for memory operations on `op_t', but SRCP must *not* + * be aligned. + */ +void _wordcopy_bwd_dest_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2, a3; + int sh_1, sh_2; + + /* + * Calculate how to shift a word read at the memory operation aligned + * srcp to make it aligned for copy. + */ + + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* + * Make srcp aligned by rounding it down to the beginning of the op_t + * it points in the middle of. 
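+ *
+ * Worked example (illustration only): with OPSIZ == 8 and srcp % OPSIZ == 3,
+ * sh_1 = 8 * 3 = 24 and sh_2 = 64 - 24 = 40, and every destination word is
+ * assembled by MERGE()ing the tail of one aligned source word with the head
+ * of its neighbour.  The exact shift direction is endian-dependent and comes
+ * from the MERGE() definition in the included header.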
+ */ + srcp &= -OPSIZ; + srcp += OPSIZ; + + switch (len % 4) { + case 2: + srcp -= 3 * OPSIZ; + dstp -= 1 * OPSIZ; + a2 = ((op_t *) srcp)[2]; + a1 = ((op_t *) srcp)[1]; + len += 2; + goto do1; + case 3: + srcp -= 4 * OPSIZ; + dstp -= 2 * OPSIZ; + a3 = ((op_t *) srcp)[3]; + a2 = ((op_t *) srcp)[2]; + len += 1; + goto do2; + case 0: + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + return; + srcp -= 5 * OPSIZ; + dstp -= 3 * OPSIZ; + a0 = ((op_t *) srcp)[4]; + a3 = ((op_t *) srcp)[3]; + goto do3; + case 1: + srcp -= 6 * OPSIZ; + dstp -= 4 * OPSIZ; + a1 = ((op_t *) srcp)[5]; + a0 = ((op_t *) srcp)[4]; + len -= 1; + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + goto do0; + goto do4; /* No-op. */ + } + + do { +do4: + a3 = ((op_t *) srcp)[3]; + ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); +do3: + a2 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2); +do2: + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2); +do1: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); + + srcp -= 4 * OPSIZ; + dstp -= 4 * OPSIZ; + len -= 4; + } while (len != 0); + + /* + * This is the right position for do0. Please don't move it into + * the loop. + */ +do0: + ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); +} + diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index ffc9fc7f3b0..d437919b138 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -9,12 +9,15 @@ #include #include +enum rwsem_waiter_type { + RWSEM_WAITING_FOR_WRITE, + RWSEM_WAITING_FOR_READ +}; + struct rwsem_waiter { struct list_head list; struct task_struct *task; - unsigned int flags; -#define RWSEM_WAITING_FOR_READ 0x00000001 -#define RWSEM_WAITING_FOR_WRITE 0x00000002 + enum rwsem_waiter_type type; }; int rwsem_is_locked(struct rw_semaphore *sem) @@ -22,9 +25,9 @@ int rwsem_is_locked(struct rw_semaphore *sem) int ret = 1; unsigned long flags; - if (spin_trylock_irqsave(&sem->wait_lock, flags)) { + if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { ret = (sem->activity != 0); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } return ret; } @@ -44,7 +47,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, lockdep_init_map(&sem->dep_map, name, key, 0); #endif sem->activity = 0; - spin_lock_init(&sem->wait_lock); + raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } EXPORT_SYMBOL(__init_rwsem); @@ -67,33 +70,17 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (!wakewrite) { - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) - goto out; - goto dont_wake_writers; - } - - /* if we are allowed to wake writers try to grant a single write lock - * if there's a writer at the front of the queue - * - we leave the 'waiting count' incremented to signify potential - * contention - */ - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { - sem->activity = -1; - list_del(&waiter->list); - tsk = waiter->task; - /* Don't touch waiter after ->task has been NULLed */ - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + if (waiter->type == RWSEM_WAITING_FOR_WRITE) { + if (wakewrite) + /* Wake up a writer. Note that we do not grant it the + * lock - it will have to acquire it when it runs. */ + wake_up_process(waiter->task); goto out; } - /* grant an infinite number of read locks to the front of the queue */ - dont_wake_writers: + /* grant read locks to all queued readers. 
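The do/while below walks the wait list in order, removing and waking each reader, and stops either at the end of the list or at the first waiter of type RWSEM_WAITING_FOR_WRITE; sem->activity is then increased by the number of readers woken.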
*/ woken = 0; - while (waiter->flags & RWSEM_WAITING_FOR_READ) { + do { struct list_head *next = waiter->list.next; list_del(&waiter->list); @@ -103,10 +90,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) wake_up_process(tsk); put_task_struct(tsk); woken++; - if (list_empty(&sem->wait_list)) + if (next == &sem->wait_list) break; waiter = list_entry(next, struct rwsem_waiter, list); - } + } while (waiter->type != RWSEM_WAITING_FOR_WRITE); sem->activity += woken; @@ -121,18 +108,10 @@ static inline struct rw_semaphore * __rwsem_wake_one_writer(struct rw_semaphore *sem) { struct rwsem_waiter *waiter; - struct task_struct *tsk; - - sem->activity = -1; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - list_del(&waiter->list); + wake_up_process(waiter->task); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); return sem; } @@ -145,12 +124,12 @@ void __sched __down_read(struct rw_semaphore *sem) struct task_struct *tsk; unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity >= 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity++; - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); goto out; } @@ -159,13 +138,13 @@ void __sched __down_read(struct rw_semaphore *sem) /* set up my own style of waitqueue */ waiter.task = tsk; - waiter.flags = RWSEM_WAITING_FOR_READ; + waiter.type = RWSEM_WAITING_FOR_READ; get_task_struct(tsk); list_add_tail(&waiter.list, &sem->wait_list); /* we don't need to touch the semaphore struct anymore */ - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); /* wait to be given the lock */ for (;;) { @@ -189,7 +168,7 @@ int __down_read_trylock(struct rw_semaphore *sem) int ret = 0; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity >= 0 && list_empty(&sem->wait_list)) { /* granted */ @@ -197,14 +176,13 @@ int __down_read_trylock(struct rw_semaphore *sem) ret = 1; } - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; } /* * get a write lock on the semaphore - * - we increment the waiting count anyway to indicate an exclusive lock */ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) { @@ -212,39 +190,34 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) struct task_struct *tsk; unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity = -1; - spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + raw_spin_lock_irqsave(&sem->wait_lock, flags); /* set up my own style of waitqueue */ + tsk = current; waiter.task = tsk; - waiter.flags = RWSEM_WAITING_FOR_WRITE; - get_task_struct(tsk); - + waiter.type = RWSEM_WAITING_FOR_WRITE; list_add_tail(&waiter.list, &sem->wait_list); - /* we don't need to touch the semaphore struct anymore */ - spin_unlock_irqrestore(&sem->wait_lock, flags); - - /* wait to be given the lock */ + /* wait for someone to release the lock */ for (;;) { - if (!waiter.task) + /* + * That is the key to support write lock stealing: allows the + * task already on CPU to get the lock soon rather than put + * itself into sleep and waiting for system woke 
it or someone + * else in the head of the wait list up. + */ + if (sem->activity == 0) break; - schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + schedule(); + raw_spin_lock_irqsave(&sem->wait_lock, flags); } + /* got the lock */ + sem->activity = -1; + list_del(&waiter.list); - tsk->state = TASK_RUNNING; - out: - ; + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } void __sched __down_write(struct rw_semaphore *sem) @@ -260,7 +233,7 @@ int __down_write_trylock(struct rw_semaphore *sem) unsigned long flags; int ret = 0; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity == 0 && list_empty(&sem->wait_list)) { /* granted */ @@ -268,7 +241,7 @@ int __down_write_trylock(struct rw_semaphore *sem) ret = 1; } - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; } @@ -280,12 +253,12 @@ void __up_read(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (--sem->activity == 0 && !list_empty(&sem->wait_list)) sem = __rwsem_wake_one_writer(sem); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } /* @@ -295,13 +268,13 @@ void __up_write(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); sem->activity = 0; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 1); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } /* @@ -312,12 +285,12 @@ void __downgrade_write(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); sem->activity = 1; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 0); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } diff --git a/lib/rwsem.c b/lib/rwsem.c index aa7c3052261..ddb234a36fc 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -2,6 +2,8 @@ * * Written by David Howells (dhowells@redhat.com). * Derived from arch/i386/kernel/semaphore.c + * + * Writer lock-stealing by Alex Shi */ #include #include @@ -22,27 +24,28 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, lockdep_init_map(&sem->dep_map, name, key, 0); #endif sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); + raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } EXPORT_SYMBOL(__init_rwsem); +enum rwsem_waiter_type { + RWSEM_WAITING_FOR_WRITE, + RWSEM_WAITING_FOR_READ +}; + struct rwsem_waiter { struct list_head list; struct task_struct *task; - unsigned int flags; -#define RWSEM_WAITING_FOR_READ 0x00000001 -#define RWSEM_WAITING_FOR_WRITE 0x00000002 + enum rwsem_waiter_type type; }; -/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and - * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held - * since the rwsem value was observed. 
- */ -#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ -#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ -#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ +enum rwsem_wake_type { + RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ + RWSEM_WAKE_READERS, /* Wake readers only */ + RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ +}; /* * handle the lock release when processes blocked on it that can now run @@ -55,7 +58,7 @@ struct rwsem_waiter { * - writers are only woken if downgrading is false */ static struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) +__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) { struct rwsem_waiter *waiter; struct task_struct *tsk; @@ -63,60 +66,35 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) signed long oldcount, woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) - goto readers_only; - - if (wake_type == RWSEM_WAKE_READ_OWNED) - /* Another active reader was observed, so wakeup is not - * likely to succeed. Save the atomic op. - */ + if (waiter->type == RWSEM_WAITING_FOR_WRITE) { + if (wake_type == RWSEM_WAKE_ANY) + /* Wake writer at the front of the queue, but do not + * grant it the lock yet as we want other writers + * to be able to steal it. Readers, on the other hand, + * will block as they will notice the queued writer. + */ + wake_up_process(waiter->task); goto out; + } - /* There's a writer at the front of the queue - try to grant it the - * write lock. However, we only wake this writer if we can transition - * the active part of the count from 0 -> 1 - */ - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - if (waiter->list.next == &sem->wait_list) - adjustment -= RWSEM_WAITING_BIAS; - - try_again_write: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (oldcount & RWSEM_ACTIVE_MASK) - /* Someone grabbed the sem already */ - goto undo_write; - - /* We must be careful not to touch 'waiter' after we set ->task = NULL. - * It is an allocated on the waiter's stack and may become invalid at - * any time after that point (due to a wakeup from another source). + /* Writers might steal the lock before we grant it to the next reader. + * We prefer to do the first reader grant before counting readers + * so we can bail out early if a writer stole the lock. */ - list_del(&waiter->list); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); - goto out; - - readers_only: - /* If we come here from up_xxxx(), another thread might have reached - * rwsem_down_failed_common() before we acquired the spinlock and - * woken up a waiter, making it now active. We prefer to check for - * this first in order to not spend too much time with the spinlock - * held if we're not going to be able to wake up readers in the end. - * - * Note that we do not need to update the rwsem count: any writer - * trying to acquire rwsem will run rwsem_down_write_failed() due - * to the waiting threads and block trying to acquire the spinlock. - * - * We use a dummy atomic update in order to acquire the cache line - * exclusively since we expect to succeed and run the final rwsem - * count adjustment pretty soon. 
- */ - if (wake_type == RWSEM_WAKE_ANY && - rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) - /* Someone grabbed the sem for write already */ - goto out; + adjustment = 0; + if (wake_type != RWSEM_WAKE_READ_OWNED) { + adjustment = RWSEM_ACTIVE_READ_BIAS; + try_reader_grant: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { + /* A writer stole the lock. Undo our reader grant. */ + if (rwsem_atomic_update(-adjustment, sem) & + RWSEM_ACTIVE_MASK) + goto out; + /* Last active locker left. Retry waking readers. */ + goto try_reader_grant; + } + } /* Grant an infinite number of read locks to the readers at the front * of the queue. Note we increment the 'active part' of the count by @@ -132,17 +110,19 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) waiter = list_entry(waiter->list.next, struct rwsem_waiter, list); - } while (waiter->flags & RWSEM_WAITING_FOR_READ); + } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - adjustment = woken * RWSEM_ACTIVE_READ_BIAS; - if (waiter->flags & RWSEM_WAITING_FOR_READ) + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; + if (waiter->type != RWSEM_WAITING_FOR_WRITE) /* hit end of list above */ adjustment -= RWSEM_WAITING_BIAS; - rwsem_atomic_add(adjustment, sem); + if (adjustment) + rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; - for (loop = woken; loop > 0; loop--) { + loop = woken; + do { waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; @@ -150,41 +130,31 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) waiter->task = NULL; wake_up_process(tsk); put_task_struct(tsk); - } + } while (--loop); sem->wait_list.next = next; next->prev = &sem->wait_list; out: return sem; - - /* undo the change to the active count, but check for a transition - * 1->0 */ - undo_write: - if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) - goto out; - goto try_again_write; } /* - * wait for a lock to be granted + * wait for the read lock to be granted */ -static struct rw_semaphore __sched * -rwsem_down_failed_common(struct rw_semaphore *sem, - unsigned int flags, signed long adjustment) +struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { + signed long adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; signed long count; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ - spin_lock_irq(&sem->wait_lock); waiter.task = tsk; - waiter.flags = flags; + waiter.type = RWSEM_WAITING_FOR_READ; get_task_struct(tsk); + raw_spin_lock_irq(&sem->wait_lock); if (list_empty(&sem->wait_list)) adjustment += RWSEM_WAITING_BIAS; list_add_tail(&waiter.list, &sem->wait_list); @@ -192,26 +162,24 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* If there are no active locks, wake the front queued process(es) up. + /* If there are no active locks, wake the front queued process(es). * - * Alternatively, if we're called from a failed down_write(), there - * were already threads queued before us and there are no active - * writers, the lock must be read owned; so we try to wake any read - * locks that were queued ahead of us. 
*/ - if (count == RWSEM_WAITING_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); - else if (count > RWSEM_WAITING_BIAS && - adjustment == -RWSEM_ACTIVE_WRITE_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + * If there are no writers and we are first in the queue, + * wake our own waiter to join the existing active readers ! + */ + if (count == RWSEM_WAITING_BIAS || + (count > RWSEM_WAITING_BIAS && + adjustment != -RWSEM_ACTIVE_READ_BIAS)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); - spin_unlock_irq(&sem->wait_lock); + raw_spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ - for (;;) { + while (true) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!waiter.task) break; schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); } tsk->state = TASK_RUNNING; @@ -220,21 +188,63 @@ rwsem_down_failed_common(struct rw_semaphore *sem, } /* - * wait for the read lock to be granted - */ -struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) -{ - return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, - -RWSEM_ACTIVE_READ_BIAS); -} - -/* - * wait for the write lock to be granted + * wait until we successfully acquire the write lock */ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { - return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, - -RWSEM_ACTIVE_WRITE_BIAS); + signed long adjustment = -RWSEM_ACTIVE_WRITE_BIAS; + struct rwsem_waiter waiter; + struct task_struct *tsk = current; + signed long count; + + /* set up my own style of waitqueue */ + waiter.task = tsk; + waiter.type = RWSEM_WAITING_FOR_WRITE; + + raw_spin_lock_irq(&sem->wait_lock); + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); + + /* we're now waiting on the lock, but no longer actively locking */ + count = rwsem_atomic_update(adjustment, sem); + + /* If there were already threads queued before us and there are no + * active writers, the lock must be read owned; so we try to wake + * any read locks that were queued ahead of us. */ + if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); + + /* wait until we successfully acquire the lock */ + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + while (true) { + if (!(count & RWSEM_ACTIVE_MASK)) { + /* Try acquiring the write lock. */ + count = RWSEM_ACTIVE_WRITE_BIAS; + if (!list_is_singular(&sem->wait_list)) + count += RWSEM_WAITING_BIAS; + if (cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == + RWSEM_WAITING_BIAS) + break; + } + + raw_spin_unlock_irq(&sem->wait_lock); + + /* Block until there are no active lockers. 
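Each pass re-reads sem->count after a wakeup; once RWSEM_ACTIVE_MASK is clear the outer loop retries the cmpxchg() from RWSEM_WAITING_BIAS above. Because the lock is never handed off at wakeup, another queued or newly failing writer that reaches that cmpxchg first simply steals it; this is the writer lock-stealing noted in the file header.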
*/ + do { + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } while ((count = sem->count) & RWSEM_ACTIVE_MASK); + + raw_spin_lock_irq(&sem->wait_lock); + } + + list_del(&waiter.list); + raw_spin_unlock_irq(&sem->wait_lock); + tsk->state = TASK_RUNNING; + + return sem; } /* @@ -245,13 +255,13 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return sem; } @@ -265,13 +275,13 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return sem; } diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 4689cb073da..503f087382a 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) + if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu))) goto out; /* diff --git a/lib/string.c b/lib/string.c index 01fad9b203e..67caaef1475 100644 --- a/lib/string.c +++ b/lib/string.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifndef __HAVE_ARCH_STRNICMP /** @@ -596,11 +597,12 @@ EXPORT_SYMBOL(memset); */ void *memcpy(void *dest, const void *src, size_t count) { - char *tmp = dest; - const char *s = src; + unsigned long dstp = (unsigned long)dest; + unsigned long srcp = (unsigned long)src; + + /* Copy from the beginning to the end */ + mem_copy_fwd(dstp, srcp, count); - while (count--) - *tmp++ = *s++; return dest; } EXPORT_SYMBOL(memcpy); @@ -617,21 +619,15 @@ EXPORT_SYMBOL(memcpy); */ void *memmove(void *dest, const void *src, size_t count) { - char *tmp; - const char *s; - - if (dest <= src) { - tmp = dest; - s = src; - while (count--) - *tmp++ = *s++; + unsigned long dstp = (unsigned long)dest; + unsigned long srcp = (unsigned long)src; + + if (dest - src >= count) { + /* Copy from the beginning to the end */ + mem_copy_fwd(dstp, srcp, count); } else { - tmp = dest; - tmp += count; - s = src; - s += count; - while (count--) - *--tmp = *--s; + /* Copy from the end to the beginning */ + mem_copy_bwd(dstp, srcp, count); } return dest; } diff --git a/mm/ashmem.c b/mm/ashmem.c index 66e3f23ee33..c1078aa2972 100644 --- a/mm/ashmem.c +++ b/mm/ashmem.c @@ -221,23 +221,30 @@ static ssize_t ashmem_read(struct file *file, char __user *buf, /* If size is not set, or set to 0, always return EOF. */ if (asma->size == 0) { - goto out; + goto out_unlock; } if (!asma->file) { ret = -EBADF; - goto out; + goto out_unlock; } + mutex_unlock(&ashmem_mutex); + + /* + * asma and asma->file are used outside the lock here. We assume + * once asma->file is set it will never be changed, and will not + * be destroyed until all references to the file are dropped and + * ashmem_release is called. 
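+ *
+ * (The size and asma->file checks above are still made with ashmem_mutex
+ * held; only the read itself, which may block, runs unlocked.)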
+ */ ret = asma->file->f_op->read(asma->file, buf, len, pos); - if (ret < 0) { - goto out; + if (ret >= 0) { + /** Update backing file pos, since f_ops->read() doesn't */ + asma->file->f_pos = *pos; } + return ret; - /** Update backing file pos, since f_ops->read() doesn't */ - asma->file->f_pos = *pos; - -out: +out_unlock: mutex_unlock(&ashmem_mutex); return ret; } @@ -406,50 +413,48 @@ static int set_prot_mask(struct ashmem_area *asma, unsigned long prot) static int set_name(struct ashmem_area *asma, void __user *name) { + char lname[ASHMEM_NAME_LEN]; + int len; int ret = 0; + len = strncpy_from_user(lname, name, ASHMEM_NAME_LEN); + if (len < 0) + return len; + if (len == ASHMEM_NAME_LEN) + lname[ASHMEM_NAME_LEN - 1] = '\0'; mutex_lock(&ashmem_mutex); /* cannot change an existing mapping's name */ - if (unlikely(asma->file)) { + if (unlikely(asma->file)) ret = -EINVAL; - goto out; - } + else + strcpy(asma->name + ASHMEM_NAME_PREFIX_LEN, lname); - if (unlikely(copy_from_user(asma->name + ASHMEM_NAME_PREFIX_LEN, - name, ASHMEM_NAME_LEN))) - ret = -EFAULT; - asma->name[ASHMEM_FULL_NAME_LEN-1] = '\0'; - -out: mutex_unlock(&ashmem_mutex); - return ret; } static int get_name(struct ashmem_area *asma, void __user *name) { int ret = 0; + char lname[ASHMEM_NAME_LEN]; + size_t len; mutex_lock(&ashmem_mutex); if (asma->name[ASHMEM_NAME_PREFIX_LEN] != '\0') { - size_t len; - /* * Copying only `len', instead of ASHMEM_NAME_LEN, bytes * prevents us from revealing one user's stack to another. */ len = strlen(asma->name + ASHMEM_NAME_PREFIX_LEN) + 1; - if (unlikely(copy_to_user(name, - asma->name + ASHMEM_NAME_PREFIX_LEN, len))) - ret = -EFAULT; + memcpy(lname, asma->name + ASHMEM_NAME_PREFIX_LEN, len); } else { - if (unlikely(copy_to_user(name, ASHMEM_NAME_DEF, - sizeof(ASHMEM_NAME_DEF)))) - ret = -EFAULT; + len = strlen(ASHMEM_NAME_DEF) + 1; + memcpy(lname, ASHMEM_NAME_DEF, len); } mutex_unlock(&ashmem_mutex); - + if (unlikely(copy_to_user(name, lname, len))) + ret = -EFAULT; return ret; } diff --git a/mm/compaction.c b/mm/compaction.c index 6cc604bd564..1ed66ec5b35 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -655,7 +655,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, /* Compact all zones within a node */ -static int compact_node(int nid) +static int compact_node(int nid, bool sync) { int zoneid; pg_data_t *pgdat; @@ -673,6 +673,7 @@ static int compact_node(int nid) .nr_freepages = 0, .nr_migratepages = 0, .order = -1, + .sync = sync, }; zone = &pgdat->node_zones[zoneid]; @@ -693,12 +694,12 @@ static int compact_node(int nid) } /* Compact all nodes in the system */ -static int compact_nodes(void) +int compact_nodes(bool sync) { int nid; for_each_online_node(nid) - compact_node(nid); + compact_node(nid, sync); return COMPACT_COMPLETE; } @@ -711,7 +712,7 @@ int sysctl_compaction_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { if (write) - return compact_nodes(); + return compact_nodes(true); return 0; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index e9a17857a20..756071f70e0 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -720,9 +720,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); read_lock(&tasklist_lock); - if (sysctl_oom_kill_allocating_task && + if (sysctl_oom_kill_allocating_task && current->mm && !oom_unkillable_task(current, NULL, nodemask) && - current->mm && !atomic_read(¤t->mm->oom_disable_count)) { + current->signal->oom_score_adj != 
OOM_SCORE_ADJ_MIN && + !atomic_read(¤t->mm->oom_disable_count)) { /* * oom_kill_process() needs tasklist_lock held. If it returns * non-zero, current could not be killed so we must fallback to diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8859578e4bd..53e6b8a6bb7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5624,6 +5624,17 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count) bool is_pageblock_removable_nolock(struct page *page) { struct zone *zone = page_zone(page); + unsigned long pfn = page_to_pfn(page); + + /* + * We have to be careful here because we are iterating over memory + * sections which are not zone aware so we might end up outside of + * the zone but still within the section. + */ + if (!zone || zone->zone_start_pfn > pfn || + zone->zone_start_pfn + zone->spanned_pages <= pfn) + return false; + return __count_immobile_pages(zone, page, 0); } @@ -5800,6 +5811,7 @@ static struct trace_print_flags pageflag_names[] = { #ifdef CONFIG_MEMORY_FAILURE {1UL << PG_hwpoison, "hwpoison" }, #endif + {1UL << PG_readahead, "PG_readahead" }, {-1UL, NULL }, }; diff --git a/mm/readahead.c b/mm/readahead.c index 867f9dd82dc..e1bc5681a8f 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -184,6 +184,9 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, if (!page) break; page->index = page_offset; + + page->flags |= (1L << PG_readahead); + list_add(&page->lru, &page_pool); if (page_idx == nr_to_read - lookahead_size) SetPageReadahead(page); diff --git a/mm/shmem.c b/mm/shmem.c index fba53caba0d..d00a6258f94 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1988,6 +1988,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, bool remount) { char *this_char, *value, *rest; + struct mempolicy *mpol = NULL; while (options != NULL) { this_char = options; @@ -2014,7 +2015,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, printk(KERN_ERR "tmpfs: No value for mount option '%s'\n", this_char); - return 1; + goto error; } if (!strcmp(this_char,"size")) { @@ -2057,19 +2058,25 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, if (*rest) goto bad_val; } else if (!strcmp(this_char,"mpol")) { - if (mpol_parse_str(value, &sbinfo->mpol, 1)) + mpol_put(mpol); + if (mpol_parse_str(value, &mpol, 1)) { + mpol = NULL; goto bad_val; + } } else { printk(KERN_ERR "tmpfs: Bad mount option %s\n", this_char); - return 1; + goto error; } } + sbinfo->mpol = mpol; return 0; bad_val: printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", value, this_char); +error: + mpol_put(mpol); return 1; } @@ -2081,6 +2088,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) unsigned long inodes; int error = -EINVAL; + config.mpol = NULL; if (shmem_parse_options(data, &config, true)) return error; @@ -2105,8 +2113,13 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) sbinfo->max_inodes = config.max_inodes; sbinfo->free_inodes = config.max_inodes - inodes; - mpol_put(sbinfo->mpol); - sbinfo->mpol = config.mpol; /* transfers initial ref */ + /* + * Preserve previous mempolicy unless mpol remount option was specified. 
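+ * config.mpol is initialised to NULL and only set when shmem_parse_options()
+ * actually saw an "mpol=" option, so e.g. "mount -o remount,size=256M" leaves
+ * an earlier "mpol=interleave" policy untouched.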
+ */ + if (config.mpol) { + mpol_put(sbinfo->mpol); + sbinfo->mpol = config.mpol; /* transfers initial ref */ + } out: spin_unlock(&sbinfo->stat_lock); return error; @@ -2137,6 +2150,7 @@ static void shmem_put_super(struct super_block *sb) struct shmem_sb_info *sbinfo = SHMEM_SB(sb); percpu_counter_destroy(&sbinfo->used_blocks); + mpol_put(sbinfo->mpol); kfree(sbinfo); sb->s_fs_info = NULL; } diff --git a/mm/slub.c b/mm/slub.c index f73234db904..46d61070d04 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2487,7 +2487,7 @@ EXPORT_SYMBOL(kmem_cache_free); * take the list_lock. */ static int slub_min_order; -static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; +static int slub_max_order; static int slub_min_objects; /* diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 27071ee2a4e..c9f46b01e7f 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -16,6 +16,12 @@ #include #include +#define INVALID_UID ((uid_t) -1) +#define uid_valid(uid) ((uid) != -1) +#define uid_lte(a, b) ((a) <= (b)) +#define uid_eq(a, b) ((a) == (b)) +#define uid_gte(a, b) ((a) >= (b)) + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -30,6 +36,8 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->uid_start = INVALID_UID; + r->uid_end = INVALID_UID; r->fr_net = hold_net(ops->fro_net); /* The lock is not required here, the list in unreacheable @@ -176,6 +184,23 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static inline uid_t fib_nl_uid(struct nlattr *nla) +{ + return nla_get_u32(nla); +} + +static int nla_put_uid(struct sk_buff *skb, int idx, uid_t uid) +{ + return nla_put_u32(skb, idx, uid); +} + +static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule) +{ + return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) || + (uid_gte(fl->flowi_uid, rule->uid_start) && + uid_lte(fl->flowi_uid, rule->uid_end)); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags) { @@ -190,6 +215,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; + if (!fib_uid_range_match(fl, rule)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -360,6 +388,19 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else if (rule->action == FR_ACT_GOTO) goto errout_free; + /* UID start and end must either both be valid or both unspecified. 
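Supplying only one of FRA_UID_START/FRA_UID_END, or a start greater than the end, falls through the checks below to errout_free and the rule is rejected; with neither attribute present both ends stay INVALID_UID and fib_uid_range_match() treats the rule as matching every uid.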
*/ + rule->uid_start = rule->uid_end = INVALID_UID; + if (tb[FRA_UID_START] || tb[FRA_UID_END]) { + if (tb[FRA_UID_START] && tb[FRA_UID_END]) { + rule->uid_start = fib_nl_uid(tb[FRA_UID_START]); + rule->uid_end = fib_nl_uid(tb[FRA_UID_END]); + } + if (!uid_valid(rule->uid_start) || + !uid_valid(rule->uid_end) || + !uid_lte(rule->uid_start, rule->uid_end)) + goto errout_free; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -442,7 +483,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh_get_table(frh, tb) != rule->table)) + if (frh_get_table(frh, tb) && + (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && @@ -465,6 +507,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) continue; + if (tb[FRA_UID_START] && + !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START]))) + continue; + + if (tb[FRA_UID_END] && + !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -519,7 +569,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ - + nla_total_size(4); /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_UID_START */ + + nla_total_size(4); /* FRA_UID_END */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -577,6 +629,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->target) NLA_PUT_U32(skb, FRA_GOTO, rule->target); + if (uid_valid(rule->uid_start)) + nla_put_uid(skb, FRA_UID_START, rule->uid_start); + + if (uid_valid(rule->uid_end)) + nla_put_uid(skb, FRA_UID_END, rule->uid_end); + if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bf488051a8d..da5a884db47 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1559,7 +1559,7 @@ static const struct net_protocol udp_protocol = { static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, - .err_handler = ping_err, + .err_handler = ping_v4_err, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 92fc5f69f5d..a54817aced3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -482,6 +482,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 23ef31baa1a..19d18cb46c2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -334,6 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct sock *sk; struct inet_sock *inet; __be32 daddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -346,6 +347,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -357,6 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, 
struct sk_buff *skb) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = rt->rt_spec_dst; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -375,7 +378,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -387,6 +390,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->daddr = (param->replyopts.opt.opt.srr ? param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; @@ -484,6 +488,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -580,6 +585,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; @@ -596,11 +602,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; ipc.tx_flags = 0; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, &icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -790,7 +797,7 @@ static void icmp_redirect(struct sk_buff *skb) if (iph->protocol == IPPROTO_ICMP && iph->ihl >= 5 && pskb_may_pull(skb, (iph->ihl<<2)+8)) { - ping_err(skb, icmp_hdr(skb)->un.gateway); + ping_v4_err(skb, icmp_hdr(skb)->un.gateway); } out: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c14d88ad348..2b8e7d7df33 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -358,11 +358,12 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -391,11 +392,12 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -604,6 +606,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; + newsk->sk_mark = inet_rsk(req)->ir_mark; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8c6563361ab..16ac1635db0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1487,12 +1487,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, daddr = replyopts.opt.opt.faddr; } - flowi4_init_output(&fl4, arg->bound_dev_if, 0, + flowi4_init_output(&fl4, arg->bound_dev_if, + IP4_REPLY_MARK(sock_net(sk), skb->mark), RT_TOS(ip_hdr(skb)->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), daddr, rt->rt_spec_dst, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 39b403f854c..b79fa527029 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -46,8 +45,18 @@ #include #include +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#include +#include +#include +#include +#include +#endif -static struct ping_table ping_table; + +struct ping_table ping_table; +struct pingv6_ops pingv6_ops; +EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; @@ -57,6 +66,7 @@ static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask) pr_debug("hash(%d) = %d\n", num, res); return res; } +EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned num) @@ -64,7 +74,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } -static int ping_v4_get_port(struct sock *sk, unsigned short ident) +int ping_get_port(struct sock *sk, unsigned short ident) { struct hlist_nulls_node *node; struct hlist_nulls_head *hlist; @@ -102,6 +112,10 @@ static int ping_v4_get_port(struct sock *sk, unsigned short ident) ping_portaddr_for_each_entry(sk2, node, hlist) { isk2 = inet_sk(sk2); + /* BUG? Why is this reuse and not reuseaddr? ping.c + * doesn't turn off SO_REUSEADDR, and it doesn't expect + * that other ping processes can steal its packets. + */ if ((isk2->inet_num == ident) && (sk2 != sk) && (!sk2->sk_reuse || !sk->sk_reuse)) @@ -124,17 +138,18 @@ static int ping_v4_get_port(struct sock *sk, unsigned short ident) write_unlock_bh(&ping_table.lock); return 1; } +EXPORT_SYMBOL_GPL(ping_get_port); -static void ping_v4_hash(struct sock *sk) +void ping_hash(struct sock *sk) { - pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); + pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." 
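	 * (ping sockets are expected to be inserted into ping_table from
	 * ping_get_port() at bind time, so the proto ->hash() callback should
	 * never be reached; hence the BUG().)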
*/ } -static void ping_v4_unhash(struct sock *sk) +void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); - pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); @@ -144,31 +159,61 @@ static void ping_v4_unhash(struct sock *sk) write_unlock_bh(&ping_table.lock); } } +EXPORT_SYMBOL_GPL(ping_unhash); -static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr, - u16 ident, int dif) +static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; + int dif = skb->dev->ifindex; + + if (skb->protocol == htons(ETH_P_IP)) { + pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", + (int)ident, &ip_hdr(skb)->daddr, dif); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", + (int)ident, &ipv6_hdr(skb)->daddr, dif); +#endif + } - pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n", - (int)ident, (unsigned long)daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); - pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk, - (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr, - sk->sk_bound_dev_if); - pr_debug("iterate\n"); if (isk->inet_num != ident) continue; - if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr) - continue; + + if (skb->protocol == htons(ETH_P_IP) && + sk->sk_family == AF_INET) { + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, + (int) isk->inet_num, &isk->inet_rcv_saddr, + sk->sk_bound_dev_if); + + if (isk->inet_rcv_saddr && + isk->inet_rcv_saddr != ip_hdr(skb)->daddr) + continue; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (skb->protocol == htons(ETH_P_IPV6) && + sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, + (int) isk->inet_num, + &inet6_sk(sk)->rcv_saddr, + sk->sk_bound_dev_if); + + if (!ipv6_addr_any(&np->rcv_saddr) && + !ipv6_addr_equal(&np->rcv_saddr, + &ipv6_hdr(skb)->daddr)) + continue; +#endif + } + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; @@ -197,34 +242,42 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, } -static int ping_init_sock(struct sock *sk) +int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); gid_t group = current_egid(); gid_t range[2]; - struct group_info *group_info = get_current_groups(); - int i, j, count = group_info->ngroups; + struct group_info *group_info; + int i, j, count; + int ret = 0; inet_get_ping_group_range_net(net, range, range+1); if (range[0] <= group && group <= range[1]) return 0; + group_info = get_current_groups(); + count = group_info->ngroups; for (i = 0; i < group_info->nblocks; i++) { int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); for (j = 0; j < cp_count; j++) { group = group_info->blocks[i][j]; if (range[0] <= group && group <= range[1]) - return 0; + goto out_release_group; } count -= cp_count; } - return -EACCES; + ret = -EACCES; + +out_release_group: + put_group_info(group_info); + return ret; } +EXPORT_SYMBOL_GPL(ping_init_sock); -static void ping_close(struct sock *sk, 
long timeout) +void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); @@ -232,36 +285,122 @@ static void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } +EXPORT_SYMBOL_GPL(ping_close); + +/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ +int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { + struct net *net = sock_net(sk); + if (sk->sk_family == AF_INET) { + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + int chk_addr_ret; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", + sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); + + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) + chk_addr_ret = RTN_LOCAL; + + if ((sysctl_ip_nonlocal_bind == 0 && + isk->freebind == 0 && isk->transparent == 0 && + chk_addr_ret != RTN_LOCAL) || + chk_addr_ret == RTN_MULTICAST || + chk_addr_ret == RTN_BROADCAST) + return -EADDRNOTAVAIL; + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (sk->sk_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; + int addr_type, scoped, has_addr; + struct net_device *dev = NULL; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", + sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); + + addr_type = ipv6_addr_type(&addr->sin6_addr); + scoped = __ipv6_addr_needs_scope_id(addr_type); + if ((addr_type != IPV6_ADDR_ANY && + !(addr_type & IPV6_ADDR_UNICAST)) || + (scoped && !addr->sin6_scope_id)) + return -EINVAL; + + rcu_read_lock(); + if (addr->sin6_scope_id) { + dev = dev_get_by_index_rcu(net, addr->sin6_scope_id); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } + has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, + scoped); + rcu_read_unlock(); + + if (!(isk->freebind || isk->transparent || has_addr || + addr_type == IPV6_ADDR_ANY)) + return -EADDRNOTAVAIL; + + if (scoped) + sk->sk_bound_dev_if = addr->sin6_scope_id; +#endif + } else { + return -EAFNOSUPPORT; + } + return 0; +} +void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +{ + if (saddr->sa_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) saddr; + isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (saddr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; + struct ipv6_pinfo *np = inet6_sk(sk); + np->rcv_saddr = np->saddr = addr->sin6_addr; +#endif + } +} + +void ping_clear_saddr(struct sock *sk, int dif) +{ + sk->sk_bound_dev_if = dif; + if (sk->sk_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + isk->inet_rcv_saddr = isk->inet_saddr = 0; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&np->saddr, 0, sizeof(np->saddr)); +#endif + } +} /* * We need our own bind because there are no privileged id's == local ports. * Moreover, we don't allow binding to multi- and broadcast addresses. 
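 * With the refactoring below the family-specific address checks live in
 * ping_check_bind_addr() and the (re)setting of the source address in
 * ping_set_saddr()/ping_clear_saddr(), so a single ping_bind() serves
 * both AF_INET and AF_INET6 sockets.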
*/ -static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *isk = inet_sk(sk); unsigned short snum; - int chk_addr_ret; int err; + int dif = sk->sk_bound_dev_if; - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n", - sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); - - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr == INADDR_ANY) - chk_addr_ret = RTN_LOCAL; - - if ((sysctl_ip_nonlocal_bind == 0 && - isk->freebind == 0 && isk->transparent == 0 && - chk_addr_ret != RTN_LOCAL) || - chk_addr_ret == RTN_MULTICAST || - chk_addr_ret == RTN_BROADCAST) - return -EADDRNOTAVAIL; + err = ping_check_bind_addr(sk, isk, uaddr, addr_len); + if (err) + return err; lock_sock(sk); @@ -270,42 +409,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; err = -EADDRINUSE; - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; - snum = ntohs(addr->sin_port); - if (ping_v4_get_port(sk, snum) != 0) { - isk->inet_saddr = isk->inet_rcv_saddr = 0; + ping_set_saddr(sk, uaddr); + snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port); + if (ping_get_port(sk, snum) != 0) { + ping_clear_saddr(sk, dif); goto out; } - pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n", - (int)isk->inet_num, - (unsigned long) isk->inet_rcv_saddr, - (int)sk->sk_bound_dev_if); + pr_debug("after bind(): num = %d, dif = %d\n", + (int)isk->inet_num, + (int)sk->sk_bound_dev_if); err = 0; - if (isk->inet_rcv_saddr) + if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || + (sk->sk_family == AF_INET6 && + !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; isk->inet_sport = htons(isk->inet_num); isk->inet_daddr = 0; isk->inet_dport = 0; + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (sk->sk_family == AF_INET6) + memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); +#endif + sk_dst_reset(sk); out: release_sock(sk); pr_debug("ping_v4_bind -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_bind); /* * Is this a supported type of ICMP message? */ -static inline int ping_supported(int type, int code) +static inline int ping_supported(int family, int type, int code) { - if (type == ICMP_ECHO && code == 0) - return 1; - return 0; + return (family == AF_INET && type == ICMP_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); } /* @@ -313,30 +460,44 @@ static inline int ping_supported(int type, int code) * sort of error condition. 
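 * ping_err() is now family-agnostic: ping_v4_err() (added further down)
 * calls it with offset 0 and the IPv4 branch recomputes the inner
 * header offset from iph->ihl, while the ICMPv6 error handler passes
 * the offset of the embedded ICMPv6 header directly.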
*/ -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); - -void ping_err(struct sk_buff *skb, u32 info) +void ping_err(struct sk_buff *skb, int offset, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int family; + struct icmphdr *icmph; struct inet_sock *inet_sock; - int type = icmph->type; - int code = icmph->code; + int type; + int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; int err; + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + offset = iph->ihl << 2; + family = AF_INET; + type = icmp_hdr(skb)->type; + code = icmp_hdr(skb)->code; + icmph = (struct icmphdr *)(skb->data + offset); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + family = AF_INET6; + type = icmp6_hdr(skb)->icmp6_type; + code = icmp6_hdr(skb)->icmp6_code; + icmph = (struct icmphdr *) (skb->data + offset); + } else { + BUG(); + } + /* We assume the packet has already been checked by icmp_unreach */ - if (!ping_supported(icmph->type, icmph->code)) + if (!ping_supported(family, icmph->type, icmph->code)) return; - pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type, - code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n", + skb->protocol, type, code, ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); - sk = ping_v4_lookup(net, iph->daddr, iph->saddr, - ntohs(icmph->un.echo.id), skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk == NULL) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); pr_debug("no socket, dropping\n"); @@ -348,70 +509,85 @@ void ping_err(struct sk_buff *skb, u32 info) harderr = 0; inet_sock = inet_sk(sk); - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - /* This is not a real error but ping wants to see it. - * Report it with some fake errno. */ - err = EREMOTEIO; - break; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; + if (skb->protocol == htons(ETH_P_IP)) { + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + /* This is not a real error but ping wants to see it. + * Report it with some fake errno. */ + err = EREMOTEIO; + break; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + case ICMP_REDIRECT: + /* See ICMP_SOURCE_QUENCH */ + err = EREMOTEIO; + break; } - break; - case ICMP_REDIRECT: - /* See ICMP_SOURCE_QUENCH */ - err = EREMOTEIO; - break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + harderr = pingv6_ops.icmpv6_err_convert(type, code, &err); +#endif } /* * RFC1122: OK. 
Passes ICMP errors back to application, as per * 4.1.3.3. */ - if (!inet_sock->recverr) { + if ((family == AF_INET && !inet_sock->recverr) || + (family == AF_INET6 && !inet6_sk(sk)->recverr)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { - ip_icmp_error(sk, skb, err, 0 /* no remote port */, - info, (u8 *)icmph); + if (family == AF_INET) { + ip_icmp_error(sk, skb, err, 0 /* no remote port */, + info, (u8 *)icmph); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + pingv6_ops.ipv6_icmp_error(sk, skb, err, 0, + info, (u8 *)icmph); +#endif + } } sk->sk_err = err; sk->sk_error_report(sk); out: sock_put(sk); } +EXPORT_SYMBOL_GPL(ping_err); + +void ping_v4_err(struct sk_buff *skb, u32 info) +{ + ping_err(skb, 0, info); +} /* - * Copy and checksum an ICMP Echo packet from user space into a buffer. + * Copy and checksum an ICMP Echo packet from user space into a buffer + * starting from the payload. */ -struct pingfakehdr { - struct icmphdr icmph; - struct iovec *iov; - u32 wcheck; -}; - -static int ping_getfrag(void *from, char * to, - int offset, int fraglen, int odd, struct sk_buff *skb) +int ping_getfrag(void *from, char *to, + int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = (struct pingfakehdr *)from; @@ -422,20 +598,33 @@ static int ping_getfrag(void *from, char * to, pfh->iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; + } else if (offset < sizeof(struct icmphdr)) { + BUG(); + } else { + if (csum_partial_copy_fromiovecend + (to, pfh->iov, offset - sizeof(struct icmphdr), + fraglen, &pfh->wcheck)) + return -EFAULT; + } - return 0; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + /* For IPv6, checksum each skb as we go along, as expected by + * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in + * wcheck, it will be finalized in ping_v4_push_pending_frames. + */ + if (pfh->family == AF_INET6) { + skb->csum = pfh->wcheck; + skb->ip_summed = CHECKSUM_NONE; + pfh->wcheck = 0; } - if (offset < sizeof(struct icmphdr)) - BUG(); - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) - return -EFAULT; +#endif + return 0; } +EXPORT_SYMBOL_GPL(ping_getfrag); -static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, - struct flowi4 *fl4) +static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, + struct flowi4 *fl4) { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); @@ -447,24 +636,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, return ip_push_pending_frames(sk, fl4); } -static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct net *net = sock_net(sk); - struct flowi4 fl4; - struct inet_sock *inet = inet_sk(sk); - struct ipcm_cookie ipc; - struct icmphdr user_icmph; - struct pingfakehdr pfh; - struct rtable *rt = NULL; - struct ip_options_data opt_copy; - int free = 0; - u32 saddr, daddr, faddr; - u8 tos; - int err; - - pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); - +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len) { + u8 type, code; if (len > 0xFFFF) return -EMSGSIZE; @@ -479,15 +653,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* * Fetch the ICMP header provided by the userland. - * iovec is modified! + * iovec is modified! 
The ICMP header is consumed. */ - - if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov, - sizeof(struct icmphdr))) + if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len)) return -EFAULT; - if (!ping_supported(user_icmph.type, user_icmph.code)) + + if (family == AF_INET) { + type = ((struct icmphdr *) user_icmph)->type; + code = ((struct icmphdr *) user_icmph)->code; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + type = ((struct icmp6hdr *) user_icmph)->icmp6_type; + code = ((struct icmp6hdr *) user_icmph)->icmp6_code; +#endif + } else { + BUG(); + } + + if (!ping_supported(family, type, code)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(ping_common_sendmsg); + +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct net *net = sock_net(sk); + struct flowi4 fl4; + struct inet_sock *inet = inet_sk(sk); + struct ipcm_cookie ipc; + struct icmphdr user_icmph; + struct pingfakehdr pfh; + struct rtable *rt = NULL; + struct ip_options_data opt_copy; + int free = 0; + __be32 saddr, daddr, faddr; + u8 tos; + int err; + + pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + /* * Get and verify the address. */ @@ -559,7 +771,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); @@ -592,13 +805,14 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; pfh.iov = msg->msg_iov; pfh.wcheck = 0; + pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else - err = ping_push_pending_frames(sk, &pfh, &fl4); + err = ping_v4_push_pending_frames(sk, &pfh, &fl4); release_sock(sk); out: @@ -619,11 +833,13 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; } -static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + int family = sk->sk_family; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct sk_buff *skb; int copied, err; @@ -632,11 +848,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); + if (addr_len) { + if (family == AF_INET) + *addr_len = sizeof(*sin); + else if (family == AF_INET6 && addr_len) + *addr_len = sizeof(*sin6); + } - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { + return ip_recv_error(sk, msg, len); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + return pingv6_ops.ipv6_recv_error(sk, msg, len); +#endif + } + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ 
-655,15 +882,45 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); - /* Copy the address. */ - if (sin) { - sin->sin_family = AF_INET; - sin->sin_port = 0 /* skb->h.uh->source */; - sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + /* Copy the address and add cmsg data. */ + if (family == AF_INET) { + sin = (struct sockaddr_in *) msg->msg_name; + if (sin) { + sin->sin_family = AF_INET; + sin->sin_port = 0 /* skb->h.uh->source */; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *ip6 = ipv6_hdr(skb); + sin6 = (struct sockaddr_in6 *) msg->msg_name; + + if (sin6) { + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; + sin6->sin6_flowinfo = 0; + if (np->sndflow) + sin6->sin6_flowinfo = + *(__be32 *)ip6 & IPV6_FLOWINFO_MASK; + sin6->sin6_scope_id = + ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); + } + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.datagram_recv_ctl(sk, msg, skb); +#endif + } else { + BUG(); } - if (isk->cmsg_flags) - ip_cmsg_recv(msg, skb); + err = copied; done: @@ -672,8 +929,9 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pr_debug("ping_recvmsg -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_recvmsg); -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); @@ -685,6 +943,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); /* @@ -695,10 +954,7 @@ void ping_rcv(struct sk_buff *skb) { struct sock *sk; struct net *net = dev_net(skb->dev); - struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph = icmp_hdr(skb); - u32 saddr = iph->saddr; - u32 daddr = iph->daddr; /* We assume the packet has already been checked by icmp_rcv */ @@ -708,8 +964,7 @@ void ping_rcv(struct sk_buff *skb) /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); - sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), - skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { pr_debug("rcv on socket %p\n", sk); ping_queue_rcv_skb(sk, skb_get(skb)); @@ -720,6 +975,7 @@ void ping_rcv(struct sk_buff *skb) /* We're called from icmp_rcv(). kfree_skb() is done there. 
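 * The same entry point is now also invoked from icmpv6_rcv() for
 * ICMPV6_ECHO_REPLY (see the net/ipv6/icmp.c hunk below), which is why
 * the IPv4-only address lookup was replaced by the protocol-aware
 * ping_lookup().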
*/ } +EXPORT_SYMBOL_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -730,13 +986,13 @@ struct proto ping_prot = { .disconnect = udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .sendmsg = ping_sendmsg, + .sendmsg = ping_v4_sendmsg, .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, - .hash = ping_v4_hash, - .unhash = ping_v4_unhash, - .get_port = ping_v4_get_port, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 61714bd5292..415b3a806bb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -564,7 +564,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, + sock_i_uid(sk)); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b5638545deb..6c58c9238c9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -741,6 +741,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) (rt1->rt_mark ^ rt2->rt_mark) | (rt1->rt_key_tos ^ rt2->rt_key_tos) | (rt1->rt_route_iif ^ rt2->rt_route_iif) | + (rt1->rt_uid ^ rt2->rt_uid) | (rt1->rt_oif ^ rt2->rt_oif)) == 0; } @@ -1886,6 +1887,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) fl4.flowi4_oif = rt->dst.dev->ifindex; fl4.flowi4_iif = skb->dev->ifindex; fl4.flowi4_mark = skb->mark; + fl4.flowi4_uid = skb->sk ? sock_i_uid(skb->sk) : 0; rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) @@ -2065,6 +2067,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2200,6 +2203,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2383,6 +2387,7 @@ out: return err; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2587,6 +2592,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? 
: dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_mark = fl4->flowi4_mark; + rth->rt_uid = fl4->flowi4_uid; rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; @@ -2838,6 +2844,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) rt_is_output_route(rth) && rth->rt_oif == flp4->flowi4_oif && rth->rt_mark == flp4->flowi4_mark && + rth->rt_uid == flp4->flowi4_uid && !((rth->rt_key_tos ^ flp4->flowi4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && @@ -2917,6 +2924,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; + rt->rt_uid = ort->rt_uid; rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; @@ -3012,6 +3020,9 @@ static int rt_fill_info(struct net *net, if (rt->rt_mark) NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); + if (rt->rt_uid != (uid_t) -1) + NLA_PUT_BE32(skb, RTA_UID, rt->rt_uid); + error = rt->dst.error; if (peer) { inet_peer_refcheck(rt->peer); @@ -3127,6 +3138,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void .flowi4_tos = rtm->rtm_tos, .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, .flowi4_mark = mark, + .flowi4_uid = tb[RTA_UID] ? nla_get_u32(tb[RTA_UID]) : current_uid(), }; rt = ip_route_output_key(net, &fl4); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3bc5c8f7c71..184a40f4564 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -310,6 +310,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_mark = inet_request_mark(sk, skb); ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -348,11 +349,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { struct flowi4 fl4; - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + flowi4_init_output(&fl4, 0, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + ireq->loc_addr, th->source, th->dest, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 69fd7201129..2a4b1815172 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -119,6 +119,21 @@ static int ipv4_ping_group_range(ctl_table *table, int write, return ret; } +/* Validate changes from /proc interface. 
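+ * Writes outside the accepted range 3..100 are silently reverted to the
+ * previous value. Illustrative usage (the sysctl entry itself is added
+ * further below):
+ *   echo 10  > /proc/sys/net/ipv4/tcp_default_init_rwnd   # accepted
+ *   echo 200 > /proc/sys/net/ipv4/tcp_default_init_rwnd   # reverted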
*/ +static int proc_tcp_default_init_rwnd(ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int old_value = *(int *)ctl->data; + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + int new_value = *(int *)ctl->data; + + if (write && ret == 0 && (new_value < 3 || new_value > 100)) + *(int *)ctl->data = old_value; + + return ret; +} + static int proc_tcp_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -631,13 +646,20 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { + { .procname = "tcp_thin_dupack", .data = &sysctl_tcp_thin_dupack, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_default_init_rwnd", + .data = &sysctl_tcp_default_init_rwnd, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tcp_default_init_rwnd + }, { .procname = "udp_mem", .data = &sysctl_udp_mem, @@ -721,6 +743,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_ping_group_range, }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv4.sysctl_fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_fwmark_accept", + .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 09ced58e6a5..dcd64f4699b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -485,14 +485,12 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { - struct sk_buff *skb; answ = tp->rcv_nxt - tp->copied_seq; - /* Subtract 1, if FIN is in queue. */ - skb = skb_peek_tail(&sk->sk_receive_queue); - if (answ && skb) - answ -= tcp_hdr(skb)->fin; + /* Subtract 1, if FIN was received */ + if (answ && sock_flag(sk, SOCK_DONE)) + answ--; } else answ = tp->urg_seq - tp->copied_seq; release_sock(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d73aab3fbfc..ec0cdab8ed9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -97,6 +97,7 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_abc __read_mostly; +int sysctl_tcp_default_init_rwnd __read_mostly = TCP_DEFAULT_INIT_RCVWND; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -342,14 +343,16 @@ static void tcp_fixup_rcvbuf(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); - /* Try to select rcvbuf so that 4 mss-sized segments - * will fit to window and corresponding skbs will fit to our rcvbuf. - * (was 3; 4 is minimum to allow fast retransmit to work.) + /* Try to select rcvbuf so that sysctl_tcp_default_init_rwnd mss-sized + * segments will fit to window and corresponding skbs will fit to our + * rcvbuf. + * (was 3; then 4 as then minimum to allow fast retransmit to work.) */ while (tcp_win_from_space(rcvmem) < tp->advmss) rcvmem += 128; - if (sk->sk_rcvbuf < 4 * rcvmem) - sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); + if (sk->sk_rcvbuf < sysctl_tcp_default_init_rwnd * rcvmem) + sk->sk_rcvbuf = min(sysctl_tcp_default_init_rwnd * rcvmem, + sysctl_tcp_rmem[2]); } /* 4. Try to fixup all. 
It is made immediately after connection enters diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6cdf6a28f6b..0b4a35e03df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -650,6 +650,12 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. + * routing might fail in this case. No choice here, if we choose to force + * input interface, we will misroute in case of asymmetric route. + */ + if (sk) + arg.bound_dev_if = sk->sk_bound_dev_if; net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, @@ -1338,6 +1344,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); + ireq->ir_mark = inet_request_mark(sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index faf257b9415..a9f3481b041 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -229,14 +229,13 @@ void tcp_select_initial_window(int __space, __u32 mss, } /* Set initial window to a value enough for senders starting with - * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place + * initial congestion window of sysctl_tcp_default_init_rwnd. Place * a limit on the initial window when mss is larger than 1460. */ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = TCP_DEFAULT_INIT_RCVWND; + int init_cwnd = sysctl_tcp_default_init_rwnd; if (mss > 1460) - init_cwnd = - max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); + init_cwnd = max_t(u32, (1460 * init_cwnd) / mss, 2); /* when initializing use the value from init_rcv_wnd * rather than the default from above */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b5a19340a9..55feb88305f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -928,7 +928,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a0b4c5da8d4..e8ee4279fd2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -86,6 +86,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.rt.rt_oif = fl4->flowi4_oif; xdst->u.rt.rt_mark = fl4->flowi4_mark; + xdst->u.rt.rt_uid = fl4->flowi4_uid; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934acfac..753be5dd409 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o diff --git a/net/ipv6/addrconf.c 
b/net/ipv6/addrconf.c index 1587d0d9295..b10720f2903 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -192,6 +192,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -226,6 +227,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -1680,6 +1682,31 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad } #endif +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table) { + /* Determines into what table to put autoconf PIO/RIO/default routes + * learned on this device. + * + * - If 0, use the same table for every device. This puts routes into + * one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route + * (but note that these three are currently all equal to + * RT6_TABLE_MAIN). + * - If > 0, use the specified table. + * - If < 0, put routes into table dev->ifindex + (-rt_table). + */ + struct inet6_dev *idev = in6_dev_get(dev); + u32 table; + int sysctl = idev->cnf.accept_ra_rt_table; + if (sysctl == 0) { + table = default_table; + } else if (sysctl > 0) { + table = (u32) sysctl; + } else { + table = (unsigned) dev->ifindex + (-sysctl); + } + in6_dev_put(idev); + return table; +} + /* * Add prefix route. */ @@ -1689,7 +1716,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_PREFIX), .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -3860,6 +3887,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table; array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -4470,6 +4498,13 @@ static struct addrconf_sysctl_table }, #endif #endif + { + .procname = "accept_ra_rt_table", + .data = &ipv6_devconf.accept_ra_rt_table, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4252b3cc183..d70560bd2e5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -701,6 +702,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); @@ -1129,6 +1131,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_udplite_proto; + err = proto_register(&pingv6_prot, 1); + if (err) + goto out_unregister_ping_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. 
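	 * (pingv6_prot only needs to be registered at this point; the matching
	 * protosw and the pingv6_ops hooks are set up later in inet6_init()
	 * via pingv6_init(), added in the next hunk.)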
@@ -1222,6 +1227,10 @@ static int __init inet6_init(void) if (err) goto ipv6_packet_fail; + err = pingv6_init(); + if (err) + goto pingv6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1234,6 +1243,8 @@ static int __init inet6_init(void) sysctl_fail: ipv6_packet_cleanup(); #endif +pingv6_fail: + pingv6_exit(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -1281,6 +1292,8 @@ static int __init inet6_init(void) rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); +out_unregister_ping_proto: + proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b46e9f88ce3..c880af549e6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -160,6 +160,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 11900417b1c..a83b79611aa 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -80,10 +81,22 @@ static inline struct sock *icmpv6_sk(struct net *net) return net->ipv6.icmp_sk[smp_processor_id()]; } +static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ + struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); + + if (!(type & ICMPV6_INFOMSG_MASK)) + if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) + ping_err(skb, offset, info); +} + static int icmpv6_rcv(struct sk_buff *skb); static const struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, + .err_handler = icmpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; @@ -217,7 +230,8 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -300,8 +314,8 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -382,6 +396,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb->network_header + sizeof(*hdr)) > skb->tail) @@ -447,6 +462,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) ipv6_addr_copy(&fl6.daddr, &hdr->saddr); if (saddr) ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -455,6 +471,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if 
(!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -529,6 +546,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; @@ -545,11 +563,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl6.saddr, saddr); fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) @@ -595,7 +615,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) +void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; @@ -686,7 +706,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto discard_it; } @@ -707,7 +728,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - /* we couldn't care less */ + ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a58e8cf664..219023f1839 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -69,9 +69,10 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, final_p = fl6_update_dst(&fl6, np->opt, &final); ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -222,6 +223,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_sport = inet->inet_sport; fl6.fl6_dport = inet->inet_dport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c new file mode 100644 index 00000000000..0082212c3d7 --- /dev/null +++ b/net/ipv6/ping.c @@ -0,0 +1,219 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * "Ping" sockets + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ipv4/ping.c code. 
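+ * Only the IPv6-specific pieces live in this file (the pingv6 protosw,
+ * the pingv6_ops glue used by the shared code, and ping_v6_sendmsg());
+ * socket bookkeeping such as bind, hashing and receive is reused from
+ * net/ipv4/ping.c via the ping_* helpers exported there.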
+ * + * Authors: Lorenzo Colitti (IPv6 support) + * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6), + * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32) + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + + +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + return -EAFNOSUPPORT; +} +int dummy_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} +int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +{ + return -EAFNOSUPPORT; +} +void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev, int strict) +{ + return 0; +} + +int __init pingv6_init(void) +{ + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.datagram_recv_ctl = datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. 
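+ * It swaps the pingv6_ops hooks back to the dummy_* stubs above, so the
+ * shared IPv4 ping code that calls through pingv6_ops would get
+ * -EAFNOSUPPORT (or a harmless no-op) instead of jumping into unloaded
+ * code, and it unregisters the protosw.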
+ */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.datagram_recv_ctl = dummy_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; + inet6_unregister_protosw(&pingv6_protosw); +} + +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct icmp6hdr user_icmph; + int addr_type; + struct in6_addr *daddr; + int iif = 0; + struct flowi6 fl6; + int err; + int hlimit; + struct dst_entry *dst; + struct rt6_info *rt; + struct pingfakehdr pfh; + + pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + + if (msg->msg_name) { + struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + if (msg->msg_namelen < sizeof(struct sockaddr_in6) || + u->sin6_family != AF_INET6) { + return -EINVAL; + } + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != u->sin6_scope_id) { + return -EINVAL; + } + daddr = &(u->sin6_addr); + iif = u->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } + + if (!iif) + iif = sk->sk_bound_dev_if; + + addr_type = ipv6_addr_type(daddr); + if (__ipv6_addr_needs_scope_id(addr_type) && !iif) + return -EINVAL; + if (addr_type & IPV6_ADDR_MAPPED) + return -EINVAL; + + /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + + memset(&fl6, 0, sizeof(fl6)); + + fl6.flowi6_proto = IPPROTO_ICMPV6; + fl6.saddr = np->saddr; + fl6.daddr = *daddr; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_icmp_type = user_icmph.icmp6_type; + fl6.fl6_icmp_code = user_icmph.icmp6_code; + fl6.flowi6_uid = sock_i_uid(sk); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); + if (!np) + return -EBADF; + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + + pfh.icmph.type = user_icmph.icmp6_type; + pfh.icmph.code = user_icmph.icmp6_code; + pfh.icmph.checksum = 0; + pfh.icmph.un.echo.id = inet->inet_sport; + pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; + pfh.iov = msg->msg_iov; + pfh.wcheck = 0; + pfh.family = AF_INET6; + + if (ipv6_addr_is_multicast(&fl6.daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + + lock_sock(sk); + err = ip6_append_data(sk, ping_getfrag, &pfh, len, + 0, hlimit, + np->tclass, NULL, &fl6, rt, + MSG_DONTWAIT, np->dontfrag); + + if (err) { + ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *) &pfh.icmph, + len); + } + release_sock(sk); + + if (err) + return err; + + return len; +} diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 343852e5c70..913830a4018 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -758,6 +758,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (sin6) { if 
(addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f02fe523bd3..113a70ff709 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -89,13 +89,13 @@ static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct rt6_info *rt6_add_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, unsigned pref); -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex); + const struct in6_addr *gwaddr); #endif static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) @@ -547,7 +547,6 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr) { - struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; @@ -589,8 +588,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr); if (rt && !lifetime) { ip6_del_rt(rt); @@ -598,8 +596,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, - pref); + rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); @@ -1791,15 +1788,16 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, } #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex) + const struct in6_addr *gwaddr) { struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(net, RT6_TABLE_INFO); + table = fib6_get_table(dev_net(dev), + addrconf_rt_table(dev, RT6_TABLE_INFO)); if (table == NULL) return NULL; @@ -1809,7 +1807,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_dev->ifindex != ifindex) + if (rt->rt6i_dev->ifindex != dev->ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; @@ -1823,21 +1821,21 @@ static struct rt6_info *rt6_get_route_info(struct net *net, return rt; } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct rt6_info *rt6_add_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, unsigned pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_INFO), .fc_metric = IP6_RT_PRIO_USER, - .fc_ifindex = ifindex, + .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_nlinfo.pid = 
0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_dst, prefix); @@ -1849,7 +1847,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, ip6_route_add(&cfg); - return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(dev, prefix, prefixlen, gwaddr); } #endif @@ -1858,7 +1856,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), + addrconf_rt_table(dev, RT6_TABLE_DFLT)); if (table == NULL) return NULL; @@ -1880,7 +1879,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_DFLT), .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -1897,27 +1896,17 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(struct net *net) -{ - struct rt6_info *rt; - struct fib6_table *table; - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(net, RT6_TABLE_DFLT); - if (table == NULL) - return; +int rt6_addrconf_purge(struct rt6_info *rt, void *arg) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) + return -1; + return 0; +} -restart: - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt); - goto restart; - } - } - read_unlock_bh(&table->tb6_lock); +void rt6_purge_dflt_routers(struct net *net) +{ + fib6_clean_all(net, rt6_addrconf_purge, 0, NULL); } static void rtmsg_to_fib6_config(struct net *net, @@ -2220,6 +2209,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2519,6 +2509,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (tb[RTA_OIF]) fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_UID]) + fl6.flowi6_uid = nla_get_u32(tb[RTA_UID]); + else + fl6.flowi6_uid = (iif ? 
(uid_t) -1 : current_uid()); + if (iif) { struct net_device *dev; dev = __dev_get_by_index(net, iif); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ac838965ff3..a6a636d6a0e 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -215,6 +215,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq6->iif = inet6_iif(skb); + ireq->ir_mark = inet_request_mark(sk, skb); + req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = ecn_ok; @@ -241,9 +243,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) final_p = fl6_update_dst(&fl6, np->opt, &final); ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 6dcf5e7d661..4c27009d39e 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -47,6 +47,13 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cdbce216521..0c08b0bbca6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -251,6 +251,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); final_p = fl6_update_dst(&fl6, np->opt, &final); @@ -404,6 +405,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); @@ -493,9 +495,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowlabel = 0; fl6.flowi6_oif = treq->iif; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; @@ -1046,7 +1049,9 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - fl6.flowi6_oif = inet6_iif(skb); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = inet6_iif(skb); + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -1252,6 +1257,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); + treq->iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_mark = inet_request_mark(sk, skb); + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & 
IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); + if (!isn) { struct inet_peer *peer = NULL; @@ -1261,12 +1274,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) atomic_inc(&skb->users); treq->pktopts = skb; } - treq->iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bb95e8e1c6f..b9bc3ca4995 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1084,6 +1084,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (msg->msg_controllen) { opt = &opt_space; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index dbc5b399a29..c6a76a80ee5 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -42,6 +42,11 @@ #include #include #include +#include +#include +#include +#include +#include #include struct idletimer_tg_attr { @@ -58,36 +63,91 @@ struct idletimer_tg { struct kobject *kobj; struct idletimer_tg_attr attr; + struct timespec delayed_timer_trigger; + struct timespec last_modified_timer; + struct timespec last_suspend_time; + struct notifier_block pm_nb; + + int timeout; unsigned int refcnt; + bool work_pending; bool send_nl_msg; bool active; }; static LIST_HEAD(idletimer_tg_list); static DEFINE_MUTEX(list_mutex); +static DEFINE_SPINLOCK(timestamp_lock); static struct kobject *idletimer_tg_kobj; -static void notify_netlink_uevent(const char *label, struct idletimer_tg *timer) +static bool check_for_delayed_trigger(struct idletimer_tg *timer, + struct timespec *ts) +{ + bool state; + struct timespec temp; + spin_lock_bh(×tamp_lock); + timer->work_pending = false; + if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout || + timer->delayed_timer_trigger.tv_sec != 0) { + state = false; + temp.tv_sec = timer->timeout; + temp.tv_nsec = 0; + if (timer->delayed_timer_trigger.tv_sec != 0) { + temp = timespec_add(timer->delayed_timer_trigger, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + timer->delayed_timer_trigger.tv_sec = 0; + timer->work_pending = true; + schedule_work(&timer->work); + } else { + temp = timespec_add(timer->last_modified_timer, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + } + } else { + state = timer->active; + } + spin_unlock_bh(×tamp_lock); + return state; +} + +static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) { - char label_msg[NLMSG_MAX_SIZE]; + char iface_msg[NLMSG_MAX_SIZE]; char state_msg[NLMSG_MAX_SIZE]; - char *envp[] = { label_msg, state_msg, NULL }; + char timestamp_msg[NLMSG_MAX_SIZE]; + char *envp[] = { iface_msg, state_msg, timestamp_msg, NULL }; int res; + struct timespec ts; + uint64_t time_ns; + bool state; - res = snprintf(label_msg, NLMSG_MAX_SIZE, "LABEL=%s", - label); + res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s", + iface); if (NLMSG_MAX_SIZE <= res) { pr_err("message too long (%d)", res); return; } + + get_monotonic_boottime(&ts); + state = check_for_delayed_trigger(timer, &ts); res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s", - timer->active ? "active" : "inactive"); + state ? 
"active" : "inactive"); + if (NLMSG_MAX_SIZE <= res) { pr_err("message too long (%d)", res); return; } - pr_debug("putting nlmsg: <%s> <%s>\n", label_msg, state_msg); + + time_ns = timespec_to_ns(&ts); + res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns); + if (NLMSG_MAX_SIZE <= res) { + timestamp_msg[0] = '\0'; + pr_err("message too long (%d)", res); + } + + pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); return; @@ -151,9 +211,55 @@ static void idletimer_tg_expired(unsigned long data) struct idletimer_tg *timer = (struct idletimer_tg *) data; pr_debug("timer %s expired\n", timer->attr.attr.name); - + spin_lock_bh(×tamp_lock); timer->active = false; + timer->work_pending = true; schedule_work(&timer->work); + spin_unlock_bh(×tamp_lock); +} + +static int idletimer_resume(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + struct timespec ts; + unsigned long time_diff, now = jiffies; + struct idletimer_tg *timer = container_of(notifier, + struct idletimer_tg, pm_nb); + if (!timer) + return NOTIFY_DONE; + switch (pm_event) { + case PM_SUSPEND_PREPARE: + get_monotonic_boottime(&timer->last_suspend_time); + break; + case PM_POST_SUSPEND: + spin_lock_bh(×tamp_lock); + if (!timer->active) { + spin_unlock_bh(×tamp_lock); + break; + } + /* since jiffies are not updated when suspended now represents + * the time it would have suspended */ + if (time_after(timer->timer.expires, now)) { + get_monotonic_boottime(&ts); + ts = timespec_sub(ts, timer->last_suspend_time); + time_diff = timespec_to_jiffies(&ts); + if (timer->timer.expires > (time_diff + now)) { + mod_timer_pending(&timer->timer, + (timer->timer.expires - time_diff)); + } else { + del_timer(&timer->timer); + timer->timer.expires = 0; + timer->active = false; + timer->work_pending = true; + schedule_work(&timer->work); + } + } + spin_unlock_bh(×tamp_lock); + break; + default: + break; + } + return NOTIFY_DONE; } static int idletimer_tg_create(struct idletimer_tg_info *info) @@ -189,6 +295,18 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) info->timer->refcnt = 1; info->timer->send_nl_msg = (info->send_nl_msg == 0) ? 
false : true; info->timer->active = true; + info->timer->timeout = info->timeout; + + info->timer->delayed_timer_trigger.tv_sec = 0; + info->timer->delayed_timer_trigger.tv_nsec = 0; + info->timer->work_pending = false; + get_monotonic_boottime(&info->timer->last_modified_timer); + + info->timer->pm_nb.notifier_call = idletimer_resume; + ret = register_pm_notifier(&info->timer->pm_nb); + if (ret) + printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n", + __func__, ret); mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); @@ -205,6 +323,34 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) return ret; } +static void reset_timer(const struct idletimer_tg_info *info) +{ + unsigned long now = jiffies; + struct idletimer_tg *timer = info->timer; + bool timer_prev; + + spin_lock_bh(×tamp_lock); + timer_prev = timer->active; + timer->active = true; + /* timer_prev is used to guard overflow problem in time_before*/ + if (!timer_prev || time_before(timer->timer.expires, now)) { + pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", + timer->timer.expires, now); + /* checks if there is a pending inactive notification*/ + if (timer->work_pending) + timer->delayed_timer_trigger = timer->last_modified_timer; + else { + timer->work_pending = true; + schedule_work(&timer->work); + } + } + + get_monotonic_boottime(&timer->last_modified_timer); + mod_timer(&timer->timer, + msecs_to_jiffies(info->timeout * 1000) + now); + spin_unlock_bh(×tamp_lock); +} + /* * The actual xt_tables plugin. */ @@ -228,9 +374,7 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, } /* TODO: Avoid modifying timers on each packet */ - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + now); - + reset_timer(info); return XT_CONTINUE; } @@ -238,7 +382,6 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) { struct idletimer_tg_info *info = par->targinfo; int ret; - unsigned long now = jiffies; pr_debug("checkentry targinfo %s\n", info->label); @@ -259,17 +402,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; - info->timer->active = true; - - if (time_before(info->timer->timer.expires, now)) { - schedule_work(&info->timer->work); - pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", - info->timer->timer.expires, now); - } - - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + now); - + reset_timer(info); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { @@ -300,6 +433,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) list_del(&info->timer->entry); del_timer_sync(&info->timer->timer); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + unregister_pm_notifier(&info->timer->pm_nb); kfree(info->timer->attr.attr.name); kfree(info->timer); } else { diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index ea716b31e2a..aa5f0919a1b 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -53,25 +54,22 @@ static unsigned int proc_stats_perms = S_IRUGO; module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); static struct proc_dir_entry *xt_qtaguid_ctrl_file; -#ifdef CONFIG_ANDROID_PARANOID_NETWORK + +/* Everybody can write. 
But proc_ctrl_write_limited is true by default which + * limits what can be controlled. See the can_*() functions. + */ static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; -#else -static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; -#endif module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); -#ifdef CONFIG_ANDROID_PARANOID_NETWORK -#include -static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; -static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; -#else -/* 0 means, don't limit anybody */ -static gid_t proc_stats_readall_gid; -static gid_t proc_ctrl_write_gid; -#endif -module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, +/* Limited by default, so the gid of the ctrl and stats proc entries + * will limit what can be done. See the can_*() functions. + */ +static bool proc_stats_readall_limited = true; +static bool proc_ctrl_write_limited = true; + +module_param_named(stats_readall_limited, proc_stats_readall_limited, bool, S_IRUGO | S_IWUSR); -module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, +module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool, S_IRUGO | S_IWUSR); /* @@ -242,8 +240,9 @@ static struct qtaguid_event_counts qtu_events; static bool can_manipulate_uids(void) { /* root pwnd */ - return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) - || in_egroup_p(proc_ctrl_write_gid); + return in_egroup_p(xt_qtaguid_ctrl_file->gid) + || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited) + || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); } static bool can_impersonate_uid(uid_t uid) @@ -254,9 +253,10 @@ static bool can_impersonate_uid(uid_t uid) static bool can_read_other_uid_stats(uid_t uid) { /* root pwnd */ - return unlikely(!current_fsuid()) || uid == current_fsuid() - || unlikely(!proc_stats_readall_gid) - || in_egroup_p(proc_stats_readall_gid); + return in_egroup_p(xt_qtaguid_stats_file->gid) + || unlikely(!current_fsuid()) || uid == current_fsuid() + || unlikely(!proc_stats_readall_limited) + || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); } static inline void dc_add_byte_packets(struct data_counters *counters, int set, @@ -269,24 +269,6 @@ static inline void dc_add_byte_packets(struct data_counters *counters, int set, counters->bpc[set][direction][ifs_proto].packets += packets; } -static inline uint64_t dc_sum_bytes(struct data_counters *counters, - int set, - enum ifs_tx_rx direction) -{ - return counters->bpc[set][direction][IFS_TCP].bytes - + counters->bpc[set][direction][IFS_UDP].bytes - + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; -} - -static inline uint64_t dc_sum_packets(struct data_counters *counters, - int set, - enum ifs_tx_rx direction) -{ - return counters->bpc[set][direction][IFS_TCP].packets - + counters->bpc[set][direction][IFS_UDP].packets - + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; -} - static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) { struct rb_node *node = root->rb_node; @@ -788,6 +770,53 @@ static struct iface_stat *get_iface_entry(const char *ifname) return iface_entry; } +/* This is for fmt2 only */ +static int pp_iface_stat_line(bool header, char *outp, + int char_count, struct iface_stat *iface_entry) +{ + int len; + if (header) { + len = snprintf(outp, char_count, + "ifname " + "total_skb_rx_bytes total_skb_rx_packets " + "total_skb_tx_bytes total_skb_tx_packets " + "rx_tcp_bytes rx_tcp_packets " + "rx_udp_bytes rx_udp_packets " + "rx_other_bytes rx_other_packets " + "tx_tcp_bytes 
tx_tcp_packets " + "tx_udp_bytes tx_udp_packets " + "tx_other_bytes tx_other_packets\n" + ); + } else { + struct data_counters *cnts; + int cnt_set = 0; /* We only use one set for the device */ + cnts = &iface_entry->totals_via_skb; + len = snprintf( + outp, char_count, + "%s " + "%llu %llu %llu %llu %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + iface_entry->ifname, + dc_sum_bytes(cnts, cnt_set, IFS_RX), + dc_sum_packets(cnts, cnt_set, IFS_RX), + dc_sum_bytes(cnts, cnt_set, IFS_TX), + dc_sum_packets(cnts, cnt_set, IFS_TX), + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); + } + return len; +} + static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, off_t items_to_skip, int char_count, int *eof, void *data) @@ -817,11 +846,7 @@ static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, return 0; if (fmt == 2 && item_index++ >= items_to_skip) { - len = snprintf(outp, char_count, - "ifname " - "total_skb_rx_bytes total_skb_rx_packets " - "total_skb_tx_bytes total_skb_tx_packets\n" - ); + len = pp_iface_stat_line(true, outp, char_count, NULL); if (len >= char_count) { *outp = '\0'; return outp - page; @@ -866,16 +891,8 @@ static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, stats->tx_bytes, stats->tx_packets ); } else { - len = snprintf( - outp, char_count, - "%s " - "%llu %llu %llu %llu\n", - iface_entry->ifname, - iface_entry->totals_via_skb[IFS_RX].bytes, - iface_entry->totals_via_skb[IFS_RX].packets, - iface_entry->totals_via_skb[IFS_TX].bytes, - iface_entry->totals_via_skb[IFS_TX].packets - ); + len = pp_iface_stat_line(false, outp, char_count, + iface_entry); } if (len >= char_count) { spin_unlock_bh(&iface_stat_list_lock); @@ -1092,18 +1109,13 @@ static void iface_stat_create(struct net_device *net_dev, spin_lock_bh(&iface_stat_list_lock); entry = get_iface_entry(ifname); if (entry != NULL) { - bool activate = !ipv4_is_loopback(ipaddr); IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", ifname, entry); iface_check_stats_reset_and_adjust(net_dev, entry); - _iface_stat_set_active(entry, net_dev, activate); + _iface_stat_set_active(entry, net_dev, true); IF_DEBUG("qtaguid: %s(%s): " "tracking now %d on ip=%pI4\n", __func__, - entry->ifname, activate, &ipaddr); - goto done_unlock_put; - } else if (ipv4_is_loopback(ipaddr)) { - IF_DEBUG("qtaguid: iface_stat: create(%s): " - "ignore loopback dev. 
ip=%pI4\n", ifname, &ipaddr); + entry->ifname, true, &ipaddr); goto done_unlock_put; } @@ -1154,19 +1166,13 @@ static void iface_stat_create_ipv6(struct net_device *net_dev, spin_lock_bh(&iface_stat_list_lock); entry = get_iface_entry(ifname); if (entry != NULL) { - bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, ifname, entry); iface_check_stats_reset_and_adjust(net_dev, entry); - _iface_stat_set_active(entry, net_dev, activate); + _iface_stat_set_active(entry, net_dev, true); IF_DEBUG("qtaguid: %s(%s): " "tracking now %d on ip=%pI6c\n", __func__, - entry->ifname, activate, &ifa->addr); - goto done_unlock_put; - } else if (addr_type & IPV6_ADDR_LOOPBACK) { - IF_DEBUG("qtaguid: %s(%s): " - "ignore loopback dev. ip=%pI6c\n", __func__, - ifname, &ifa->addr); + entry->ifname, true, &ifa->addr); goto done_unlock_put; } @@ -1292,6 +1298,38 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only) spin_unlock_bh(&iface_stat_list_lock); } +/* Guarantied to return a net_device that has a name */ +static void get_dev_and_dir(const struct sk_buff *skb, + struct xt_action_param *par, + enum ifs_tx_rx *direction, + const struct net_device **el_dev) +{ + BUG_ON(!direction || !el_dev); + + if (par->in) { + *el_dev = par->in; + *direction = IFS_RX; + } else if (par->out) { + *el_dev = par->out; + *direction = IFS_TX; + } else { + pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n", + par->hooknum, __func__); + BUG(); + } + if (unlikely(!(*el_dev)->name)) { + pr_err("qtaguid[%d]: %s(): no dev->name?!!\n", + par->hooknum, __func__); + BUG(); + } + if (skb->dev && *el_dev != skb->dev) { + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs par->%s=%p %s\n", + par->hooknum, skb->dev, skb->dev->name, + *direction == IFS_RX ? "in" : "out", *el_dev, + (*el_dev)->name); + } +} + /* * Update stats for the specified interface from the skb. * Do nothing if the entry @@ -1303,53 +1341,31 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, { struct iface_stat *entry; const struct net_device *el_dev; - enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX; + enum ifs_tx_rx direction; int bytes = skb->len; + int proto; - if (!skb->dev) { - MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); - el_dev = par->in ? : par->out; - } else { - const struct net_device *other_dev; - el_dev = skb->dev; - other_dev = par->in ? 
: par->out; - if (el_dev != other_dev) { - MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " - "par->(in/out)=%p %s\n", - par->hooknum, el_dev, el_dev->name, other_dev, - other_dev->name); - } - } - - if (unlikely(!el_dev)) { - pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n", - par->hooknum, __func__); - BUG(); - } else if (unlikely(!el_dev->name)) { - pr_err("qtaguid[%d]: %s(): no dev->name?!!\n", - par->hooknum, __func__); - BUG(); - } else { - int proto = ipx_proto(skb, par); - MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", - par->hooknum, el_dev->name, el_dev->type, - par->family, proto); - } + get_dev_and_dir(skb, par, &direction, &el_dev); + proto = ipx_proto(skb, par); + MT_DEBUG("qtaguid[%d]: iface_stat: %s(%s): " + "type=%d fam=%d proto=%d dir=%d\n", + par->hooknum, __func__, el_dev->name, el_dev->type, + par->family, proto, direction); spin_lock_bh(&iface_stat_list_lock); entry = get_iface_entry(el_dev->name); if (entry == NULL) { - IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n", - __func__, el_dev->name); + IF_DEBUG("qtaguid[%d]: iface_stat: %s(%s): not tracked\n", + par->hooknum, __func__, el_dev->name); spin_unlock_bh(&iface_stat_list_lock); return; } - IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + IF_DEBUG("qtaguid[%d]: %s(%s): entry=%p\n", par->hooknum, __func__, el_dev->name, entry); - entry->totals_via_skb[direction].bytes += bytes; - entry->totals_via_skb[direction].packets++; + data_counters_update(&entry->totals_via_skb, 0, direction, proto, + bytes); spin_unlock_bh(&iface_stat_list_lock); } @@ -1410,13 +1426,13 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, iface_entry = get_iface_entry(ifname); if (!iface_entry) { - pr_err("qtaguid: iface_stat: stat_update() %s not found\n", - ifname); + pr_err_ratelimited("qtaguid: tag_stat: stat_update() " + "%s not found\n", ifname); return; } /* It is ok to process data when an iface_entry is inactive */ - MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", + MT_DEBUG("qtaguid: tag_stat: stat_update() dev=%s entry=%p\n", ifname, iface_entry); /* @@ -1433,7 +1449,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, tag = combine_atag_with_uid(acct_tag, uid); uid_tag = make_tag_from_uid(uid); } - MT_DEBUG("qtaguid: iface_stat: stat_update(): " + MT_DEBUG("qtaguid: tag_stat: stat_update(): " " looking for tag=0x%llx (uid=%u) in ife=%p\n", tag, get_uid_from_tag(tag), iface_entry); /* Loop over tag list under this interface for {acct_tag,uid_tag} */ @@ -1461,6 +1477,8 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats. */ new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); + if (!new_tag_stat) + goto unlock; uid_tag_counters = &new_tag_stat->counters; } else { uid_tag_counters = &tag_stat_entry->counters; @@ -1469,6 +1487,8 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, if (acct_tag) { /* Create the child {acct_tag, uid_tag} and hook up parent. 
*/ new_tag_stat = create_if_tag_stat(iface_entry, tag); + if (!new_tag_stat) + goto unlock; new_tag_stat->parent_counters = uid_tag_counters; } else { /* @@ -1482,6 +1502,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, BUG_ON(!new_tag_stat); } tag_stat_update(new_tag_stat, direction, proto, bytes); +unlock: spin_unlock_bh(&iface_entry->tag_stat_list_lock); } @@ -1661,8 +1682,8 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, struct sock *sk; unsigned int hook_mask = (1 << par->hooknum); - MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, - par->hooknum, par->family); + MT_DEBUG("qtaguid[%d]: find_sk(skb=%p) family=%d\n", + par->hooknum, skb, par->family); /* * Let's not abuse the the xt_socket_get*_sk(), or else it will @@ -1688,8 +1709,8 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, * Not fixed in 3.0-r3 :( */ if (sk) { - MT_DEBUG("qtaguid: %p->sk_proto=%u " - "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); + MT_DEBUG("qtaguid[%d]: %p->sk_proto=%u->sk_state=%d\n", + par->hooknum, sk, sk->sk_protocol, sk->sk_state); if (sk->sk_state == TCP_TIME_WAIT) { xt_socket_put_sk(sk); sk = NULL; @@ -1703,37 +1724,19 @@ static void account_for_uid(const struct sk_buff *skb, struct xt_action_param *par) { const struct net_device *el_dev; + enum ifs_tx_rx direction; + int proto; - if (!skb->dev) { - MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); - el_dev = par->in ? : par->out; - } else { - const struct net_device *other_dev; - el_dev = skb->dev; - other_dev = par->in ? : par->out; - if (el_dev != other_dev) { - MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " - "par->(in/out)=%p %s\n", - par->hooknum, el_dev, el_dev->name, other_dev, - other_dev->name); - } - } - - if (unlikely(!el_dev)) { - pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); - } else if (unlikely(!el_dev->name)) { - pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); - } else { - int proto = ipx_proto(skb, par); - MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", - par->hooknum, el_dev->name, el_dev->type, - par->family, proto); + get_dev_and_dir(skb, par, &direction, &el_dev); + proto = ipx_proto(skb, par); + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d dir=%d\n", + par->hooknum, el_dev->name, el_dev->type, + par->family, proto, direction); - if_tag_stat_update(el_dev->name, uid, - skb->sk ? skb->sk : alternate_sk, - par->in ? IFS_RX : IFS_TX, - proto, skb->len); - } + if_tag_stat_update(el_dev->name, uid, + skb->sk ? skb->sk : alternate_sk, + direction, + proto, skb->len); } static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) @@ -1744,6 +1747,11 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk; uid_t sock_uid; bool res; + /* + * TODO: unhack how to force just accounting. + * For now we only do tag stats when the uid-owner is not requested + */ + bool do_tag_stat = !(info->match & XT_QTAGUID_UID); if (unlikely(module_passive)) return (info->match ^ info->invert) == 0; @@ -1808,12 +1816,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) * couldn't find the owner, so for now we just count them * against the system. */ - /* - * TODO: unhack how to force just accounting. - * For now we only do iface stats when the uid-owner is not - * requested. 
- */ - if (!(info->match & XT_QTAGUID_UID)) + if (do_tag_stat) account_for_uid(skb, sk, 0, par); MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", par->hooknum, @@ -1828,18 +1831,15 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) filp = sk->sk_socket->file; if (filp == NULL) { MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); - account_for_uid(skb, sk, 0, par); + if (do_tag_stat) + account_for_uid(skb, sk, 0, par); res = ((info->match ^ info->invert) & (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; atomic64_inc(&qtu_events.match_no_sk_file); goto put_sock_ret_res; } sock_uid = filp->f_cred->fsuid; - /* - * TODO: unhack how to force just accounting. - * For now we only do iface stats when the uid-owner is not requested - */ - if (!(info->match & XT_QTAGUID_UID)) + if (do_tag_stat) account_for_uid(skb, sk, sock_uid, par); /* @@ -2297,11 +2297,12 @@ static int ctrl_cmd_tag(const char *input) } CT_DEBUG("qtaguid: ctrl_tag(%s): " "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " - "in_group=%d in_egroup=%d\n", + "ctrl.gid=%u in_group()=%d in_egroup()=%d\n", input, current->pid, current->tgid, current_uid(), current_euid(), current_fsuid(), - in_group_p(proc_ctrl_write_gid), - in_egroup_p(proc_ctrl_write_gid)); + xt_qtaguid_ctrl_file->gid, + in_group_p(xt_qtaguid_ctrl_file->gid), + in_egroup_p(xt_qtaguid_ctrl_file->gid)); if (argc < 4) { uid = current_fsuid(); } else if (!can_impersonate_uid(uid)) { @@ -2593,10 +2594,11 @@ static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) && !can_read_other_uid_stats(stat_uid)) { CT_DEBUG("qtaguid: stats line: " "%s 0x%llx %u: insufficient priv " - "from pid=%u tgid=%u uid=%u\n", + "from pid=%u tgid=%u uid=%u stats.gid=%u\n", ppi->iface_entry->ifname, get_atag_from_tag(tag), stat_uid, - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, current_fsuid(), + xt_qtaguid_stats_file->gid); return 0; } if (ppi->item_index++ < ppi->items_to_skip) @@ -2752,7 +2754,7 @@ static int qtudev_open(struct inode *inode, struct file *file) utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); if (IS_ERR_OR_NULL(utd_entry)) { res = PTR_ERR(utd_entry); - goto err; + goto err_unlock; } /* Look for existing PID based proc_data */ @@ -2794,8 +2796,8 @@ static int qtudev_open(struct inode *inode, struct file *file) rb_erase(&utd_entry->node, &uid_tag_data_tree); kfree(utd_entry); } +err_unlock: spin_unlock_bh(&uid_tag_data_tree_lock); -err: return res; } diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h index d79f8383abf..6dc14a9c688 100644 --- a/net/netfilter/xt_qtaguid_internal.h +++ b/net/netfilter/xt_qtaguid_internal.h @@ -179,6 +179,25 @@ struct data_counters { struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; }; +static inline uint64_t dc_sum_bytes(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].bytes + + counters->bpc[set][direction][IFS_UDP].bytes + + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; +} + +static inline uint64_t dc_sum_packets(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].packets + + counters->bpc[set][direction][IFS_UDP].packets + + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; +} + + /* Generic X based nodes used as a base for rb_tree ops */ struct tag_node { struct rb_node node; @@ -203,7 +222,7 @@ struct iface_stat { struct 
net_device *net_dev; struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS]; - struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS]; + struct data_counters totals_via_skb; /* * We keep the last_known, because some devices reset their counters * just before NETDEV_UP, while some will reset just before diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c index 8cbd8e42bcc..f6a00a3520e 100644 --- a/net/netfilter/xt_qtaguid_print.c +++ b/net/netfilter/xt_qtaguid_print.c @@ -177,9 +177,10 @@ char *pp_tag_stat(struct tag_stat *ts) char *pp_iface_stat(struct iface_stat *is) { char *res; - if (!is) + if (!is) { res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); - else + } else { + struct data_counters *cnts = &is->totals_via_skb; res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" "list=list_head{...}, " "ifname=%s, " @@ -206,10 +207,10 @@ char *pp_iface_stat(struct iface_stat *is) is->totals_via_dev[IFS_RX].packets, is->totals_via_dev[IFS_TX].bytes, is->totals_via_dev[IFS_TX].packets, - is->totals_via_skb[IFS_RX].bytes, - is->totals_via_skb[IFS_RX].packets, - is->totals_via_skb[IFS_TX].bytes, - is->totals_via_skb[IFS_TX].packets, + dc_sum_bytes(cnts, 0, IFS_RX), + dc_sum_packets(cnts, 0, IFS_RX), + dc_sum_bytes(cnts, 0, IFS_TX), + dc_sum_packets(cnts, 0, IFS_TX), is->last_known_valid, is->last_known[IFS_RX].bytes, is->last_known[IFS_RX].packets, @@ -218,6 +219,7 @@ char *pp_iface_stat(struct iface_stat *is) is->active, is->net_dev, is->proc_ptr); + } _bug_on_err_or_null(res); return res; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a4db0211da..86d7a7afb9d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1339,7 +1339,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_pid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) + if ((dst_group || dst_pid) && + !netlink_capable(sock, NL_NONROOT_SEND)) goto out; } else { dst_pid = nlk->dst_pid; @@ -2102,6 +2103,7 @@ static void __init netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; + nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; netlink_table_ungrab(); } diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index aeea84a2483..e3f506729ec 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -300,6 +300,12 @@ cmd_lzo = (cat $(filter-out FORCE,$^) | \ lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ (rm -f $@ ; false) +quiet_cmd_lz4 = LZ4 $@ +cmd_lz4 = (cat $(filter-out FORCE,$^) | \ + lz4 -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + + # XZ # --------------------------------------------------------------------------- # Use xzkern to compress the kernel image and xzmisc to compress other things. 
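
The xt_qtaguid changes above replace the per-interface totals_via_skb byte/packet pair with a full struct data_counters, so the fmt2 /proc line can report per-protocol (TCP/UDP/other) buckets and still derive the old totals through dc_sum_bytes()/dc_sum_packets(). Below is a minimal standalone userspace sketch of that layout and roll-up; it mirrors the structures added to xt_qtaguid_internal.h, but it shrinks the counter-set dimension to the single set the device totals use, and the dc_add() helper plus the sample values in main() are illustrative stand-ins, not the kernel's data_counters_update() path.

/*
 * Standalone sketch (not kernel code) of the data_counters layout used
 * for the per-interface skb totals. Enum order and the single counter
 * set are simplifications for illustration.
 */
#include <stdint.h>
#include <stdio.h>

enum ifs_tx_rx { IFS_TX, IFS_RX, IFS_MAX_DIRECTIONS };
enum ifs_proto { IFS_TCP, IFS_UDP, IFS_PROTO_OTHER, IFS_MAX_PROTOS };
#define IFS_MAX_COUNTER_SETS 1   /* device totals only ever use set 0 */

struct byte_packet_counters {
        uint64_t bytes;
        uint64_t packets;
};

struct data_counters {
        struct byte_packet_counters
                bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};

/* Same roll-ups as the dc_sum_*() inlines added to xt_qtaguid_internal.h */
static uint64_t dc_sum_bytes(struct data_counters *c, int set,
                             enum ifs_tx_rx dir)
{
        return c->bpc[set][dir][IFS_TCP].bytes
             + c->bpc[set][dir][IFS_UDP].bytes
             + c->bpc[set][dir][IFS_PROTO_OTHER].bytes;
}

static uint64_t dc_sum_packets(struct data_counters *c, int set,
                               enum ifs_tx_rx dir)
{
        return c->bpc[set][dir][IFS_TCP].packets
             + c->bpc[set][dir][IFS_UDP].packets
             + c->bpc[set][dir][IFS_PROTO_OTHER].packets;
}

/* Hypothetical per-packet update: one bucket per direction and protocol */
static void dc_add(struct data_counters *c, int set, enum ifs_tx_rx dir,
                   enum ifs_proto proto, uint64_t bytes)
{
        c->bpc[set][dir][proto].bytes += bytes;
        c->bpc[set][dir][proto].packets++;
}

int main(void)
{
        struct data_counters skb_totals = { 0 };

        /* pretend we accounted a few packets in each direction */
        dc_add(&skb_totals, 0, IFS_RX, IFS_TCP, 1500);
        dc_add(&skb_totals, 0, IFS_RX, IFS_UDP, 512);
        dc_add(&skb_totals, 0, IFS_TX, IFS_TCP, 40);
        dc_add(&skb_totals, 0, IFS_TX, IFS_PROTO_OTHER, 84);

        /* the fmt2 iface_stat line prints these sums plus each raw bucket */
        printf("rx %llu bytes / %llu pkts, tx %llu bytes / %llu pkts\n",
               (unsigned long long)dc_sum_bytes(&skb_totals, 0, IFS_RX),
               (unsigned long long)dc_sum_packets(&skb_totals, 0, IFS_RX),
               (unsigned long long)dc_sum_bytes(&skb_totals, 0, IFS_TX),
               (unsigned long long)dc_sum_packets(&skb_totals, 0, IFS_TX));
        return 0;
}

Keeping the totals as inline sums over the raw buckets lets the old four-column output and the new sixteen-column fmt2 output in pp_iface_stat_line() read from the same counters, without maintaining a second accounting path.
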
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index f221ddf6908..c5242fd4f26 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -73,8 +73,8 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILE_BY \"`echo $LINUX_COMPILE_BY | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILE_HOST \"`echo $LINUX_COMPILE_HOST | $UTS_TRUNCATE`\" + echo \#define LINUX_COMPILE_BY \"`echo Metallice`\" + echo \#define LINUX_COMPILE_HOST \"`echo Nexus7`\" echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | tail -n 1`\" ) > .tmpcompile diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 4d403844e13..ea4fc2537d2 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -170,7 +170,7 @@ else # LOCALVERSION= is not specified if test "${LOCALVERSION+set}" != "set"; then scm=$(scm_version --short) - res="$res${scm:++}" +# res="$res${scm:++}" fi fi diff --git a/security/capability.c b/security/capability.c index 2984ea4f776..4da1d8c5a5d 100644 --- a/security/capability.c +++ b/security/capability.c @@ -12,6 +12,26 @@ #include +static int cap_binder_set_context_mgr(struct task_struct *mgr) +{ + return 0; +} + +static int cap_binder_transaction(struct task_struct *from, struct task_struct *to) +{ + return 0; +} + +static int cap_binder_transfer_binder(struct task_struct *from, struct task_struct *to) +{ + return 0; +} + +static int cap_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) +{ + return 0; +} + static int cap_syslog(int type) { return 0; @@ -874,6 +894,10 @@ static void cap_audit_rule_free(void *lsmrule) void __init security_fixup_ops(struct security_operations *ops) { + set_to_cap_if_null(ops, binder_set_context_mgr); + set_to_cap_if_null(ops, binder_transaction); + set_to_cap_if_null(ops, binder_transfer_binder); + set_to_cap_if_null(ops, binder_transfer_file); set_to_cap_if_null(ops, ptrace_access_check); set_to_cap_if_null(ops, ptrace_traceme); set_to_cap_if_null(ops, capget); diff --git a/security/security.c b/security/security.c index d9e15339092..5ee47fc41ea 100644 --- a/security/security.c +++ b/security/security.c @@ -4,6 +4,7 @@ * Copyright (C) 2001 WireX Communications, Inc * Copyright (C) 2001-2002 Greg Kroah-Hartman * Copyright (C) 2001 Networks Associates Technology, Inc + * Copyright (c) 2014 XPerience(R) Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,6 +19,8 @@ #include #include +#define MAX_LSM_XATTR 1 + /* Boot-time LSM user choice */ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = CONFIG_DEFAULT_SECURITY; @@ -127,6 +130,26 @@ int __init register_security(struct security_operations *ops) /* Security operations */ +int security_binder_set_context_mgr(struct task_struct *mgr) +{ + return security_ops->binder_set_context_mgr(mgr); +} + +int security_binder_transaction(struct task_struct *from, struct task_struct *to) +{ + return security_ops->binder_transaction(from, to); +} + +int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to) +{ + return security_ops->binder_transfer_binder(from, to); +} + +int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) +{ + return security_ops->binder_transfer_file(from, to, file); +} + int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { return security_ops->ptrace_access_check(child, 
mode); @@ -349,6 +372,37 @@ int security_inode_init_security(struct inode *inode, struct inode *dir, } EXPORT_SYMBOL(security_inode_init_security); +int security_new_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + const initxattrs initxattrs, void *fs_data) +{ + struct xattr new_xattrs[MAX_LSM_XATTR + 1]; + struct xattr *lsm_xattr; + int ret; + + if (unlikely(IS_PRIVATE(inode))) + return -EOPNOTSUPP; + + memset(new_xattrs, 0, sizeof new_xattrs); + if (!initxattrs) + return security_ops->inode_init_security(inode, dir, qstr, + NULL, NULL, NULL); + lsm_xattr = new_xattrs; + ret = security_ops->inode_init_security(inode, dir, qstr, + &lsm_xattr->name, + &lsm_xattr->value, + &lsm_xattr->value_len); + if (ret) + goto out; + ret = initxattrs(inode, new_xattrs, fs_data); +out: + kfree(lsm_xattr->name); + kfree(lsm_xattr->value); + + return (ret == -EOPNOTSUPP) ? 0 : ret; +} +EXPORT_SYMBOL(security_new_inode_init_security); + #ifdef CONFIG_SECURITY_PATH int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, unsigned int dev) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 266a2292451..9128cdc0071 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -215,6 +215,14 @@ static int inode_alloc_security(struct inode *inode) return 0; } +static void inode_free_rcu(struct rcu_head *head) +{ + struct inode_security_struct *isec; + + isec = container_of(head, struct inode_security_struct, rcu); + kmem_cache_free(sel_inode_cache, isec); +} + static void inode_free_security(struct inode *inode) { struct inode_security_struct *isec = inode->i_security; @@ -225,8 +233,16 @@ static void inode_free_security(struct inode *inode) list_del_init(&isec->list); spin_unlock(&sbsec->isec_lock); - inode->i_security = NULL; - kmem_cache_free(sel_inode_cache, isec); + /* + * The inode may still be referenced in a path walk and + * a call to selinux_inode_permission() can be made + * after inode_free_security() is called. Ideally, the VFS + * wouldn't do this, but fixing that is a much harder + * job. For now, simply free the i_security via RCU, and + * leave the current inode->i_security pointer intact. + * The inode will be freed after the RCU grace period too. + */ + call_rcu(&isec->rcu, inode_free_rcu); } static int file_alloc_security(struct file *file) @@ -407,6 +423,13 @@ static int sb_finish_set_opts(struct super_block *sb) if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) sbsec->flags |= SE_SBLABELSUPP; + /* + * Special handling for rootfs. Is genfs but supports + * setting SELinux context on in-core inodes. + */ + if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) + sbsec->flags |= SE_SBLABELSUPP; + /* Initialize the root inode. */ rc = inode_doinit_with_dentry(root_inode, root); @@ -1805,6 +1828,62 @@ static inline u32 open_file_to_av(struct file *file) /* Hook functions begin here. 
*/ +static int selinux_binder_set_context_mgr(struct task_struct *mgr) +{ + u32 mysid = current_sid(); + u32 mgrsid = task_sid(mgr); + + return avc_has_perm(mysid, mgrsid, SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL); +} + +static int selinux_binder_transaction(struct task_struct *from, struct task_struct *to) +{ + u32 mysid = current_sid(); + u32 fromsid = task_sid(from); + u32 tosid = task_sid(to); + int rc; + + if (mysid != fromsid) { + rc = avc_has_perm(mysid, fromsid, SECCLASS_BINDER, BINDER__IMPERSONATE, NULL); + if (rc) + return rc; + } + + return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, NULL); +} + +static int selinux_binder_transfer_binder(struct task_struct *from, struct task_struct *to) +{ + u32 fromsid = task_sid(from); + u32 tosid = task_sid(to); + return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, NULL); +} + +static int selinux_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) +{ + u32 sid = task_sid(to); + struct file_security_struct *fsec = file->f_security; + struct inode *inode = file->f_path.dentry->d_inode; + struct inode_security_struct *isec = inode->i_security; + struct common_audit_data ad; + int rc; + + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path = file->f_path; + + if (sid != fsec->sid) { + rc = avc_has_perm(sid, fsec->sid, + SECCLASS_FD, + FD__USE, + &ad); + if (rc) + return rc; + } + + return avc_has_perm(sid, isec->sid, isec->sclass, file_to_av(file), + &ad); +} + static int selinux_ptrace_access_check(struct task_struct *child, unsigned int mode) { @@ -5458,6 +5537,11 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer) static struct security_operations selinux_ops = { .name = "selinux", + .binder_set_context_mgr = selinux_binder_set_context_mgr, + .binder_transaction = selinux_binder_transaction, + .binder_transfer_binder = selinux_binder_transfer_binder, + .binder_transfer_file = selinux_binder_transfer_file, + .ptrace_access_check = selinux_ptrace_access_check, .ptrace_traceme = selinux_ptrace_traceme, .capget = selinux_capget, diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index b8c53723e09..20b00fc37cc 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -145,9 +145,12 @@ struct security_class_mapping secclass_map[] = { "node_bind", "name_connect", NULL } }, { "memprotect", { "mmap_zero", NULL } }, { "peer", { "recv", NULL } }, - { "capability2", { "mac_override", "mac_admin", "syslog", NULL } }, + { "capability2", + { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend", + NULL } }, { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, NULL } }, + { "binder", { "impersonate", "call", "set_context_mgr", "transfer", NULL } }, { NULL } }; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 26c7eee1c30..7b1830bde1c 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -38,7 +38,10 @@ struct task_security_struct { struct inode_security_struct { struct inode *inode; /* back pointer to inode object */ - struct list_head list; /* list of inode_security_struct */ + union { + struct list_head list; /* list of inode_security_struct */ + struct rcu_head rcu; /* for freeing the inode_security_struct */ + }; u32 task_sid; /* SID of creating task */ u32 sid; /* SID of this object */ u16 sclass; /* security class of this object */ diff --git 
a/security/selinux/ss/services.c b/security/selinux/ss/services.c index f6917bc0aa0..68c192b8fe7 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1231,6 +1231,10 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, struct context context; int rc = 0; + /* An empty security context is never valid. */ + if (!scontext_len) + return -EINVAL; + if (!ss_initialized) { int i; diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 16bd9c03679..69156923843 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -176,7 +176,7 @@ static void snd_free_dev_pages(struct device *dev, size_t size, void *ptr, * Calls the memory-allocator function for the corresponding * buffer type. * - * Returns zero if the buffer with the given size is allocated successfuly, + * Returns zero if the buffer with the given size is allocated successfully, * other a negative value at error. */ int snd_dma_alloc_pages(int type, struct device *device, size_t size, @@ -230,7 +230,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, * tries to allocate again. The size actually allocated is stored in * res_size argument. * - * Returns zero if the buffer with the given size is allocated successfuly, + * Returns zero if the buffer with the given size is allocated successfully, * other a negative value at error. */ int snd_dma_alloc_pages_fallback(int type, struct device *device, size_t size, diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 03b5aca1b9e..cb059248505 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -1506,18 +1506,6 @@ static int rt5640_spk_event(struct snd_soc_dapm_widget *w, RT5640_PWR_CLS_D, RT5640_PWR_CLS_D); rt5640_index_update_bits(codec, RT5640_CLSD_INT_REG1, 0xf000, 0xf000); - /*Enable DRC */ - snd_soc_update_bits(codec, RT5640_DRC_AGC_1, - RT5640_DRC_AGC_P_MASK | RT5640_DRC_AGC_MASK | - RT5640_DRC_AGC_UPD, - RT5640_DRC_AGC_P_DAC | RT5640_DRC_AGC_EN | - RT5640_DRC_AGC_UPD); - snd_soc_update_bits(codec, RT5640_DRC_AGC_2, - RT5640_DRC_AGC_PRB_MASK, - 0x0003); - snd_soc_update_bits(codec, RT5640_DRC_AGC_3, - RT5640_DRC_AGC_TAR_MASK, - 0x0080); snd_soc_update_bits(codec, RT5640_SPK_VOL, RT5640_L_MUTE | RT5640_R_MUTE, 0); rt5640_update_eqmode(codec,NAKASI); @@ -1527,16 +1515,6 @@ static int rt5640_spk_event(struct snd_soc_dapm_widget *w, snd_soc_update_bits(codec, RT5640_SPK_VOL, RT5640_L_MUTE | RT5640_R_MUTE, RT5640_L_MUTE | RT5640_R_MUTE); - /*Disable DRC */ - snd_soc_update_bits(codec, RT5640_DRC_AGC_1, - RT5640_DRC_AGC_P_MASK | RT5640_DRC_AGC_MASK | - RT5640_DRC_AGC_UPD, RT5640_DRC_AGC_UPD); - snd_soc_update_bits(codec, RT5640_DRC_AGC_2, - RT5640_DRC_AGC_PRB_MASK, - 0x0000); - snd_soc_update_bits(codec, RT5640_DRC_AGC_3, - RT5640_DRC_AGC_TAR_MASK, - 0x0000); rt5640_index_update_bits(codec, RT5640_CLSD_INT_REG1, 0xf000, 0x0000); snd_soc_update_bits(codec, RT5640_PWR_DIG1, diff --git a/sound/soc/tegra/Kconfig b/sound/soc/tegra/Kconfig index 1217aaa224f..806176bf81f 100644 --- a/sound/soc/tegra/Kconfig +++ b/sound/soc/tegra/Kconfig @@ -230,6 +230,8 @@ config SND_SOC_TEGRA_MAX98095 Say Y or M here if you want to add support for SoC audio on Tegra boards using the MAX98095 codec. Currently, only supported board is Cardhu. 
+ config HEADSET_FUNCTION tristate "Headset detection function" default n + diff --git a/sound/soc/tegra/tegra30_dam.c b/sound/soc/tegra/tegra30_dam.c index d308179110c..85aa60b627a 100644 --- a/sound/soc/tegra/tegra30_dam.c +++ b/sound/soc/tegra/tegra30_dam.c @@ -158,6 +158,7 @@ static int tegra30_dam_show(struct seq_file *s, void *unused) struct tegra30_dam_context *dam = s->private; int i; + tegra30_ahub_enable_clocks(); clk_enable(dam->dam_clk); for (i = 0; i < ARRAY_SIZE(regs); i++) { @@ -166,6 +167,7 @@ static int tegra30_dam_show(struct seq_file *s, void *unused) } clk_disable(dam->dam_clk); + tegra30_ahub_disable_clocks(); return 0; } diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 7186f3dd3bf..873ce0a7219 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -40,6 +40,12 @@ #include "tegra_pcm.h" +#ifdef CONFIG_AUDIO_MIN_PERFLOCK +#include +#define PLAYBACK_CPU_FREQ_MIN 340000 +static struct pm_qos_request_list playback_cpu_freq_req; +#endif + #define DRV_NAME "tegra-pcm-audio" #define PERIOD_BYTES_MAX (PAGE_SIZE * 2) @@ -146,6 +152,10 @@ static int tegra_pcm_open(struct snd_pcm_substream *substream) if (prtd == NULL) return -ENOMEM; +#ifdef CONFIG_AUDIO_MIN_PERFLOCK + pm_qos_update_request(&playback_cpu_freq_req, + (s32)PLAYBACK_CPU_FREQ_MIN); +#endif runtime->private_data = prtd; prtd->substream = substream; @@ -198,6 +208,11 @@ static int tegra_pcm_close(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; struct tegra_runtime_data *prtd = runtime->private_data; +#ifdef CONFIG_AUDIO_MIN_PERFLOCK + pm_qos_update_request(&playback_cpu_freq_req, + (s32)PM_QOS_CPU_FREQ_MIN_DEFAULT_VALUE); +#endif + if (prtd->dma_chan) tegra_dma_free_channel(prtd->dma_chan); @@ -430,12 +445,20 @@ static struct platform_driver tegra_pcm_driver = { static int __init snd_tegra_pcm_init(void) { +#ifdef CONFIG_AUDIO_MIN_PERFLOCK + pm_qos_add_request(&playback_cpu_freq_req, + PM_QOS_CPU_FREQ_MIN, + PM_QOS_DEFAULT_VALUE); +#endif return platform_driver_register(&tegra_pcm_driver); } module_init(snd_tegra_pcm_init); static void __exit snd_tegra_pcm_exit(void) { +#ifdef CONFIG_AUDIO_MIN_PERFLOCK + pm_qos_remove_request(&playback_cpu_freq_req); +#endif platform_driver_unregister(&tegra_pcm_driver); } module_exit(snd_tegra_pcm_exit); diff --git a/usr/Kconfig b/usr/Kconfig index 65b845bd4e3..16ffe99bbad 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -90,6 +90,15 @@ config RD_LZO Support loading of a LZO encoded initial ramdisk or cpio buffer If unsure, say N. +config RD_LZ4 + bool "Support initial ramdisks compressed using LZ4" if EXPERT + default !EXPERT + depends on BLK_DEV_INITRD + select DECOMPRESS_LZ4 + help + Support loading of a LZ4 encoded initial ramdisk or cpio buffer + If unsure, say N. + choice prompt "Built-in initramfs compression mode" if INITRAMFS_SOURCE!="" help